# Domain Modeling

In [1]:
# Settings and imports.
%matplotlib inline
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import data

# Activate seaborn's default theme for subsequent matplotlib plots.
sns.set()
# Show floats in rendered DataFrames with two decimal places.
pd.set_option('display.float_format', '{:.2f}'.format)

In [3]:
# Data
# Task table and task-session log, both read via the project's data loader.
tasks = data.load('robomission-2018-02-10/tasks.csv')
ts = (
    data.load('robomission-2018-02-10/task_sessions.csv')
    # Keep only sessions with a positive time spent.
    .loc[lambda sessions: sessions.time_spent > 0]
    # `date` = first 10 characters of the `end` string
    # (presumably the YYYY-MM-DD part of a timestamp -- confirm).
    .assign(date=lambda sessions: sessions.end.str[:10])
)

In [14]:
# Every entity of the domain is modeled as a "chunk": tasks, concepts,
# misconceptions and problem sets are all chunks.
# A domain can then be described by two tables:
# - chunks table: name, type and further properties of each chunk,
# - relationships table: subject-predicate-object-value rows
#   (similar to RDF triples).
# The scheme could be generalized further for more flexibility, but even
# in this form it subsumes all commonly used domain models.
chunks = pd.DataFrame.from_records(
    [
        ("t1", "task", 1),
        ("t2", "task", 2),
        ("t3", "task", 3),
        ("c1", "concept", 4),
        ("c2", "concept", 5),
        ("c12", "concept", 6),
        ("ps1", "problem-set", 7),
    ],
    # Further possible columns: 'description', 'setting', 'solution', ...
    columns=["name", "type", "order"],
)
chunks

Unnamed: 0,name,type,order
0,t1,task,1
1,t2,task,2
2,t3,task,3
3,c1,concept,4
4,c2,concept,5
5,c12,concept,6
6,ps1,problem-set,7


In [32]:
# Chunk parameters live in their own long-format table (one row per
# chunk/parameter pair) rather than as columns of the chunks table:
# they are sparse, and keeping them apart reflects a different
# responsibility, since parameters may be defined and computed by
# several different models.
parameters = pd.DataFrame.from_records(
    [
        # --- used directly by the learning system ---
        ("t1", "good-time", 15),
        ("t2", "good-time", 25),
        ("t3", "good-time", 25),
        ("ps1", "mastery-threshold", 0.9),
        # cached computed property
        ("ps1", "n-tasks", 3),

        # --- used and updated by models ---
        ("t1", "elo.difficulty", 0.1),
        ("t2", "elo.difficulty", 0.6),
        ("t3", "elo.difficulty", 0.7),
    ],
    columns=["chunk", "name", "value"],
)
parameters

Unnamed: 0,chunk,name,value
0,t1,good-time,15.0
1,t2,good-time,25.0
2,t3,good-time,25.0
3,ps1,mastery-threshold,0.9
4,ps1,n-tasks,3.0
5,t1,elo.difficulty,0.1
6,t2,elo.difficulty,0.6
7,t3,elo.difficulty,0.7


In [35]:
# Relationships between chunks, one row per
# (subject, predicate, object) triple with an attached value.
relationships = pd.DataFrame.from_records(
    [
        # problem set -> tasks it consists of
        ("ps1", "contains", "t1", 1),
        ("ps1", "contains", "t2", 1),
        ("ps1", "contains", "t3", 1),

        # problem set -> concept it practices
        ("ps1", "practice", "c12", 1),

        # task -> concepts it contains
        ("t1", "contains", "c1", 1.0),
        ("t2", "contains", "c1", 0.5),
        ("t2", "contains", "c2", 0.9),
        ("t3", "contains", "c2", 1.0),
        ("t3", "contains", "c12", 1.0),  #?

        # concept hierarchy
        ("c12", "contains", "c1", 1),
        ("c12", "contains", "c2", 1),

        # alternatively, similarity between concepts could be defined
        #('c1', 'similar', 'c2', 0.2), ....
        # or even similarity between tasks
        #('t1', 'similar', 't2', 0.1), ....

        # and/or prerequisites between chunks (concepts/tasks/PS)
        ("c2", "prereq", "c1", 1),  # (prereq.and)
        ("t3", "prereq.or", "t1", 1),
        ("t3", "prereq.or", "t2", 1),
    ],
    columns=["subject", "predicate", "object", "value"],
)

# Semantics of missing relationships: treat them as value 0/False.

# Pivoting example: subject x object matrix of the 'contains' values,
# with absent pairs filled by 0.
(
    relationships[relationships["predicate"] == "contains"]
    .pivot(index="subject", columns="object", values="value")
    .fillna(0)
)

object,c1,c12,c2,t1,t2,t3
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
c12,1.0,0.0,1.0,0.0,0.0,0.0
ps1,0.0,0.0,0.0,1.0,1.0,1.0
t1,1.0,0.0,0.0,0.0,0.0,0.0
t2,0.5,0.0,0.9,0.0,0.0,0.0
t3,0.0,1.0,1.0,0.0,0.0,0.0


In [34]:
# Domain interface (most methods are optional)
class Domain:
    """Container bundling the three tables that describe a domain.

    NOTE(review): the arguments are presumably the `chunks`, `parameters`
    and `relationships` DataFrames built in the earlier cells -- confirm;
    nothing in this class checks their type.
    """

    def __init__(self, chunks, parameters, relationships):
        """Store the three given tables on the instance unchanged."""
        self.chunks = chunks
        self.parameters = parameters
        self.relationships = relationships
        
    def show(self):
        """Placeholder: currently does nothing and returns None."""
        pass