# A way to generate good simulated students 

In [None]:
%pylab inline

In [None]:
import statsmodels.api as sm
import pandas as pd
from scipy.stats import rv_continuous, norm, truncnorm, beta
from numba import jit
from igraph import Graph
from igraph.drawing import plot as igraph_plot
from matplotlib.collections import LineCollection
from sklearn.linear_model import LinearRegression
from sklearn.isotonic import IsotonicRegression
from sklearn.utils import check_random_state

In [None]:
def create_map_graph(edges_list,N=None):
    """Build a directed igraph ``Graph`` from ``edges_list`` and label its vertices.

    Every vertex gets identical ``"label"`` and ``"name"`` attributes: its
    1-based position as a string ("1", "2", ...).

    When ``N`` is given, vertex ``i`` additionally receives an ``"item pool"``
    attribute holding ``N[i]`` item identifiers of the form
    ``"q_<vertex number>_<item number>"`` (both numbers 1-based).
    """
    graph = Graph(edges_list, directed=True)
    vertex_count = len(graph.vs)

    names = [str(position + 1) for position in range(vertex_count)]
    graph.vs["name"] = names
    graph.vs["label"] = names

    if N is None:
        return graph

    pools = []
    for position in range(vertex_count):
        pool = ["q_{}_{}".format(position + 1, item + 1) for item in range(N[position])]
        pools.append(pool)
    graph.vs["item pool"] = pools
    return graph

def plot_map_graph(map_graph):
    """Draw ``map_graph`` with a Sugiyama (layered) layout in a 400x400 box.

    Returns whatever ``igraph.drawing.plot`` returns.
    """
    return igraph_plot(
        map_graph,
        layout=map_graph.layout_sugiyama(),
        bbox=(400, 400),
    )

In [None]:
# Example concept map: directed edges given as (source, target) vertex indices.
edges_list = [(0, 1), (0, 2), (2, 3), (1, 3)]

# Number of items in the pool of each vertex, in vertex order.
N = [7, 6, 5, 7]

In [None]:
# Build the example concept map; `map_graph` is the module-level graph that
# the gamma_* and *_association_* helpers below read.
map_graph = create_map_graph(edges_list,N)
# Bare last expression of the cell, so the notebook displays the returned plot.
plot_map_graph(map_graph)

In [None]:
def gamma_corresp(C):
    """Return the set of vertices adjacent to ``C`` in the global ``map_graph``.

    Both incoming and outgoing edges are followed (``mode="ALL"``); the
    result contains whatever ``Graph.neighbors`` yields for ``C``.
    """
    adjacent = map_graph.neighbors(C, mode="ALL")
    return set(adjacent)

In [None]:
def gamma_plus(C):
    """Return every vertex reachable from ``C`` along outgoing edges.

    The global ``map_graph`` is traversed breadth-first, one frontier at a
    time. ``C`` itself is only part of the result when a cycle leads back
    to it.

    Parameters
    ----------
    C : vertex identifier accepted by ``map_graph.neighbors``.

    Returns
    -------
    set
        Vertex indices (as returned by ``Graph.neighbors``) of all
        descendants of ``C``.
    """
    reachable = set()
    frontier = set(map_graph.neighbors(C, mode="OUT"))
    while frontier:
        reachable |= frontier
        successors = set()
        for node in frontier:
            successors.update(map_graph.neighbors(node, mode="OUT"))
        # Dropping already-seen vertices expands every branch exactly once
        # and guarantees termination even if the graph contains a cycle.
        frontier = successors - reachable
    return reachable

In [None]:
def gamma_plus_plus(C):
    """Return the vertices in ``gamma_plus(C)`` that are not in ``gamma_corresp(C)``."""
    descendants = gamma_plus(C)
    adjacent = gamma_corresp(C)
    return descendants.difference(adjacent)

In [None]:
def association_item_concept(item, concept):
    """Tell whether ``item`` belongs directly to ``concept``.

    ``item`` is split on ``"_"``; ``concept`` is the concept number as a
    string. The result is true when the identifier rebuilt as
    ``"q_<concept>_<third field of item>"`` is present in that concept's
    ``"item pool"`` in the global ``map_graph`` and the second field of
    ``item`` equals ``concept``.
    """
    fields = item.split("_")
    rebuilt_id = "q_" + concept + "_" + fields[2]
    concept_pool = map_graph.vs["item pool"][int(concept) - 1]
    in_pool = rebuilt_id in concept_pool
    same_concept = fields[1] == concept
    return in_pool & same_concept

In [None]:
def indirect_association_item_concept(item, concept):
    """Tell whether ``item`` is linked to ``concept`` only through the graph.

    Returns ``False`` when the item is directly associated with the
    concept. Otherwise returns whether the 0-based index of ``concept`` is
    in ``gamma_plus`` of the concept named by the item's second
    ``"_"``-separated field.
    """
    directly_linked = association_item_concept(item, concept)
    if not directly_linked:
        concept_index = int(concept) - 1
        item_concept = item.split("_")[1]
        return concept_index in gamma_plus(item_concept)
    return False


In [None]:
def total_association_item_concept(item, concept):
    """Return the sum of the indirect and direct association flags.

    The two boolean results are added, so the value is an integer count
    rather than a bool.
    """
    indirect = indirect_association_item_concept(item, concept)
    direct = association_item_concept(item, concept)
    return indirect + direct

In [None]:
def proba_with_level(u,k):
    """Map proficiency level(s) ``u`` to a probability of a correct answer.

    The curve is piecewise linear in ``u``:

    * below ``k/2`` it is ``u / k``;
    * from ``k/2`` upwards it is ``(3u + 2k - 7) / (5(k - 2))``, which
      evaluates to 0.7 at ``u = k/2`` and to 1 at ``u = k - 1``.

    Parameters
    ----------
    u : scalar or array-like
        Level(s) to evaluate.
    k : number
        Number of levels. The upper branch divides by ``k - 2``, so
        ``k == 2`` produces non-finite values.

    Returns
    -------
    numpy.ndarray
        Probabilities with the same shape as ``u``.
    """
    # Evaluate the argument itself rather than a module-level array that
    # merely happens to exist in the notebook session.
    levels = np.asarray(u, dtype=float)
    c = .5
    lower_branch = (levels * 2. * c) / float(k)
    upper_branch = (3. * levels + 2 * k - 7.) / (5 * (k - 2.))
    return np.where(levels < (k / 2.), lower_branch, upper_branch)

In [None]:
# NOTE: intentionally not decorated with numba's ``@jit``. The body relies on
# a scipy frozen distribution, which numba cannot compile: at best it runs in
# object mode with no speed-up, and it fails outright when ``@jit`` compiles
# in nopython mode.
def displaced_discrete_gaussian(level_max, factor=0):
    """Return a discrete probability vector over levels ``0 .. level_max - 1``.

    A normal distribution with mean ``(level_max - 1) / 2 + factor`` and
    standard deviation 10 is discretised: level 0 receives the cumulative
    mass up to 0, and level ``i > 0`` receives the mass between ``i - 1``
    and ``i``. Mass above ``level_max - 1`` is discarded and the vector is
    renormalised to sum to 1.

    Parameters
    ----------
    level_max : int
        Number of levels.
    factor : number, optional
        Shift applied to the mean; positive values favour higher levels.

    Returns
    -------
    numpy.ndarray
        Array of length ``level_max`` summing to 1.
    """
    levels = np.arange(level_max)
    # Centre on this call's own level range, not on a notebook-level global.
    mean = ((level_max - 1) / 2.) + factor
    cdf_values = norm(loc=mean, scale=10).cdf(levels)
    # Successive differences of the CDF give per-level mass; the first entry
    # keeps the whole lower tail.
    mass = cdf_values.copy()
    mass[1:] -= cdf_values[:-1]
    return mass / mass.sum()

In [None]:
# Notebook-level settings: number of proficiency levels and of simulated users.
k = 10
number_users = 1000
# Plot the success-probability curve over the levels 0 .. k-1.
x = np.arange(k)
y = proba_with_level(x,k)
plot(y)

In [None]:
def generate_score_panel_with_hierarchy(k,map_graph,N, number_users, mastery_level=5):
    """Simulate user levels and item answers over a concept hierarchy.

    Concepts are processed parents-first. Each user draws a level for every
    concept from ``displaced_discrete_gaussian``; for a concept with parents
    the distribution is shifted upwards by the concept's depth when the user
    reached ``mastery_level`` on at least one parent. Each item answer is a
    Bernoulli draw whose success probability is ``proba_with_level`` at the
    user's level.

    Side effect: every vertex of ``map_graph`` receives a ``"depth"``
    attribute (1 for vertices without parents, otherwise one more than the
    deepest parent).

    Parameters
    ----------
    k : int
        Number of proficiency levels (levels are ``0 .. k - 1``).
    map_graph : igraph.Graph
        Directed concept graph whose vertex ``"name"`` attributes are the
        1-based concept numbers as strings.
    N : sequence of int
        Number of items per concept, indexed by concept number minus one.
    number_users : int
        Number of users to simulate.
    mastery_level : number, optional
        Minimum parent level that counts as mastered. Defaults to 5.

    Returns
    -------
    answers : dict
        Concept position -> list with one 0/1 answer array per user.
    scores : dict
        Concept position -> array with each user's mean answer.
    level_users : numpy.ndarray
        ``(number_users, len(N))`` array of drawn levels.

    Raises
    ------
    ValueError
        If ``map_graph`` contains a cycle, so no parents-first order exists.
    """
    level_users = np.zeros([number_users, len(N)])
    answers = {}
    scores = {}
    levels = np.arange(k)
    probas = proba_with_level(levels, k)

    # Work with vertex indices rather than Vertex objects, so the set logic
    # below does not rely on how igraph hashes and compares its proxies.
    done = set()
    pending = set(range(map_graph.vcount()))
    while pending:
        ready = [idx for idx in sorted(pending)
                 if set(map_graph.neighbors(idx, mode="IN")) <= done]
        if not ready:
            # Every remaining vertex waits on another remaining vertex.
            raise ValueError("map_graph contains a cycle; concepts cannot be ordered")
        for idx in ready:
            _simulate_concept(map_graph, idx, k, N, number_users, mastery_level,
                              levels, probas, level_users, answers, scores)
            done.add(idx)
        pending.difference_update(ready)
    return answers,scores,level_users


def _simulate_concept(map_graph, idx, k, N, number_users, mastery_level,
                      levels, probas, level_users, answers, scores):
    """Draw levels and answers of all users for the vertex at index ``idx``."""
    vertex = map_graph.vs[idx]
    capsule_pos = int(vertex["name"]) - 1
    parent_ids = map_graph.neighbors(idx, mode="IN")
    parent_cols = [int(map_graph.vs[p]["name"]) - 1 for p in parent_ids]

    if parent_ids:
        depth = max(map_graph.vs[p]["depth"] for p in parent_ids) + 1
    else:
        depth = 1
    vertex["depth"] = depth

    # Only two distributions are possible per concept, so build them once
    # instead of once per user.
    base_dist = displaced_discrete_gaussian(k, 0)
    boosted_dist = displaced_discrete_gaussian(k, depth) if parent_ids else base_dist

    concept_answers = []
    for user in range(number_users):
        mastered_previous = any(level_users[user][col] >= mastery_level
                                for col in parent_cols)
        level = np.random.choice(levels, p=boosted_dist if mastered_previous else base_dist)
        level_users[user][capsule_pos] = level
        # The stored level is a float; array indexing needs a real integer.
        concept_answers.append(
            np.random.binomial(p=probas[int(level)], n=1, size=N[capsule_pos]))

    answers[capsule_pos] = concept_answers
    # Explicit 2-D shape keeps the per-user mean well-defined for zero users.
    scores[capsule_pos] = np.reshape(
        concept_answers, (number_users, N[capsule_pos])).mean(axis=1)

        

In [None]:
# Scratch check: draw one level in 0 .. k-1 from the unshifted level distribution.
np.random.choice(
                            np.arange(k), p=displaced_discrete_gaussian(k))

In [None]:
# Demo run: 10 levels, the example concept map and item counts, 100 simulated users.
generate_score_panel_with_hierarchy(10,map_graph,N, 100)

In [90]:
# Show the success probabilities for levels 0 .. 9 (the array printed below).
x = np.arange(10)
proba_with_level(x,10)

array([ 0.   ,  0.1  ,  0.2  ,  0.3  ,  0.4  ,  0.7  ,  0.775,  0.85 ,
        0.925,  1.   ])