## An simple model to predict:
### 1. ability of each student
### 2. attributes of each problem
### 3. whether she/he will make a correct choice.

In [1]:
### import libraries
import numpy as np
import tensorflow as tf
import time
import xlrd, csv
import pandas as pd

In [2]:
### open the chemistry exam record 
workbook = xlrd.open_workbook('chem_20180201.xlsx')
names = workbook.sheet_names()

xlsx = pd.ExcelFile('chem_20180201.xlsx')
df1 = xlsx.parse(names[0], header=None)
df2 = xlsx.parse(names[1], header=None)
df3 = xlsx.parse(names[2], header=None)

In [39]:
### process the data 
df2_ = df2.dropna(axis = 1)
df2_.columns = range(11)

ans0 = (df2_.loc[0][1:11]==df2_.loc[3][1:11])
ans1 = (df2_.loc[1][1:11]==df2_.loc[3][1:11])
ans2 = (df2_.loc[2][1:11]==df2_.loc[3][1:11])
df2_.loc[0][1:11] = ans0
df2_.loc[1][1:11] = ans1
df2_.loc[2][1:11] = ans2

scores = (np.array(df2_.loc[:2,1:11])).astype(np.float)

In [38]:
### "scores" indicates how each student scores on each problem
###  (3 students, 10 problems)
print(scores)

[[ 1.  1.  1.  1.  1.  0.  1.  1.  1.  0.]
 [ 1.  1.  0.  1.  1.  1.  1.  1.  1.  1.]
 [ 0.  1.  1.  1.  1.  1.  1.  1.  1.  1.]]


In [4]:
'''
these parameters means: 
number of students
number of problems
number of latent factors
ratio of missing values and complete data
number of missing values
hyper parameters 1, to restrict the sum of elements in each problem to be one
hyper parameters 2, to restrict elements in each problem to be less than one  
'''
n_student = 3
n_prob = 10
n_latent = 1
ratio = 0.1
n_zeros = int(n_student*n_prob* ratio)
lamb1 = 0.01
lamb2 = 5.  

In [5]:
# proj_mat defines positions to drop a ratio of data randomly
# (optional)
proj_mat = np.ones(n_student*n_prob)
proj_mat[:n_zeros] = 0
np.random.shuffle( proj_mat )
proj_mat = proj_mat.reshape([n_student, n_prob])
test_mat = np.ones([n_student, n_prob]) - proj_mat

In [6]:
proj_mat 

array([[ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.]])

In [7]:
test_mat

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

### Use tensorflow(TF) to construct model:

In [8]:
sess = tf.Session()

In [9]:
class recomend:
    def __init__(self, n_student, n_prob, n_latent, ratio, lamb1, lamb2):
        self.ones = tf.constant(np.ones([n_student,n_prob]), dtype=tf.float32)
        self.n_student = n_student
        self.n_prob = n_prob
        #self.lamb = lamb
        self.scores = tf.placeholder(dtype = tf.float32 ,shape = [n_student, n_prob])
        self.proj_ = tf.placeholder(dtype = tf.float32 ,shape = [n_student, n_prob])
        self.s_ = tf.get_variable("student", shape=[n_student, n_latent])
        self.s_bias = tf.get_variable("student_bias", shape=[n_student, 1])
        self.p_ = tf.get_variable("problem", shape=[n_prob, n_latent])
        self.p_bias = tf.get_variable("problem_bias", shape=[n_prob, 1])
        
        self.predict =  tf.sigmoid(
            tf.matmul(self.s_, tf.transpose(self.p_) ) + 
            tf.tile(self.s_bias, [1, n_prob]) +
            tf.transpose(tf.tile(self.p_bias, [1, n_student]))
                                  )
        
        self.loss_0 = tf.reduce_sum(tf.mul(-( 
                                 self.scores*tf.log(self.predict +1e-6) 
                   + (self.ones-self.scores)*tf.log(self.ones - self.predict  )   
                                        ),self.proj_  ))/(n_student*n_prob* (1-ratio) )
        self.loss_1 = lamb1 * (tf.reduce_sum(tf.square(self.p_) ) 
                              + tf.reduce_sum(tf.square(self.s_) )  )
#        loss_0 = tf.reduce_sum(tf.square(self.predict-self.scores) )/(n_student*n_prob* (1-ratio) )
#        loss_1 = lamb1 * tf.reduce_sum(tf.square(tf.reduce_sum(self.p_, reduction_indices = 1) - self.ones) )
#        loss_2 = lamb2 * tf.reduce_sum(tf.pow(self.p_, 8) )/(n_student*n_prob* (1-ratio) )
#        self.loss = loss_0 + loss_1 + loss_2
        self.loss = self.loss_0 + self.loss_1

In [10]:
model = recomend(n_student, n_prob, n_latent, ratio, lamb1, lamb2)

In [11]:
sess.run(tf.initialize_all_variables() )

In [12]:
train_op = tf.train.GradientDescentOptimizer(1e-3).minimize(model.loss)
#train_op = tf.train.AdamOptimizer(1e-4).minimize(model.loss)

In [13]:
## start training:
delta = 10
loss_old = 1e3
steps = 0
t0 = 0
while loss_old > 23e-2:
    sess.run(train_op, feed_dict = {model.scores:scores, model.proj_: proj_mat })

    loss_0 = sess.run(model.loss_0, feed_dict = {model.scores:scores,
                                    model.proj_: proj_mat })
    loss_1 = sess.run(model.loss_1, feed_dict = {model.scores:scores,
                                    model.proj_: proj_mat })
#    loss_new = sess.run(model.loss, feed_dict = {model.scores:scores,
#                                    model.proj_: proj_mat })
    loss_new = loss_0 + loss_1
#    delta = loss_old - loss_new
    loss_old = loss_new
    steps +=1
    if steps%10000 ==0:
        t1 = time.time() - t0
        print(steps, loss_0, loss_1, t1)
        t0 = time.time()

(10000, 0.36491638, 0.012693423, 1517716458.396111)
(20000, 0.28395164, 0.01071397, 13.513853073120117)
(30000, 0.24804002, 0.010314198, 13.521505117416382)
(40000, 0.22590114, 0.01046444, 13.552303075790405)


In [14]:
## final loss  and   delta
print(loss_new, delta)

(0.22999857, 10)


### Let's see the results:

In [50]:
print(sess.run(model.s_))

[[-0.15659647]
 [ 0.2852715 ]
 [-0.60890943]]


In [48]:
print(sess.run(model.p_))

[[ 0.58920366]
 [-0.14931335]
 [-0.08959205]
 [-0.12398325]
 [ 0.11247037]
 [ 0.08131763]
 [-0.11876544]
 [-0.14667065]
 [-0.17851865]
 [ 0.31801909]]


In [51]:
print(sess.run(model.s_bias) )

[[ 0.95527393]
 [ 2.28514433]
 [ 1.18731129]]


In [52]:
print(sess.run(model.p_bias))

[[-0.09135499]
 [ 0.96259308]
 [ 0.52969378]
 [ 1.10508418]
 [ 1.09382975]
 [-0.24853112]
 [ 0.83611989]
 [ 0.99966627]
 [ 1.11295152]
 [-0.85973352]]


### counting wrong predicts

In [67]:
predict = sess.run(model.predict)
n_wrong = int(np.sum(np.square(np.sign(predict-0.5) - scores)*test_mat))
print(str(n_wrong)+' wrong predictions within '+ str(n_zeros) +' test samples' )

1 wrong predictions within 3 test samples


### All predict results 

In [55]:
print(predict)

[[ 0.68387806  0.87448925  0.81742489  0.88892168  0.88406408  0.66685808
   0.85936093  0.87845802  0.89053136  0.51143295]
 [ 0.91387326  0.96102738  0.94209385  0.96626365  0.96804839  0.88693517
   0.95636934  0.96241969  0.96601206  0.81996822]
 [ 0.67637992  0.90385598  0.85465145  0.91434568  0.90138036  0.70873845
   0.89048922  0.90689397  0.9175083   0.53343326]]


In [56]:
print(scores)

[[ 1.  1.  1.  1.  1.  0.  1.  1.  1.  0.]
 [ 1.  1.  0.  1.  1.  1.  1.  1.  1.  1.]
 [ 0.  1.  1.  1.  1.  1.  1.  1.  1.  1.]]


In [17]:
ans_p = sess.run(model.p_, feed_dict = {model.scores:scores, model.proj_: proj_mat })
ans_s = sess.run(model.s_, feed_dict = {model.scores:scores, model.proj_: proj_mat })

In [18]:
print(np.max(ans_p))
print(np.max(ans_s))
print(np.max(p_mat))
print(np.max(s_mat))

1.16439
19.0288
0.536235179729
99


### The matrix "scores" corresponds to the actual scores students get,  each row corresponds to one student

In [19]:
scores

array([[ 52.67019706,  77.35301135,  59.15939095, ...,  59.93032837,
          0.        ,   0.        ],
       [ 47.74065807,  37.63227162,  35.40073239, ...,  54.0015399 ,
         55.28178138,  42.90616783],
       [ 46.38629337,  41.00573029,   0.        , ...,  40.05709955,
         45.1835761 ,  32.13017566],
       ..., 
       [ 32.62758583,  37.35290522,   0.        , ...,  41.88571342,
         34.00440983,  45.5966174 ],
       [ 44.59803368,  47.51215155,  42.46353872, ...,  35.88972598,
          0.        ,  19.03881936],
       [ 64.49604241,  41.26206108,  55.01551282, ...,  49.74749856,
         66.05305759,  57.30576901]])

### The matrix "predict" corresponds to the scores predicted by the model

In [20]:
predict = sess.run(model.predict, feed_dict = {model.scores:scores, model.proj_: proj_mat })
predict 

array([[ 50.31025696,  51.0971756 ,  50.85494995, ...,  49.55835342,
          0.        ,   0.        ],
       [ 42.00656891,  42.65892029,  42.45944595, ...,  41.37759399,
         41.76087952,  41.22695923],
       [ 51.43185425,  52.22583389,   0.        , ...,  50.65138626,
         51.12961197,  50.46997452],
       ..., 
       [ 31.10415268,  31.58787346,   0.        , ...,  30.63759232,
         30.92169189,  30.52473068],
       [ 41.2207756 ,  41.86265564,  41.66644669, ...,  40.59862518,
          0.        ,  40.44944   ],
       [ 60.9866066 ,  61.92451477,  61.64091492, ...,  60.0588913 ,
         60.62866211,  59.8453331 ]], dtype=float32)

In [21]:
p_mat[0]

array([ 0.2280732 ,  0.17640353,  0.06800562,  0.20650069,  0.32101697])

In [22]:
ans_p[0]

array([ 1.02335465,  1.11626089, -1.13355446,  1.12117815, -1.12711513], dtype=float32)