In [3]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 

In [4]:
class MF():
    
    def __init__(self, R, K, alpha, beta, iterations):
        """
        Perform matrix factorization to predict empty
        entries in a matrix.
        
        Arguments
        - R (ndarray)   : user-item rating matrix
        - K (int)       : number of latent dimensions
        - alpha (float) : learning rate
        - beta (float)  : regularization parameter
        """
        
        self.R = R
        self.num_users, self.num_items = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations

    def train(self):
        # Initialize user and item latent feature matrice
        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))
        
        # Initialize the biases
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[np.where(self.R != 0)])
        
        # Create a list of training samples
        self.samples = [
            (i, j, self.R[i, j])
            for i in range(self.num_users)
            for j in range(self.num_items)
            if self.R[i, j] > 0
        ]
        
        # Perform stochastic gradient descent for number of iterations
        training_process = []
        for i in range(self.iterations):
            np.random.shuffle(self.samples)
            self.sgd()
            mse = self.mse()
            training_process.append((i, mse))
            if (i+1) % 10 == 0:
                print("Iteration: %d ; error = %.4f" % (i+1, mse))
        
        return training_process

    def mse(self):
        """
        A function to compute the total mean square error
        """
        xs, ys = self.R.nonzero()
        predicted = self.full_matrix()
        error = 0
        for x, y in zip(xs, ys):
            error += pow(self.R[x, y] - predicted[x, y], 2)
        return np.sqrt(error)

    def sgd(self):
        """
        Perform stochastic graident descent
        """
        for i, j, r in self.samples:
            # Computer prediction and error
            prediction = self.get_rating(i, j)
            e = (r - prediction)
            
            # Update biases
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])
            
            # Create copy of row of P since we need to update it but use older values for update on Q
            P_i = self.P[i, :][:]
            
            # Update user and item latent feature matrices
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i,:])
            self.Q[j, :] += self.alpha * (e * P_i - self.beta * self.Q[j,:])

    def get_rating(self, i, j):
        """
        Get the predicted rating of user i and item j
        """
        prediction = self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)
        return prediction
    
    
    def full_matrix(self):
        """
        Computer the full matrix using the resultant biases, P and Q
        """
        return mf.b + mf.b_u[:,np.newaxis] + mf.b_i[np.newaxis:,] + mf.P.dot(mf.Q.T)

In [5]:
data = pd.read_csv("Semester1.csv") 
startpoint=data.shape[0]
matrixOfResults=data.as_matrix()
data.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Arabic,French,English,Historique,Geographiqye,Civil,Islemic,Math,Physique,Siance,technique,Sport,moyS1
0,8.515598,4.296905,18.020091,17.216852,18.818372,0.034102,0.34381,13.197993,4.130546,7.047255,16.60944,14.669856,6.233255
1,13.715768,7.195663,12.931401,7.597216,8.238261,13.513643,15.38734,2.568369,11.483104,19.123799,7.609495,18.952549,9.371943
2,4.425942,1.264214,4.016901,0.706617,10.688538,18.731541,19.549759,9.769914,0.162796,9.794645,1.343329,16.381564,4.5962
3,11.509003,12.985872,16.977693,0.261142,18.573571,7.47504,18.59235,0.888321,5.186108,2.3936,18.811796,10.396384,7.120663
4,16.65925,13.510713,16.962425,11.991368,5.446098,16.007486,3.970325,9.751679,4.827731,11.793804,11.102472,17.471064,8.349985


In [6]:
data1 = pd.read_csv("toPredict.csv") 
toPredict=data1.as_matrix()
data1

  


Unnamed: 0,Arabic,French,English,Historique,Geographiqye,Civil,Islemic,Math,Physique,Siance,technique,Sport,moys
0,4.392681,13.319181,14.638615,18.715449,0.000000,0.000000,0.0,0.0,7.724798,2.180042,2.923221,17.992029,0
1,17.950044,13.515334,8.155196,3.511580,0.000000,0.000000,0.0,0.0,10.053442,12.220086,3.425524,17.124324,0
2,3.066898,1.031118,14.351664,17.376749,0.000000,0.000000,0.0,0.0,16.351716,8.235210,17.343868,13.923942,0
3,13.953155,9.484314,8.459250,2.890639,0.000000,0.000000,0.0,0.0,5.947617,8.684563,12.748544,0.087823,0
4,12.049904,0.281628,18.699233,9.924768,0.000000,0.000000,0.0,0.0,8.035431,12.889419,11.956515,17.324549,0
5,9.142995,3.699085,6.777267,4.357567,0.000000,0.000000,0.0,0.0,10.986205,4.121646,11.910971,17.427970,0
6,4.096917,0.196688,7.057964,2.328700,0.000000,0.000000,0.0,0.0,3.004381,11.793695,8.491197,14.508438,0
7,6.797719,1.115560,10.120121,10.204001,0.000000,0.000000,0.0,0.0,13.570396,14.268158,3.293658,12.274362,0
8,12.645896,5.979419,10.738532,17.775320,0.000000,0.000000,0.0,0.0,1.450959,5.965926,5.646973,13.593288,0
9,3.806941,3.534538,1.346068,17.602277,0.000000,0.000000,0.0,0.0,17.972531,7.840245,18.698296,6.088374,0


In [7]:
result = np.vstack ((matrixOfResults, toPredict) )/20
result.shape
endingPoint=result.shape[0]

In [8]:
mf = MF(result, K=18, alpha=0.1, beta=0.01, iterations=250)
training_process = mf.train()
print()
print("P x Q:")
print(mf.full_matrix())
print()
print("Global bias:")
print(mf.b)
print()
print("User bias:")
print(mf.b_u)
print()
print("Item bias:")
print(mf.b_i)

Iteration: 10 ; error = 35.3059


KeyboardInterrupt: 

In [129]:
mf.get_rating(4000,4)*20

10.012206984215236

In [130]:
notes={}
def moy(listDesMoyenn):
    #l=Arabic 	French 	English 	Historique 	Geographiqye 	Civil 	Islemic 	Math 	Physique 	Siance 	technique 	Sport
    moy=0
    for i in range(3):
        moy+=l[i]*2
    for i in range(3,7):
        moy+=l[i]
    for i in range(7,9):
        moy+=l[i]*4
    for i in range(9,11):
        moy+=l[i]*2
    moy+=l[-1]
    return(moy/23)

In [131]:
import csv
csv_file = open('Prediction.csv', 'w')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(['Arabic', 'French', 'English','Historique','Geographiqye','Civil','Islemic','Math','Physique','Siance','technique','Sport','moys'])

103

In [132]:
for k in range(startpoint,endingPoint):
    print("for user : "+str(k))
    l=[]
    for i in range(len(data.columns)-1):
        #notes[data.columns[i]]=mf.get_rating(k,i)*20
        l.append(mf.get_rating(k,i)*20)
        #ch=data.columns[i] +' : '+ str(mf.get_rating(k,i)*20)
    #print(len(l))
    moyenn=moy(l)
    l.append(moyenn)
    print("moyenn : "+str(moyenn))
    csv_writer.writerow(l)

for user : 4000
moyenn : 9.492379535222623
for user : 4001
moyenn : 10.764531868202596
for user : 4002
moyenn : 11.279513837928757
for user : 4003
moyenn : 8.33582779856831
for user : 4004
moyenn : 10.989733762217385
for user : 4005
moyenn : 8.531855518589175
for user : 4006
moyenn : 5.9654876622362245
for user : 4007
moyenn : 9.140497748351885
for user : 4008
moyenn : 8.307264486891466
for user : 4009
moyenn : 9.905651633200968
for user : 4010
moyenn : 11.940730612097186
for user : 4011
moyenn : 11.125257167674077
for user : 4012
moyenn : 7.615796552968142
for user : 4013
moyenn : 11.769685966903136
for user : 4014
moyenn : 9.924413783974543
for user : 4015
moyenn : 13.970944944213564
for user : 4016
moyenn : 6.847991596377574
for user : 4017
moyenn : 9.987471286373767
for user : 4018
moyenn : 11.940128084140143
for user : 4019
moyenn : 9.70305913758349
for user : 4020
moyenn : 9.543687601750793
for user : 4021
moyenn : 11.248114936584336
for user : 4022
moyenn : 9.399876822463993
for

moyenn : 6.760178106201278
for user : 4530
moyenn : 10.688165861676076
for user : 4531
moyenn : 9.060610572458971
for user : 4532
moyenn : 10.244947610178496
for user : 4533
moyenn : 11.3765841708771
for user : 4534
moyenn : 6.517916590488106
for user : 4535
moyenn : 12.944233462257387
for user : 4536
moyenn : 7.448363907226135
for user : 4537
moyenn : 7.294657627724238
for user : 4538
moyenn : 12.032649967212794
for user : 4539
moyenn : 7.515792929874705
for user : 4540
moyenn : 10.553931644560175
for user : 4541
moyenn : 9.662340522035375
for user : 4542
moyenn : 11.038775211626527
for user : 4543
moyenn : 4.961701329480894
for user : 4544
moyenn : 13.452300648166284
for user : 4545
moyenn : 9.813534430389556
for user : 4546
moyenn : 10.672702478958133
for user : 4547
moyenn : 8.350169394604524
for user : 4548
moyenn : 8.195679424956625
for user : 4549
moyenn : 12.702521638623447
for user : 4550
moyenn : 8.932976637580664
for user : 4551
moyenn : 13.53087598078367
for user : 4552
moy

moyenn : 9.630613570229212
for user : 5010
moyenn : 12.812666624104155
for user : 5011
moyenn : 7.809454986380728
for user : 5012
moyenn : 4.826951516783811
for user : 5013
moyenn : 9.827833097461799
for user : 5014
moyenn : 9.185766981445145
for user : 5015
moyenn : 3.640693139991748
for user : 5016
moyenn : 8.304878760380728
for user : 5017
moyenn : 11.355642817580032
for user : 5018
moyenn : 7.401271821286315
for user : 5019
moyenn : 10.17390291567049
for user : 5020
moyenn : 5.299729868813599
for user : 5021
moyenn : 4.760986213558622
for user : 5022
moyenn : 5.9829301723530515
for user : 5023
moyenn : 10.974925164999318
for user : 5024
moyenn : 5.828772457812269
for user : 5025
moyenn : 13.089656856172173
for user : 5026
moyenn : 13.729291908674325
for user : 5027
moyenn : 9.553957043873881
for user : 5028
moyenn : 9.569633200343185
for user : 5029
moyenn : 6.955467975696246
for user : 5030
moyenn : 8.426631960718382
for user : 5031
moyenn : 8.713492284916208
for user : 5032
moyen

for user : 5498
moyenn : 13.638637012607408
for user : 5499
moyenn : 7.287549230710877


In [133]:
csv_file.close()

In [9]:
import pandas as pd 
data = pd.read_csv("Prediction.csv") 
data

Unnamed: 0,Arabic,French,English,Historique,Geographiqye,Civil,Islemic,Math,Physique,Siance,technique,Sport,moys
0,5.498481,12.981099,14.217513,17.430868,10.012207,10.771957,10.920199,10.201136,8.174295,2.977594,3.641200,17.056000,9.492380
1,17.160235,13.400378,8.695240,4.433281,10.766745,11.393583,11.255359,10.530750,10.275028,11.703235,4.328700,15.936576,10.764532
2,4.018798,1.970612,13.741722,16.315976,11.469551,11.604501,10.655830,10.940709,15.773333,8.399584,16.307495,13.650372,11.279514
3,13.248919,9.139931,8.379212,3.325015,7.956704,8.299969,8.381088,8.582142,6.218672,8.645119,12.289929,1.151785,8.335828
4,12.219783,1.675099,17.639601,10.144937,11.878945,11.889004,10.915974,11.297854,8.621877,12.514345,11.775728,16.606982,10.989734
5,9.234752,4.332307,7.187270,4.859828,8.706881,9.250082,8.283035,8.276221,10.776255,4.452043,11.264590,15.981024,8.531856
6,4.624014,0.827104,6.744889,2.958896,7.022538,7.004717,5.822331,6.113606,3.477669,11.072714,8.072210,13.350767,5.965488
7,7.198451,2.259865,9.947617,10.076322,9.489033,9.267200,8.456822,8.603805,13.251192,13.415968,3.944383,11.989511,9.140498
8,12.359153,6.415266,10.537805,16.422140,9.654832,9.541018,9.503081,9.843896,2.544985,6.268424,6.077072,13.075049,8.307264
9,4.298993,4.010225,2.317860,16.220961,9.628305,9.565152,8.898105,9.442059,16.864311,8.035190,17.260071,6.447303,9.905652
