In [1]:
import pandas as pd 
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse 
import collections
import matplotlib.pyplot as plt
import random

In [2]:
df_train = pd.read_csv("Train.csv")
df_val = pd.read_csv("Validation.csv")
df_train_val = pd.concat([df_train, df_val])

In [4]:
u_index=int(df_train_val['User_ID_Alias'].max()) + 1 
v_index=int(df_train_val['Movie_ID_Alias'].max()) + 1 

#R matrix of prediction, each row is a user and each column is an item - based on training data
R =np.zeros([u_index, v_index])
for i, record in df_train.iterrows():
    row = record
    user_id = int(row['User_ID_Alias'])
    movie_id = int(row['Movie_ID_Alias'])
    rating = int(row['Ratings_Rating'])
    R[user_id, movie_id] = rating
    
#R matrix of prediction, each row is a user and each column is an item - based on validation data    
val_R =np.zeros([u_index, v_index])
for i, record in df_val.iterrows():
    row = record
    user_id = int(row['User_ID_Alias'])
    movie_id = int(row['Movie_ID_Alias'])
    rating = int(row['Ratings_Rating'])
    val_R[user_id, movie_id] = rating
    
    
#R matrix of prediction, each row is a user and each column is an item - based on validation data    
train_val_R =np.zeros([u_index, v_index])
for i, record in df_train_val.iterrows():
    row = record
    user_id = int(row['User_ID_Alias'])
    movie_id = int(row['Movie_ID_Alias'])
    rating = int(row['Ratings_Rating'])
    train_val_R[user_id, movie_id] = rating
    
    
num_users, num_items = train_val_R.shape 

# Create a list of training samples
samples = [
    (i, j, R[i, j])
    for i in range(num_users)
    for j in range(num_items)
    if R[i, j] > 0
]


# Create a list of training+validation samples
final_samples = [
    (i, j, train_val_R[i, j])
    for i in range(num_users)
    for j in range(num_items)
    if train_val_R[i, j] > 0
]


#the 'envoirment' of k (latent)
#K_list=[10, 20, 30, 40, 50, 60]

iterations = 100

In [5]:
def sgd(mu, b_u, b_i, U, V, samples, learning_rate, gama_u, gama_i):
    """
    Perform stochastic graident descent
    """
    for i, j, r in samples:
        # Computer prediction and error
        prediction = mu + b_u[i] + b_i[j] + U[i, :].dot(V[j, :].T)
        #prediction = mu + b_u[i] + b_i[j] + (U[i, :].T).dot(V[j, :])
        e = (r - prediction)

        # Update biases
        b_u[i] += learning_rate * (e - gama_u * b_u[i])
        b_i[j] += learning_rate * (e - gama_i * b_i[j])

        # Update user and item latent feature matrices
        U[i, :] += learning_rate * (e * V[j, :] - gama_u * U[i,:])
        V[j, :] += learning_rate * (e * U[i, :] - gama_i * V[j,:])
    return b_u, b_i, U, V

In [6]:
def msee(mat, mu, b_u, b_i, U, V):
    """
    A function to compute the total mean square error
    """
    xs, ys = mat.nonzero()
    predicted = full_matrix(mu, b_u, b_i, U, V)
    error = 0
    count=0
    for x, y in zip(xs, ys):
        error += pow(mat[x, y] - predicted[x, y], 2)
        count+=1
    ret=error/count#round(np.sqrt(error),2)#float("{:.2f}".format(np.sqrt(error)))#float(np.sqrt(error))
    return ret

#float("{:.1f}".format(x))


In [7]:
def maee(mat, mu, b_u, b_i, U, V):
    """
    A function to compute the total mae
    """
    xs, ys = mat.nonzero()
    predicted = full_matrix(mu, b_u, b_i, U, V)
    error = 0
    count=0
    for x, y in zip(xs, ys):
        error += abs(mat[x, y] - predicted[x, y])
        count+=1
    ret=error/count
    return ret


In [8]:
def r_sq(mat, mu, b_u, b_i, U, V):
    """
    A function to compute the total e squares
    """
    xs, ys = mat.nonzero()
    predicted = full_matrix(mu, b_u, b_i, U, V)
    m=np.mean(mat[np.where(mat != 0)])
    ss_e = 0
    ss_t=0
    for x, y in zip(xs, ys):
        ss_e += pow(mat[x, y] - predicted[x, y], 2)
        ss_t += pow(mat[x, y] - m, 2)
    ret=1-(ss_e/ss_t)
    return ret

In [9]:
def full_matrix(mu, b_u, b_i, U, V):
    """
    Computer the full prediction matrix using the resultant biases, U and V
    """
    return mu + b_u[:,np.newaxis] + b_i[np.newaxis:,] + U.dot(V.T)
    #return mu + b_u[:,np.newaxis] + b_i[np.newaxis:,] + (U.T).dot(V)

In [10]:

# Initialize parameters
K=int(np.random.uniform(5, 40))#random.sample(K_list, k=1)[0]
learning_rate=np.random.uniform(0.0001, 0.003)
gama_u=np.random.uniform(0.01, 0.1)#np.random.uniform(0.01, 0.05)
gama_i=np.random.uniform(0.001, 0.1)#np.random.uniform(0.001, 0.05)

U_init = np.random.normal(scale=1./K, size=(num_users, K))#np.random.randn(num_users, K)
U=U_init
#U=np.random.uniform(0.001, 0.5, size=(num_users, K))
V_init = np.random.normal(scale=1./K, size=(num_items, K))#np.random.randn(num_items, K)
V=V_init
#V=np.random.uniform(0.001, 0.5, size=(num_items, K))

# Initialize the biases
b_u_init = np.random.uniform(0.01, 0.1, size=num_users)#np.zeros(num_users)#np.random.randn(num_users)
b_u=b_u_init

b_i_init = np.random.uniform(0.001, 0.1, size=num_items)#np.zeros(num_items)#np.random.randn(num_items)
b_i=b_i_init

#Initialize Mu
mu = np.mean(R[np.where(R != 0)])
final_mu = np.mean(train_val_R[np.where(train_val_R != 0)])

In [11]:
# First iteration of stochastic gradient descent for number of iterations
training_process = []
val_process = []
lr=learning_rate
best_iteration=0
best_mse=0
for i in range(iterations):
    np.random.shuffle(samples)
    b_u, b_i, U, V=sgd(mu, b_u, b_i, U, V, samples, lr, gama_u, gama_i)
    mse = msee(R, mu, b_u, b_i, U, V)
    val_mse = msee(val_R, mu, b_u, b_i, U, V)
    if i>=2 and val_mse>val_process[i-2][1] and val_process[i-1][1]>val_process[i-2][1]:#early stop
        best_iteration=i-2
        best_mse=val_process[i-2][1]
        break
    best_iteration=i
    best_mse=val_mse
    training_process.append((i, mse))
    val_process.append((i, val_mse))
    lr*=0.9
    if (i+1) % 10 == 0:
        print("HP Iteration: %d ; Iteration: %d ; error = %.4f" % (1, i+1, mse))
        print("HP Iteration: %d ; Iteration: %d ; val_error = %.4f" % (1, i+1, val_mse))

    


HP Iteration: 1 ; Iteration: 10 ; error = 0.8236
HP Iteration: 1 ; Iteration: 10 ; val_error = 0.9306
HP Iteration: 1 ; Iteration: 20 ; error = 0.8163
HP Iteration: 1 ; Iteration: 20 ; val_error = 0.9224
HP Iteration: 1 ; Iteration: 30 ; error = 0.8146
HP Iteration: 1 ; Iteration: 30 ; val_error = 0.9205
HP Iteration: 1 ; Iteration: 40 ; error = 0.8141
HP Iteration: 1 ; Iteration: 40 ; val_error = 0.9199
HP Iteration: 1 ; Iteration: 50 ; error = 0.8139
HP Iteration: 1 ; Iteration: 50 ; val_error = 0.9197
HP Iteration: 1 ; Iteration: 60 ; error = 0.8138
HP Iteration: 1 ; Iteration: 60 ; val_error = 0.9196
HP Iteration: 1 ; Iteration: 70 ; error = 0.8138
HP Iteration: 1 ; Iteration: 70 ; val_error = 0.9196
HP Iteration: 1 ; Iteration: 80 ; error = 0.8138
HP Iteration: 1 ; Iteration: 80 ; val_error = 0.9196
HP Iteration: 1 ; Iteration: 90 ; error = 0.8138
HP Iteration: 1 ; Iteration: 90 ; val_error = 0.9196
HP Iteration: 1 ; Iteration: 100 ; error = 0.8138
HP Iteration: 1 ; Iteration: 100

In [3]:
#val_process

In [12]:
# Another 2 iteration of stochastic gradient descent for number of iterations in order to take the best one out of the 3
for j in range(2):
        
    K_temp=int(np.random.uniform(5, 40))#random.sample(K_list, k=1)[0]
    learning_rate_temp=np.random.uniform(0.0001, 0.003)
    gama_u_temp=np.random.uniform(0.01, 0.1)
    gama_i_temp=np.random.uniform(0.001, 0.1)

    U_init_temp = np.random.normal(scale=1./K, size=(num_users, K))
    U_temp=U_init

    V_init_temp = np.random.normal(scale=1./K, size=(num_items, K))
    V_temp=V_init

    # Initialize the biases
    b_u_init_temp = np.random.uniform(0.01, 0.1, size=num_users)
    b_u_temp=b_u_init_temp

    b_i_init_temp = np.random.uniform(0.001, 0.1, size=num_items)
    b_i_temp=b_i_init_temp
    
    
    
    
    
    training_process_temp = []
    val_process_temp = []
    lr_temp=learning_rate_temp
    best_iteration_temp=0
    best_mse_temp=0
    mse_temp=0
    val_mse_temp=0
    for i in range(iterations):
        np.random.shuffle(samples)
        b_u_temp, b_i_temp, U_temp, V_temp=sgd(mu,b_u_temp,b_i_temp,U_temp,V_temp,samples,lr_temp,gama_u_temp,gama_i_temp)
        mse_temp = msee(R, mu, b_u_temp, b_i_temp, U_temp, V_temp)
        val_mse_temp = msee(val_R, mu, b_u_temp, b_i_temp, U_temp, V_temp)
        if i>=2 and val_mse_temp>val_process_temp[i-2][1] and val_process_temp[i-1][1]>val_process_temp[i-2][1]:#early stop
            best_iteration_temp=i-2
            best_mse_temp=val_process_temp[i-2][1]
            break
        best_iteration_temp=i
        best_mse_temp=val_mse_temp
        training_process_temp.append((i, mse_temp))
        val_process_temp.append((i, val_mse_temp))
        lr_temp*=0.9
        if (i+1) % 10 == 0:
            print("HP Iteration: %d ; Iteration: %d ; error = %.4f" % (j+2, i+1, mse_temp))
            print("HP Iteration: %d ; Iteration: %d ; val_error = %.4f" % (j+2, i+1, val_mse_temp))
            
    if best_mse_temp<best_mse:
        K=K_temp
        learning_rate=learning_rate_temp
        gama_u=gama_u_temp
        gama_i=gama_i_temp

        U_init = U_init_temp
        V_init = V_init_temp

        b_u_init = b_u_init_temp
        b_i_init = b_i_init_temp
        
        best_iteration=best_iteration_temp
        best_mse=best_mse_temp

        
        
        
        
        
        
        

HP Iteration: 2 ; Iteration: 10 ; error = 0.8312
HP Iteration: 2 ; Iteration: 10 ; val_error = 0.9358
HP Iteration: 2 ; Iteration: 20 ; error = 0.8229
HP Iteration: 2 ; Iteration: 20 ; val_error = 0.9259
HP Iteration: 2 ; Iteration: 30 ; error = 0.8209
HP Iteration: 2 ; Iteration: 30 ; val_error = 0.9235
HP Iteration: 2 ; Iteration: 40 ; error = 0.8202
HP Iteration: 2 ; Iteration: 40 ; val_error = 0.9228
HP Iteration: 2 ; Iteration: 50 ; error = 0.8200
HP Iteration: 2 ; Iteration: 50 ; val_error = 0.9225
HP Iteration: 2 ; Iteration: 60 ; error = 0.8200
HP Iteration: 2 ; Iteration: 60 ; val_error = 0.9225
HP Iteration: 2 ; Iteration: 70 ; error = 0.8199
HP Iteration: 2 ; Iteration: 70 ; val_error = 0.9224
HP Iteration: 2 ; Iteration: 80 ; error = 0.8199
HP Iteration: 2 ; Iteration: 80 ; val_error = 0.9224
HP Iteration: 2 ; Iteration: 90 ; error = 0.8199
HP Iteration: 2 ; Iteration: 90 ; val_error = 0.9224
HP Iteration: 2 ; Iteration: 100 ; error = 0.8199
HP Iteration: 2 ; Iteration: 100

In [13]:
#print(best_mse)
#print(K)
#print(best_iteration)
#print(learning_rate)

0.9170333424547393
10
99
0.002017082674284046


In [14]:
# Perform stochastic gradient descent for number of iterations on train+validation
U=U_init
V=V_init

b_u=b_u_init
b_i=b_i_init


training_process = []
#val_process = []
mse=0
#val_mse=0
lr=learning_rate
for i in range(best_iteration+1):
    np.random.shuffle(final_samples)
    b_u, b_i, U, V=sgd(final_mu, b_u, b_i, U, V, final_samples, lr, gama_u, gama_i)
    mse = msee(train_val_R, final_mu, b_u, b_i, U, V)
    #val_mse = msee(train_val_R, final_mu, b_u, b_i, U, V)
    training_process.append((i, mse))
    #val_process.append((i, val_mse))
    lr*=0.9
    if (i+1) % 10 == 0:
        print("HP Iteration: final ; Iteration: %d ; error = %.4f" % (i+1, mse))
        #print("HP Iteration: final ; Iteration: %d ; val_error = %.4f" % (i+1, val_mse))

HP Iteration: final ; Iteration: 10 ; error = 0.8117
HP Iteration: final ; Iteration: 20 ; error = 0.8106
HP Iteration: final ; Iteration: 30 ; error = 0.8103
HP Iteration: final ; Iteration: 40 ; error = 0.8102
HP Iteration: final ; Iteration: 50 ; error = 0.8101
HP Iteration: final ; Iteration: 60 ; error = 0.8101
HP Iteration: final ; Iteration: 70 ; error = 0.8101
HP Iteration: final ; Iteration: 80 ; error = 0.8101
HP Iteration: final ; Iteration: 90 ; error = 0.8101
HP Iteration: final ; Iteration: 100 ; error = 0.8101


In [15]:
#priniting all the errors measures
print(msee(val_R, mu, b_u, b_i, U, V))#mse
print(np.sqrt(msee(val_R, mu, b_u, b_i, U, V)))#rmse
print(maee(val_R, mu, b_u, b_i, U, V))#mae
print(r_sq(val_R, mu, b_u, b_i, U, V))#r squares

0.8889057347002299
0.9428179753803116
0.7441010373641764
0.285133122858226


In [16]:
#predicting the test
df_test = pd.read_csv("Test.csv")
final_prediction = full_matrix(mu, b_u, b_i, U, V)

prediction_list = []
for i, record in df_test.iterrows():
    row = record
    user_id = int(row['User_ID_Alias'])
    movie_id = int(row['Movie_ID_Alias'])
    pred = final_prediction[user_id][movie_id]
    prediction_list.append(pred)
    



In [17]:
#prediction_list

[3.2894648850384196,
 3.036641318884492,
 4.220848824534565,
 4.197592149779362,
 4.4626459489008425,
 2.373140295098273,
 3.3654450287935576,
 3.377929095015889,
 3.9823116742957057,
 2.741732593956268,
 3.931054250278671,
 2.463261650321295,
 3.3737978715423087,
 3.314484457315026,
 4.286713094441346,
 3.4897572097856884,
 4.507330161549034,
 4.359433137440271,
 3.995494095112733,
 3.6960105242483428,
 4.425156607326152,
 3.507998503281434,
 3.922459394264611,
 3.9339779140231417,
 2.917693673169254,
 4.061495586113222,
 3.590685518734263,
 4.052090397822967,
 4.411973579023323,
 4.800743374430408,
 4.058334164238577,
 3.390816248609343,
 3.5414201726976073,
 3.219764742843814,
 2.479279053869807,
 4.481687043990451,
 2.9005573221611156,
 4.191142448978917,
 4.1254106867436295,
 4.299266491427898,
 4.176864498712184,
 3.9779472578814117,
 3.857997392765385,
 4.016228884677843,
 4.139638819074258,
 3.973935781794758,
 4.083493221691852,
 4.345918062322912,
 3.6595433172978122,
 2.5633

In [18]:
df_test["Rating"] = prediction_list

In [19]:
df_test.head()


Unnamed: 0,User_ID_Alias,Movie_ID_Alias,Rating
0,1,552,3.289465
1,2,1246,3.036641
2,3,1809,4.220849
3,4,716,4.197592
4,5,1475,4.462646


In [20]:
#print(final_prediction[0][0])
#print(final_prediction[0][0])

In [21]:
df_test.to_csv("300339785_201314796_203016217.csv", index=False)