<a href="https://colab.research.google.com/github/fayazmalik503/Recommendation_Algorithm_Based_on_Blending_Learning/blob/main/Recommendation_Algorithm_Based_on_Blending_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Recommendation Algorithm Based on Blending Learning

**MetricRating.py**

In [7]:
import os
import tensorflow as tf
import numpy as np
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
import matplotlib as mpl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import sys

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import time
def get_time_dif(start_time):
 '''get the running time'''
 time_dif = time.time() - start_time
 return time_dif


 class MetricFRating():
  def __init__(self ,sess, num_users, num_items, learning_rate = 0.05, epoch=200, N = 150, dropout=0.01, batch_size=256):
    self.lr = learning_rate
    self.epochs = epoch
    self.N = N
    self.keep_prob = 1 - dropout
    self.num_users = num_users
    self.num_items = num_items
    self.batch_size = batch_size
    self.sess = sess
    self.max_rating = 5
    self.min_rating = 0
    self.clip_norm = 1.0
    self.confidence = 0.2

  def run(self, train_data, test_data):
    # with tf.device('/cpu:0'):
    self.cf_user_input = tf.placeholder(dtype=tf.int32, shape=[None], name='cf_user_input')
    self.cf_item_input = tf.placeholder(dtype=tf.int32, shape=[None], name='cf_item_input')
    self.y = tf.placeholder("float", [None], 'y')

    U = tf.Variable(tf.random_normal([self.num_users, self.N], mean=0.08, stddev=0.03), dtype=tf.float32)
    V = tf.Variable(tf.random_normal([self.num_items, self.N], mean=0.08, stddev=0.03), dtype=tf.float32)
    B_u = tf.Variable(tf.random_normal([self.num_users], stddev=0.001))
    B_v = tf.Variable(tf.random_normal([self.num_items], stddev=0.001))
    bias_u = tf.nn.embedding_lookup(B_u ,self.cf_user_input)
    bias_v = tf.nn.embedding_lookup(B_v ,self.cf_item_input)
    users = tf.nn.embedding_lookup(U ,self.cf_user_input)
    pos_items = tf.nn.embedding_lookup(V, self.cf_item_input)
    temp = train_data.tocoo()
    item = list(temp.col.reshape(-1))
    user = list(temp.row.reshape(-1))
    rating = list(temp.data)
    mu = np.mean(rating)

    self.pos_distances = tf.clip_by_value( tf.reduce_sum( tf.square(users - pos_items), 1) + bias_u + bias_v + (self.max_rating - mu),self.min_rating, self.max_rating)
    self.pred_distances = tf.clip_by_value(tf.reduce_sum( tf.nn.dropout(tf.square(users -pos_items), self.keep_prob) ,1) + bias_u + bias_v + (self.max_rating - mu) , self.min_rating, self.max_rating) 
    self.loss = tf.reduce_sum( ( 1 + self.confidence * tf.abs(self.y - (self.max_rating )/2)) * tf.square( (self.max_rating - self.y ) - self.pred_distances)) + 0.05* (tf.norm(B_u) + tf.norm(B_v) )
    self.optimizer = tf.train.AdagradOptimizer(self.lr).minimize(self.loss)
    clip_U = tf.assign(U, tf.clip_by_norm(U, self.clip_norm, axes=[1]))
    clip_V = tf.assign(V, tf.clip_by_norm(V, self.clip_norm, axes=[1]))
    init = tf.global_variables_initializer()
    self.sess.run(init)
    start_time = time.time()

    for epoch in range(self.epochs):
      self.num_training = len(rating)
      total_batch = int(self.num_training / self.batch_size)
      idxs = np.random.permutation(self.num_training)
      user_random = list(np.array(user)[idxs])
      item_random = list(np.array(item)[idxs])
      rating_random = list(np.array(rating)[idxs])
    
    for i in range(total_batch):
      batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size]
      batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size]
      batch_rating = rating_random[i * self.batch_size:(i + 1) * self.batch_size]
      _, c, _, _ = self.sess.run((self.optimizer, self.loss, clip_U, clip_V), 
      feed_dict={self.cf_user_input: batch_user,self.cf_item_input: batch_item, self.y: 
      batch_rating})
      avg_cost = c
      if i % 1000 == 0:
        print("Index: %04d; Epoch: %04d; loss = %.9f; time = %.4f" % (i + 1, epoch, np.mean(avg_cost), get_time_dif(start_time)))
      # if (epoch) % 10 == 0 :
      if epoch == self.epochs-1 :#(epoch+1) % 200 == 0 or epoch == self.epochs-1:
        self.predict_ratings = []
        self.actual_ratings = []
        error = 0
        error_mae = 0

        for i in range(len(test_data)):
          pred_rating_test = self.max_rating - self.sess.run([self.pos_distances]          ,feed_dict={self.cf_user_input: [test_data['user_id'][i]-1], 
          self.cf_item_input: [test_data['item_id'][i]-1]})[0] 
          if pred_rating_test < 0:
            pred_rating_test = 0
          elif pred_rating_test > self.max_rating:
            pred_rating_test = self.max_rating

          self.predict_ratings.append(float(pred_rating_test))
          self.actual_ratings.append(float(test_data['rating'][i]))
          error += (float(test_data['rating'][i]) - pred_rating_test) ** 2
          error_mae += (np.abs(float(test_data['rating'][i] ) - pred_rating_test))
          # RMSE=str(np.sqrt(error / len(test_data))[0])
          # MAE=str(np.sqrt(error / len(test_data))[0])
        
        print("RMSE:" + str(np.sqrt(error / len(test_data))[0]) + "; MAE:"+str((error_mae / len(test_data)) [0]) )
        

class MetricPre(object):
  def __init__(self):
    print("begin train:")

  def get_prediction(self,test=0):
    train_data, test_data, n_user, n_item, neg_user_item_matrix, train_user_item_matrix, unqiue = load_rating_data( test, test_size=0.1, sep=" ")
    with tf.Session() as sess:
      model = MetricFRating(sess, n_user, n_item)
      model.run(train_data, test_data)
      preds = model.predict_ratings
    return preds

    

**Bleding Learning.py**

The val_pres and test_pres is used for save the rating prediction of each model, and 
then we used the liner regression to get a weight of each model by val_pres and real 
rating data

In [None]:
!pip install surprise

In [16]:
!pip install loaddata3 import *

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[31mERROR: Could not find a version that satisfies the requirement loaddata3 (from versions: none)[0m
[31mERROR: No matching distribution found for loaddata3[0m


In [23]:
import numpy as np
import pandas as pd
import os, sys
from surprise import SVD,SVDpp,NMF,SlopeOne,KNNBasic,KNNWithMeans
# from MetricRating import MetricPre
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split
import time
now=time.strftime('%m-%d-%H',time.localtime(time.time()))
# data = Dataset.load_builtin('ml-100k')
'''config'''
clfs=[ SlopeOne(),SVD()]
print('SlopeOne,SVD,MF ' )
dataname='movie'
from surprise import Dataset,Reader
#the path of the rating data
file_path=os.path.expanduser('../data/'+dataname+'_ratings.txt')#movie/filmtrust 
#tell the reader the format of the text
reader = Reader(line_format='user item rating timestamp', sep='\t') #movie
# reader = Reader(line_format='user item rating', sep=' ') #ft

#load data
data=Dataset.load_from_file(file_path,reader=reader)
#split the data into three part:train ,validation ,test,the data will also be loaded in 
MetricF model after being saved in csv 
train_origin, test_origin = train_test_split(data, test_size=.1) 
valset=test_origin[0:int(len(test_origin)/2)]
testset=test_origin[(int(len(test_origin)/2)):]
val= pd.DataFrame(valset)
test = pd.DataFrame(testset)
val.to_csv('../ensemble/'+dataname+'-
valset0.1blend'+now+'.csv',index=False,header=False,sep=',')
test.to_csv('../ensemble/'+dataname+'-
testset0.1blend'+now+'.csv',index=False,header=False,sep=',')

#train model
val_pres = np.zeros((len(valset), (len(clfs)+1))) #+MF
test_pres = np.zeros((len(testset), (len(clfs)+1)))

for j, clf in enumerate(clfs):
  start=time.time()
  algo = clf
  algo.fit(train_origin)
  val_predictions = algo.test(valset)
  test_predictions = algo.test(testset)
  val_pre = []
  test_pre = []
  val_real=[]
  test_real = []
  for i in range(len(val_predictions)):
  val_pre.append(val_predictions[i][3])
  for i in range(len(test_predictions)):
  test_pre.append(test_predictions[i][3])
  for i in range(len(val_predictions)):
  val_real.append(val_predictions[i][2]) 
  for i in range(len(test_predictions)):
  test_real.append(test_predictions[i][2])
  val_pres[:, j] =val_pre #list +1
  test_pres[:, j] = test_pre
  end=time.time()
  print('the model'+str(j+1)+'takes time:'+str(end-start)+'s')
  print('validation set：')

accuracy.rmse(val_predictions)
 accuracy.mae(val_predictions)
 print('test set：')
 accuracy.rmse(test_predictions)
 accuracy.mae(test_predictions)
np.save('../ensemble/val_pres.npy', val_pres)
np.save('../ensemble/val_real.npy', val_real)
np.save('../ensemble/test_pres.npy', test_pres)
np.save('../ensemble/test_real.npy', test_real)
start1 = time.time()
mf=MetricPre()
val_pres[:, j+1] = mf.get_prediction(0) # 
test_pres[:, j+1] = mf.get_prediction(1) # 
end1= time.time()
print('the model'+str(j+2)+'takes time:' + str(end1 - start1) + 's')
np.save( '../ensemble/val_pres0.1_blend'+now+'.npy'+time, val_pres)
np.save('../ensemble/test_pres0.1_blend'+now+'.npy', test_pres)
np.save('../ensemble/val_real0.1_blend'+now+'.npy', val_real)
np.save('../ensemble/test_real0.1_blend'+now+'.npy', test_real)
from sklearn.linear_model import LinearRegression
clf = LinearRegression()
clf.fit(val_pres, val_real)
clf.score(val_pres, val_real)
pref=clf.predict(test_pres)
error=0
error_mae=0
for i in range(len(test_real)):
 error += (test_real[i] - pref[i]) ** 2
 error_mae += (np.abs(test_real[i] - pref[i]))
print("blend’s RMSE:" + str( np.sqrt(error / len(pref)) ) + "; MAE:" + str((error_mae 
/ len(pref))))

SyntaxError: ignored