Import Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
from scipy import spatial

Class to Handle the Recommendation Engine

In [42]:
class RecommendationSystem:

  def __init__(self,data_path="/content/data.csv"):
    self.df= pd.read_csv(data_path)
    self.df.rename(columns={'FirstName LastName':"Name"},inplace=True)
    self.textFields= ["uid","Name","Wheat Seller","Sugercane Seller","Rubber Seller","Maize Seller","Barley Seller"]

    self.productValues = {0:["Wheat Freqeuncy","Wheat Quantity"],1:["Sugercane Freqeuncy","Sugercane Quantity"],
                          2:["Rubber Freqeuncy","Rubber Quantity"],3:["Maize Freqeuncy","Maize Quantity"],
                          4:["Barley Freqeuncy","Barley Quantity"]}
    self.productCosines= pd.DataFrame(columns=["ReferenceIndex","ProductId","CosineSim"])

  
  def find_Cosine_Similarity(self, custId, numberOfRecommendations=10, numberOfPeople=10):

    cust_ind=self.df.index[self.df.uid==custId]
    custData=self.df[self.df["uid"]==custId]
    print("Customer: ",(self.df.iloc[cust_ind]['Name'].values)[0])
    self.dfNumeric = self.df[self.df.columns.difference(self.textFields)]
    custData = custData[custData.columns.difference(self.textFields)]
    cosineInitials=np.full(shape=self.df.shape[0],fill_value=1.01)
    self.dfCosineSim=pd.DataFrame(cosineInitials,columns=["Cosine Similarity"])
    # Find Cosine Similarity with Our User and Other Users
    for ind in self.dfNumeric.index:
      if ind==cust_ind:
        continue;
      self.dfCosineSim["Cosine Similarity"].values[ind]=spatial.distance.cosine(custData.values[0],
                                                                                self.dfNumeric.values[ind])
    # Sort the dataframe based on Similarity
    self.dfCosineSim.sort_values(by = "Cosine Similarity",inplace=True)
    self.topIndexes = [i for i in self.dfCosineSim.index[:numberOfPeople]]
    self.dfTopTen = self.dfNumeric.loc[self.topIndexes]
    self.newIndex=1
    
    # Find cosine similarity among products among the similar users
    for ind in range(0,numberOfPeople):
      for prod in self.productValues:
        try:
          prodCosine= spatial.distance.cosine(custData[self.productValues[prod]].values[0],
                                              self.dfTopTen[self.productValues[prod]].values[ind])
          self.productCosines.loc[len(self.productCosines)] = [ind,prod,prodCosine]
          self.newIndex+=1
        except Exception as e:
          print("Error: ",e)
    
    self.productCosines.sort_values(by = "CosineSim",inplace=True)

    print("Most Similar 5 suggestions are:\n",self.productCosines.head(int(numberOfRecommendations/2)),"\n")
    print("Most DiSimilar similar 5 suggestions are:\n",self.productCosines.tail(int(numberOfRecommendations/2)),"\n")

    self.dfSimilar = (self.productCosines.head(int(numberOfRecommendations/2))).ProductId
    self.dfDiSimilar = (self.productCosines.tail(int(numberOfRecommendations/2))).ProductId

    # Prepare the results with a combination of similar and dissimilar products.
    self.result = list()
    for i in range(0,int(numberOfRecommendations/2)):
      SimilarprodId= self.dfSimilar.values[i]
      self.result.append([SimilarprodId,(self.productValues[SimilarprodId][0]).split(" ")[0],"Similar"])
      DiSimilarprodId = self.dfDiSimilar.values[int(numberOfRecommendations/2)-i-1]
      self.result.append([DiSimilarprodId,(self.productValues[DiSimilarprodId][0]).split(" ")[0],"DiSimilar"])




    return self.result



Example

In [43]:
myrecommend = RecommendationSystem("/content/data.csv")

result = myrecommend.find_Cosine_Similarity("user_001")
print("Recommendation is:\n")
result

Customer:  Hayden Ripley
Most Similar 5 suggestions are:
     ReferenceIndex  ProductId  CosineSim
24             4.0        4.0        0.0
29             5.0        4.0        0.0
28             5.0        3.0        0.0
48             9.0        3.0        0.0
23             4.0        3.0        0.0 

Most DiSimilar similar 5 suggestions are:
     ReferenceIndex  ProductId  CosineSim
2              0.0        2.0   0.000110
32             6.0        2.0   0.000197
44             8.0        4.0   0.000647
14             2.0        4.0   0.003303
49             9.0        4.0   0.004076 

Recommendation is:



  dist = 1.0 - uv / np.sqrt(uu * vv)


[[4.0, 'Barley', 'Similar'],
 [4.0, 'Barley', 'DiSimilar'],
 [4.0, 'Barley', 'Similar'],
 [4.0, 'Barley', 'DiSimilar'],
 [3.0, 'Maize', 'Similar'],
 [4.0, 'Barley', 'DiSimilar'],
 [3.0, 'Maize', 'Similar'],
 [2.0, 'Rubber', 'DiSimilar'],
 [3.0, 'Maize', 'Similar'],
 [2.0, 'Rubber', 'DiSimilar']]