In [114]:
import numpy as np
import pandas as pd
import random
from surprise import KNNWithMeans
from surprise import Dataset, Reader, SVD
from surprise.model_selection import GridSearchCV, cross_validate

In [115]:
# Read the three CSV inputs in one pass: two user/item rating tables and the
# app metadata table. The target names are kept because later cells use them.
df, df_1m, df_raw = (
    pd.read_csv(csv_name)
    for csv_name in (
        "user-item-100k-cleaned.csv",
        "user-item-1m-cleaned.csv",
        "cleaned_top_5k.csv",
    )
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 110187 entries, 0 to 110186
Data columns (total 4 columns):
 #   Column      Non-Null Count   Dtype
---  ------      --------------   -----
 0   Unnamed: 0  110187 non-null  int64
 1   user_id     110187 non-null  int64
 2   app_id      110187 non-null  int64
 3   rating      110187 non-null  int64
dtypes: int64(4)
memory usage: 3.4 MB


In [116]:
# Preview the first rows of df (the table read from user-item-100k-cleaned.csv)
df.head()

Unnamed: 0.1,Unnamed: 0,user_id,app_id,rating
0,0,1,538,1
1,1,1,137,1
2,2,1,3,5
3,3,1,0,2
4,4,1,4,4


In [117]:
# Preview the first rows of df_raw (the app metadata read from cleaned_top_5k.csv)
df_raw.head()

Unnamed: 0.1,Unnamed: 0,App Name,App Id,Category,Rating,Rating Count,Installs,Minimum Installs,Maximum Installs,Free,...,Size,Minimum Android,Developer Website,Released,Last Updated,Content Rating,Privacy Policy,Ad Supported,In App Purchases,Editors Choice
0,385470,WhatsApp Messenger,com.whatsapp,Communication,4.0,138557570.0,"5,000,000,000+",5000000000.0,6265637751,True,...,19.2,Varies with device,1,"Oct 18, 2010","Jun 09, 2021",Everyone,1,False,False,True
1,304824,Instagram,com.instagram.android,Social,3.8,120206190.0,"1,000,000,000+",1000000000.0,3559871277,True,...,19.2,Varies with device,1,"Apr 3, 2012","Jun 14, 2021",Teen,1,True,True,True
2,2222701,Facebook,com.facebook.katana,Social,2.3,117850066.0,"5,000,000,000+",5000000000.0,6782619635,True,...,19.2,Varies with device,1,"Jun 16, 2021","Jun 16, 2021",Teen,1,True,True,False
3,881403,YouTube,com.google.android.youtube,Video Players & Editors,4.4,112440547.0,"5,000,000,000+",5000000000.0,9766230924,True,...,19.2,Varies with device,1,"Oct 20, 2010","Jun 16, 2021",Teen,1,True,False,False
4,244319,Garena Free Fire - Rampage,com.dts.freefireth,Action,4.2,89177097.0,"500,000,000+",500000000.0,976536041,True,...,19.2,4.1 and up,1,"Dec 7, 2017","Jun 04, 2021",Mature 17+,1,False,True,True


In [118]:
# Hand the pandas ratings over to Surprise. Ratings are declared to lie on a
# 1-5 scale.
reader = Reader(rating_scale=(1, 5))

# Surprise reads the columns positionally: user id, then item id, then rating.
data = Dataset.load_from_df(
    df_1m.loc[:, ["user_id", "app_id", "rating"]],
    reader,
)

In [119]:
# Cross-validated grid search over SVD hyper-parameters (3 folds, 2x2x2 grid).
param_grid = dict(
    n_epochs=[5, 10],
    lr_all=[0.002, 0.005],
    reg_all=[0.4, 0.6],
)

gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)
gs.fit(data)

# Best RMSE first, then the parameter combination that achieved it.
print(gs.best_score["rmse"], gs.best_params["rmse"], sep="\n")

1.294633402345542
{'n_epochs': 5, 'lr_all': 0.002, 'reg_all': 0.6}


In [120]:
# Create a model with the best parameters found in the previous step.
# They are taken straight from the grid search rather than re-typed as
# literals, so this cell cannot drift out of sync when the grid, the data or
# the fold split changes.
svd = SVD(verbose=True, **gs.best_params["rmse"])

# Re-check the chosen configuration with 3-fold cross-validation; the returned
# dict of per-fold metrics is the cell's displayed output.
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.2965  1.2943  1.2935  1.2947  0.0013  
MAE (testset)     1.1259  1.1242  1.1237  1.1246  0.0009  
Fit time          2.75    3.01    3.05    2.94    0.13    
Test time         3.47    2.84    3.44    3.25    0.29    


{'test_rmse': array([1.29645458, 1.29431298, 1.29345475]),
 'test_mae': array([1.12585892, 1.12422282, 1.12366067]),
 'fit_time': (2.7503724098205566, 3.006537675857544, 3.0481786727905273),
 'test_time': (3.474867820739746, 2.835196018218994, 3.44484543800354)}

In [121]:
# Refit svd on the trainset built from all of `data`; this fitted model is the
# one passed to AppRecommenderSystem further down.
trainset = data.build_full_trainset()
svd.fit(trainset)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x24f2deb4f10>

In [122]:
class AppRecommenderSystem:
    """Recommend apps to a user from a rating-prediction model.

    App ids are treated as row positions of ``app_metadata``: candidate ids run
    from ``0`` to ``len(app_metadata) - 1`` and metadata rows are fetched with
    ``iloc``.

    Parameters
    ----------
    ratings_dataset : pandas.DataFrame
        One row per rating, with ``user_id``, ``app_id`` and ``rating`` columns.
    app_metadata : pandas.DataFrame
        One row per app; must contain an ``App Name`` column.
    model
        Object exposing ``predict(uid=..., iid=...)`` whose return value has an
        ``est`` attribute holding the predicted rating.
    """

    def __init__(self, ratings_dataset, app_metadata, model):
        self._ratings = ratings_dataset
        self._app_metadata = app_metadata
        self._model = model

    def _candidate_app_ids(self):
        """Return every app id present in the metadata table, as Python ints."""
        # Derived from the injected metadata, not from a notebook global, so
        # the class works with whatever table it was constructed with.
        return list(range(self._app_metadata.shape[0]))

    def _rated_app_ids(self, user_id):
        """Return the set of app ids that ``user_id`` has already rated."""
        is_user = self._ratings['user_id'] == user_id
        return set(self._ratings.loc[is_user, 'app_id'].tolist())

    def _predict_rating(self, user_id, app_id):
        """Return the model's estimated rating of ``app_id`` by ``user_id``."""
        prediction = self._model.predict(uid=user_id, iid=app_id)
        return prediction.est

    def _get_app_metadata(self, app_id):
        """Return the metadata row (a Series) stored at position ``app_id``."""
        return self._app_metadata.iloc[app_id, :]

    def _generate_recommendations(self, user_id, number_of_recommendations=5, threshold=3.7):
        """Pick random unrated apps whose predicted rating exceeds ``threshold``.

        Returns a list of exactly ``number_of_recommendations`` app ids, or an
        empty list (after printing a hint) when too few apps clear the
        threshold.
        """
        if number_of_recommendations <= 0:
            return []

        already_rated = self._rated_app_ids(user_id)
        app_ids = self._candidate_app_ids()
        # Shuffle so repeated calls surface different apps.
        random.shuffle(app_ids)

        recommendations = []
        for app_id in app_ids:
            # Same rule as the top-n path: never suggest something the user
            # has already rated.
            if app_id in already_rated:
                continue
            if self._predict_rating(user_id, app_id) > threshold:
                recommendations.append(app_id)
                if len(recommendations) == number_of_recommendations:
                    return recommendations

        print("You need to lower the threshold...")
        return []

    def _generate_top_n_recommendations(self, user_id, number_of_recommendations):
        """Return the highest-predicted unrated apps for ``user_id``.

        The result is a DataFrame with ``Predicted Rating`` and ``App Id``
        columns, sorted by predicted rating (descending) and truncated to
        ``number_of_recommendations`` rows.
        """
        app_ids = self._candidate_app_ids()
        predictions = pd.DataFrame({
            'Predicted Rating': [self._predict_rating(user_id, app_id) for app_id in app_ids],
            'App Id': app_ids,
        })

        sorted_recommendations = predictions.sort_values('Predicted Rating', ascending=False)
        result = self._clean_recommendations(user_id, sorted_recommendations)
        return result.head(number_of_recommendations)

    def _clean_recommendations(self, user_id, recommendations):
        """Drop rows of ``recommendations`` whose ``App Id`` the user already rated."""
        rated_apps = self._rated_app_ids(user_id)
        return recommendations[~recommendations['App Id'].isin(rated_apps)]

    def _generate_recommendations_dataframe(self, ids):
        """Return the metadata rows for ``ids``, in the given order."""
        # A single positional selection keeps the column dtypes intact, unlike
        # rebuilding a frame from one Series per row.
        return self._app_metadata.iloc[list(ids)]

    def recommend_with_threshold(self, user_id, number_of_recommendations=5, threshold=3.7):
        """Return metadata for random unrated apps predicted above ``threshold``.

        The frame is empty when fewer than ``number_of_recommendations`` apps
        clear the threshold.
        """
        app_ids = self._generate_recommendations(user_id, number_of_recommendations, threshold)
        return self._generate_recommendations_dataframe(app_ids)

    def recommend(self, user_id, number_of_recommendations):
        """Return metadata for the top ``number_of_recommendations`` unrated apps.

        The predicted ratings are printed as well, because the returned
        metadata frame does not carry them.
        """
        result = self._generate_top_n_recommendations(user_id, number_of_recommendations)

        print(result)

        return self._generate_recommendations_dataframe(list(result['App Id']))

    def get_user_ratings(self, user_id):
        """Return the apps ``user_id`` rated.

        The result has ``App Id``, ``App Name`` and ``users_given_rating``
        columns, one row per rating, in the order of the ratings table.
        """
        user_ratings = self._ratings[self._ratings['user_id'] == user_id]
        app_ids = user_ratings['app_id'].to_numpy()

        # Look the name up by column label rather than column position, so the
        # result does not depend on the metadata's column order.
        app_names = self._app_metadata['App Name'].iloc[app_ids].to_numpy()

        return pd.DataFrame({
            'App Id': app_ids,
            'App Name': app_names,
            'users_given_rating': user_ratings['rating'].to_numpy(),
        })
        
        

In [123]:
# Wire the 1M ratings table, the app metadata and the fitted SVD model together.
app_recommender = AppRecommenderSystem(
    ratings_dataset=df_1m,
    app_metadata=df_raw,
    model=svd,
)

In [124]:
# Top five recommendations for user 90.
app_recommender.recommend(user_id=90, number_of_recommendations=5)

      Predicted Rating  App Id
2683          3.861379    2683
2409          3.846773    2409
3896          3.845685    3896
2665          3.828413    2665
4577          3.827333    4577


Unnamed: 0.1,Unnamed: 0,App Name,App Id,Category,Rating,Rating Count,Installs,Minimum Installs,Maximum Installs,Free,...,Size,Minimum Android,Developer Website,Released,Last Updated,Content Rating,Privacy Policy,Ad Supported,In App Purchases,Editors Choice
2683,1558604,Bingo Party - Free Classic Bingo Games Online,com.bingo.tour.party.crazy.free,Board,4.6,314441.0,"5,000,000+",5000000.0,9658388,True,...,100.0,4.1 and up,0,"May 8, 2017","May 21, 2021",Teen,1,True,True,False
2409,168838,السوق المفتوح - OpenSooq,com.opensooq.OpenSooq,Shopping,4.5,349703.0,"10,000,000+",10000000.0,24976573,True,...,22.0,4.2 and up,1,"Oct 13, 2013","May 04, 2021",Everyone,1,True,True,False
3896,828601,"adidas Training app - Fitness, Home & Gym Workout",com.runtastic.android.results.lite,Health & Fitness,4.6,208815.0,"10,000,000+",10000000.0,25506715,True,...,78.0,6.0 and up,1,"Nov 12, 2015","Jun 08, 2021",Everyone,1,False,True,False
2665,1868372,"UC Browser Turbo- Fast Download, Secure, Ad Block",com.ucturbo,Communication,4.3,316249.0,"10,000,000+",10000000.0,47722864,True,...,19.2,4.1 and up,1,"Mar 29, 2019","Aug 04, 2020",Everyone,1,False,False,False
4577,258721,Bike Stunt Race 3d Bike Racing Games – Bike game,com.knights.bikesstunt.motomaster,Simulation,4.1,172583.0,"100,000,000+",100000000.0,124499897,True,...,52.0,4.1 and up,1,"Oct 15, 2017","May 22, 2021",Everyone,1,True,True,False


In [113]:
# Apps user 90 has rated, for comparison with the recommendations above.
app_recommender.get_user_ratings(user_id=90)

Unnamed: 0,App Id,App Name,users_given_rating
0,1336,ClipClaps - Reward your interest,4
1,0,WhatsApp Messenger,5
2,4259,Face Changer 2,5
3,5,Messenger – Text and Video Chat for Free,4
4,46,Duolingo: Learn Languages Free,2
5,17,Roblox,5
6,13,Clash Royale,5
7,11,Google Chrome: Fast & Secure,5
