In [19]:
import numpy as np
import pandas as pd
import random
from surprise import KNNWithMeans
from surprise import Dataset, Reader, SVD
from surprise.model_selection import GridSearchCV, cross_validate

In [10]:
# Load the three CSV inputs used by this notebook:
#   df     - the "100k" user/app ratings file (only inspected in the cells shown here)
#   df_1m  - the "1m" user/app ratings file; this is the frame handed to surprise and
#            to AppRecommenderSystem further down
#   df_raw - app metadata; the recommender looks rows up by position using app_id
# NOTE(review): the info() output lists an "Unnamed: 0" column, presumably an index that
# was written out by an earlier to_csv - confirm. Do not drop it from df_raw without also
# updating the positional lookup `iloc[x, 1]` in AppRecommenderSystem.get_user_ratings.
df = pd.read_csv("user-item-100k-cleaned.csv")
df_1m = pd.read_csv("user-item-1m-cleaned.csv")
df_raw = pd.read_csv("cleaned_top_5k.csv")
# Print column names, dtypes and non-null counts of the 100k ratings frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 110187 entries, 0 to 110186
Data columns (total 4 columns):
 #   Column      Non-Null Count   Dtype
---  ------      --------------   -----
 0   Unnamed: 0  110187 non-null  int64
 1   user_id     110187 non-null  int64
 2   app_id      110187 non-null  int64
 3   rating      110187 non-null  int64
dtypes: int64(4)
memory usage: 3.4 MB


In [11]:
# Preview the first five rows of the 100k ratings frame
# (columns: "Unnamed: 0", user_id, app_id, rating).
df.head()

Unnamed: 0.1,Unnamed: 0,user_id,app_id,rating
0,0,1,538,1
1,1,1,137,1
2,2,1,3,5
3,3,1,0,2
4,4,1,4,4


In [12]:
# Preview the first five rows of the app metadata frame
# (App Name, App Id, Category, Rating, Rating Count, Installs, ...).
df_raw.head()

Unnamed: 0.1,Unnamed: 0,App Name,App Id,Category,Rating,Rating Count,Installs,Minimum Installs,Maximum Installs,Free,...,Size,Minimum Android,Developer Website,Released,Last Updated,Content Rating,Privacy Policy,Ad Supported,In App Purchases,Editors Choice
0,385470,WhatsApp Messenger,com.whatsapp,Communication,4.0,138557570.0,"5,000,000,000+",5000000000.0,6265637751,True,...,19.2,Varies with device,1,"Oct 18, 2010","Jun 09, 2021",Everyone,1,False,False,True
1,304824,Instagram,com.instagram.android,Social,3.8,120206190.0,"1,000,000,000+",1000000000.0,3559871277,True,...,19.2,Varies with device,1,"Apr 3, 2012","Jun 14, 2021",Teen,1,True,True,True
2,2222701,Facebook,com.facebook.katana,Social,2.3,117850066.0,"5,000,000,000+",5000000000.0,6782619635,True,...,19.2,Varies with device,1,"Jun 16, 2021","Jun 16, 2021",Teen,1,True,True,False
3,881403,YouTube,com.google.android.youtube,Video Players & Editors,4.4,112440547.0,"5,000,000,000+",5000000000.0,9766230924,True,...,19.2,Varies with device,1,"Oct 20, 2010","Jun 16, 2021",Teen,1,True,False,False
4,244319,Garena Free Fire - Rampage,com.dts.freefireth,Action,4.2,89177097.0,"500,000,000+",500000000.0,976536041,True,...,19.2,4.1 and up,1,"Dec 7, 2017","Jun 04, 2021",Mature 17+,1,False,True,True


In [13]:
# Wrap the 1M-row ratings frame in a surprise Dataset.
# The Reader declares the rating scale as 1 to 5.
reader = Reader(rating_scale=(1, 5))

# load_from_df reads the columns positionally, so they are selected in the
# order it expects: user id, item id, rating.
data = Dataset.load_from_df(
    df=df_1m.loc[:, ["user_id", "app_id", "rating"]],
    reader=reader,
)

In [7]:
# Grid-search SVD hyper-parameters with 3-fold cross validation.
# Two candidate values for each of three parameters are scored on RMSE and MAE.
param_grid = dict(
    n_epochs=[5, 10],
    lr_all=[0.002, 0.005],
    reg_all=[0.4, 0.6],
)

gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=3)
gs.fit(data)

# Report the best RMSE, then the parameter combination that achieved it.
print(gs.best_score["rmse"], gs.best_params["rmse"], sep="\n")

1.2945698752969719
{'n_epochs': 5, 'lr_all': 0.002, 'reg_all': 0.6}


In [14]:
# Re-evaluate SVD with the combination reported by the grid search
# (n_epochs=5, lr_all=0.002, reg_all=0.6). The values are written out by hand,
# so they have to be updated manually if the search is re-run and selects a
# different combination.
svd = SVD(n_epochs=5, lr_all=0.002, reg_all=0.6, verbose=True)
cross_validate(algo=svd, data=data, measures=["RMSE", "MAE"], cv=3, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    1.2929  1.2959  1.2954  1.2947  0.0013  
MAE (testset)     1.1232  1.1248  1.1256  1.1245  0.0010  
Fit time          2.64    2.91    2.92    2.82    0.13    
Test time         3.05    3.10    2.98    3.04    0.05    


{'test_rmse': array([1.29292135, 1.29591529, 1.29539864]),
 'test_mae': array([1.12315479, 1.12478907, 1.12555827]),
 'fit_time': (2.6386773586273193, 2.9087672233581543, 2.9182825088500977),
 'test_time': (3.0462863445281982, 3.095254421234131, 2.9838907718658447)}

In [15]:
# Refit on all available ratings: build a single trainset from the whole Dataset
# (no held-out fold) and fit the SVD instance created above on it.
# The fitted `svd` is the model later handed to AppRecommenderSystem.
trainset = data.build_full_trainset()
svd.fit(trainset)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x24f0331bf10>

In [16]:
class AppRecommenderSystem:
    """Recommend apps to a user from a fitted rating-prediction model.

    Parameters
    ----------
    ratings_dataset : pandas.DataFrame
        Ratings table with ``user_id``, ``app_id`` and ``rating`` columns.
    app_metadata : pandas.DataFrame
        App catalogue. An app id is treated as the *positional* row number in
        this frame, and the column at position 1 is read as the app's name.
    model
        Object exposing ``predict(uid=..., iid=...)`` whose result has an
        ``est`` attribute holding the estimated rating.
    """

    def __init__(self, ratings_dataset, app_metadata, model):
        self._ratings = ratings_dataset
        self._app_metadata = app_metadata
        self._model = model

    def _predict_rating(self, user_id, app_id):
        """Return the model's estimated rating of ``app_id`` by ``user_id``."""
        prediction = self._model.predict(uid=user_id, iid=app_id)
        return prediction.est

    def _get_app_metadata(self, app_id):
        """Return the metadata row stored at position ``app_id``."""
        return self._app_metadata.iloc[app_id, :]

    def _generate_recommendations(self, user_id, number_of_recommendations=5, threshold=3.7):
        """Pick app ids whose predicted rating for ``user_id`` exceeds ``threshold``.

        Candidates are visited in random order (via the ``random`` module, so
        ``random.seed`` makes the result repeatable) and the scan stops as soon
        as ``number_of_recommendations`` ids have been collected.

        Returns
        -------
        list of int
            Exactly ``number_of_recommendations`` app ids, or an empty list
            (after printing a hint) when fewer candidates clear the threshold.
        """
        if number_of_recommendations <= 0:
            # Nothing was requested; skip scoring the whole catalogue.
            return []

        # Size the candidate pool from the catalogue this instance was given,
        # not from a notebook-level global, so the class works with any frame.
        app_ids = list(range(self._app_metadata.shape[0]))
        # Shuffle the candidates to introduce some randomness.
        random.shuffle(app_ids)

        recommendations = []
        for app_id in app_ids:
            # Candidate ids are unique, so no duplicate check is needed.
            if self._predict_rating(user_id, app_id) > threshold:
                recommendations.append(app_id)
                if len(recommendations) == number_of_recommendations:
                    return recommendations

        print("You need to lower the threshold...")
        return []

    def _generate_recommendations_dataframe(self, ids):
        """Return the metadata rows at positions ``ids`` as a DataFrame.

        A single positional selection keeps the catalogue's columns and dtypes,
        including when ``ids`` is empty.
        """
        return self._app_metadata.iloc[list(ids)]

    def recommend(self, user_id, number_of_recommendations=5, threshold=3.7):
        """Return a DataFrame of recommended apps for ``user_id``.

        The frame is empty when fewer than ``number_of_recommendations`` apps
        have a predicted rating above ``threshold``.
        """
        recommended_ids = self._generate_recommendations(
            user_id, number_of_recommendations, threshold
        )
        return self._generate_recommendations_dataframe(recommended_ids)

    def get_user_ratings(self, user_id):
        """Return the apps ``user_id`` has rated together with the given ratings.

        Returns
        -------
        pandas.DataFrame
            Column ``0`` holds the app name (metadata column at position 1) and
            ``users_given_rating`` holds the user's rating, one row per rating.
        """
        user_ratings = self._ratings[self._ratings['user_id'] == user_id]

        # app_id is a row position in the metadata frame; position 1 is the name column.
        app_names = self._app_metadata.iloc[user_ratings['app_id'].to_numpy(), 1]

        applications = pd.DataFrame({0: app_names.to_numpy()})
        applications["users_given_rating"] = user_ratings['rating'].to_numpy()

        return applications
        
        

In [23]:
# Wire the recommender to the 1M ratings frame, the app metadata frame and the fitted SVD model.
app_recommender = AppRecommenderSystem(df_1m, df_raw, svd)

In [26]:
# Ask for 5 recommendations for user 1, keeping only apps whose predicted rating exceeds 3.7.
# Candidates are shuffled with the `random` module, so the result changes between runs
# unless `random` is seeded first.
app_recommender.recommend(1, 5, 3.7)

Unnamed: 0.1,Unnamed: 0,App Name,App Id,Category,Rating,Rating Count,Installs,Minimum Installs,Maximum Installs,Free,...,Size,Minimum Android,Developer Website,Released,Last Updated,Content Rating,Privacy Policy,Ad Supported,In App Purchases,Editors Choice
2074,734988,Hit & Knock down,com.mobirix.throwball,Sports,4.3,402913.0,"10,000,000+",10000000.0,43958497,True,...,67.0,4.4 and up,1,"Jul 30, 2018","Feb 09, 2021",Everyone,1,True,True,False
4736,1880157,Talking Babsy Baby,com.kauf.talking.baum.TalkingBabsyBaby,Lifestyle,4.3,166157.0,"10,000,000+",10000000.0,36519198,True,...,115.0,4.4 and up,1,"Jan 7, 2011","Feb 05, 2021",Everyone,1,True,True,False
1235,1506015,YONO SBI: The Mobile Banking and Lifestyle App!,com.sbi.lotusintouch,Finance,4.0,674001.0,"50,000,000+",50000000.0,78947923,True,...,32.0,5.1 and up,1,"Dec 4, 2017","Apr 18, 2021",Everyone,1,False,False,False
2589,10495,Rope Frog Ninja Hero - Strange Gangster Vegas,com.assassingames.ninjafrogrope,Action,4.3,325123.0,"50,000,000+",50000000.0,63518885,True,...,152.0,5.0 and up,1,"Jan 22, 2019","May 27, 2021",Teen,1,True,False,False
1509,1185732,OLX Classifieds of Kazakhstan,kz.slando,Shopping,4.4,554552.0,"10,000,000+",10000000.0,21725328,True,...,118.0,6.0 and up,1,"Jun 14, 2013","Jun 09, 2021",Everyone,1,False,False,False


In [25]:
# Show the apps user 1 has already rated, alongside the rating they gave.
app_recommender.get_user_ratings(1)

Unnamed: 0,0,users_given_rating
0,WhatsApp Messenger,5
1,Moto Traffic Race,3
2,"Tinder - Dating, Make Friends and Meet New People",5
3,Uber Driver,4
4,MARVEL Strike Force: Squad RPG,4
5,Cover Fire: Offline Shooting Games,5
6,Clash of Clans,5
7,TikTok,5
8,DEAD TARGET: Zombie Offline - Shooting Games,2
9,GO Launcher - 3D parallax Themes & HD Wallpapers,5
