In [4]:
import ast
import gc
import os
import re
from datetime import datetime

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import SVG
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import (LSTM, Activation, Conv1D, Dense, Dropout, Embedding,
                          Flatten, Input, MaxPooling1D, concatenate, dot)
from keras.models import Model, Sequential
from keras.utils.vis_utils import model_to_dot
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from scipy.spatial.distance import correlation, cosine
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel
from sklearn.model_selection import train_test_split
from surprise import (NMF, SVD, BaselineOnly, Dataset, KNNBasic,
                      NormalPredictor, Reader)
from surprise.model_selection import (GridSearchCV, KFold, RandomizedSearchCV,
                                      cross_validate)
from tqdm import tqdm

tqdm.pandas()

In [5]:

# Cleaning functions
def cleanHtml(text: str) -> str:
    """
    Removes HTML symbols from text

    Args:
        text (str): input text

    Returns:
        str: text without things like %2E in it
    """
    html = re.compile(r"<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});")
    return re.sub(html, r' ', text)


def cleanUrl(text: str) -> str:
    """
    Removes http(s) urls from text

    Args:
        text (str): input text

    Returns:
        str: output text without urls like https://google.com/
    """
    return re.sub(r"https?://\S+|www\.\S+", r' ', text)


def cleanAscii(text: str) -> str:
    """
    Remove non ASCII characters

    Args:
        text (str): input text

    Returns:
        str: text without things like carriage returns, tabs, null bytes
             removes from \x00 (null byte) to \x7f (delete)
    """
    return re.sub(r'[^\x00-\x7f]', r' ', text)


def cleanSpecChar(text: str) -> str:
    """
    Removes misc special unicode chars

    Args:
        text (str): input text

    Returns:
        str: text without emojis, pictographs, maps, flags.. 
    """
    specPattern = re.compile(
        '['
        u'\U0001F600-\U0001F64F'  # emoticons
        u'\U0001F300-\U0001F5FF'  # symbols & pictographs
        u'\U0001F680-\U0001F6FF'  # transport & map symbols
        u'\U0001F1E0-\U0001F1FF'  # flags (iOS)
        u'\U00002702-\U000027B0'
        u'\U000024C2-\U0001F251'
        ']+',
        flags=re.UNICODE)
    return specPattern.sub(r' ', text)


def cleanSpaces(text: str) -> str:
    """
    Converts multiple spaces in text to a single space

    Args:
        text (str): input text

    Returns:
        str: text with at most contiguous space
    """
    return re.sub(r'\s{2,}', r' ', text)


def fullClean(text: str) -> str:
    """
    Compiles the above cleaning functions

    Args:
        text (str): input text

    Returns:
        str: cleaned output text, lowercase
    """
    return cleanSpaces(cleanSpecChar(cleanAscii(cleanHtml(cleanUrl(text))))).strip().lower()

# nltk utilities
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

In [6]:
df_games = pd.read_pickle('./pickles/df_games.pkl') #* local dependancy
df_games.dropna(subset = ['desc'], inplace=True)
desc = df_games.desc.progress_apply(fullClean)
print(desc.shape)
df_games.sample(5)


100%|██████████| 686/686 [00:00<00:00, 7291.34it/s]

(686,)





Unnamed: 0,name,count,appid,requiredAge,metacritic,price,windows,mac,linux,releaseDate,acheiveCount,recs,publishers,developers,desc,header_image,genres,categories
217,Call of Duty,54,1938090.0,18,,8999.0,True,False,False,"27 Oct, 2022",24.0,172821.0,[Activision],"[Infinity Ward, Raven Software, Beenox]",Existing Modern Warfare® II Digital Standard E...,https://cdn.akamai.steamstatic.com/steam/apps/...,[Action],"[Single-player, Multi-player, PvP, Online PvP,..."
443,Colony Survival,20,366090.0,0,,1999.0,True,True,True,"Jun 16, 2017",50.0,5887.0,[Pipliz],[Pipliz],"<img src=""https://cdn.akamai.steamstatic.com/s...",https://cdn.akamai.steamstatic.com/steam/apps/...,"[Action, Adventure, Indie, Strategy, Early Acc...","[Single-player, Multi-player, PvP, Online PvP,..."
423,Battlefleet Gothic: Armada,22,573100.0,0,77.0,1999.0,True,False,False,"Jan 24, 2019",91.0,6862.0,[Focus Entertainment],[Tindalos Interactive],<strong>Battlefleet Gothic: Armada 2</strong> ...,https://cdn.akamai.steamstatic.com/steam/apps/...,[Strategy],"[Single-player, Multi-player, PvP, Online PvP,..."
292,The Sims 3,32,47890.0,0,86.0,1999.0,True,False,False,"Jan 27, 2011",,25784.0,[Electronic Arts],[The Sims Studio],<strong>Play with Life.</strong><br><br>Create...,https://cdn.akamai.steamstatic.com/steam/apps/...,[Simulation],[Single-player]
23,Stardew Valley,356,413150.0,0,89.0,1600000.0,True,True,True,"26 Feb, 2016",40.0,451572.0,[ConcernedApe],[ConcernedApe],Stardew Valley is an open-ended country-life R...,https://cdn.akamai.steamstatic.com/steam/apps/...,"[Indie, RPG, Simulation]","[Single-player, Multi-player, Co-op, Online Co..."


In [7]:
desc[100]

'build a resistance made from anyone in the world to take back a near-future london that is facing its downfall. recipient of over 65 e3 awards and nominations. recruit and play as anyone from london. everyone you see has a unique backstory, personality, and skill set for unique situations. hack armed drones, deploy spider-bots, and take down enemies using an augmented reality cloak. explore a massive urban open world featuring london s many iconic landmarks and fun side activities. team up with your friends as you complete co-op missions and unique game modes. upgrade to the gold edition or ultimate edition for access to the season pass and more!'

In [8]:
def nltkTokenize(text: str) -> str:
    """
    Returns tokenized, lemmatized workds with a few common abriviations

    Args:
        text (str): input text

    Returns:
        str: string of tokens
    """
    temp_sent = []
    words = nltk.word_tokenize(text)
    tags = nltk.pos_tag(words)
    VERB_CODES = {'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'}
    for i, word in enumerate(words):
        if tags[i][1] in VERB_CODES:
            lemmatized = lemmatizer.lemmatize(word, 'v')
        else:
            lemmatized = lemmatizer.lemmatize(word)
        if lemmatized not in stop_words and lemmatized.isalpha():
            temp_sent.append(lemmatized)

    finalsent = ' '.join(temp_sent)
    finalsent = finalsent.replace("n't", " not")
    finalsent = finalsent.replace("'m", " am")
    finalsent = finalsent.replace("'re", " are")
    finalsent = finalsent.replace("'ll", " will")
    finalsent = finalsent.replace("'ve", " have")
    finalsent = finalsent.replace("'d", " would")
    return finalsent


desc = desc.progress_apply(nltkTokenize)


100%|██████████| 686/686 [00:13<00:00, 50.28it/s]


In [9]:
desc[100]


'build resistance make anyone world take back london face downfall recipient award nomination recruit play anyone london everyone see unique backstory personality skill set unique situation hack arm drone deploy take enemy use augmented reality cloak explore massive urban open world feature london many iconic landmark fun side activity team friend complete mission unique game mode upgrade gold edition ultimate edition access season pas'

In [10]:
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(desc)


In [11]:
cos_similar = cosine_similarity(tfidf_matrix, tfidf_matrix)
cos_similar.shape


(686, 686)

In [12]:
indices = pd.Series(df_games.index, index=df_games['name'])
indices.head()

name
The Binding of Isaac: Rebirth    0
Minecraft                        1
Factorio                         2
Europa Universalis IV            3
The Binding of Isaac             4
dtype: int64

In [13]:
def getRecs(titlelist: list[str], count: int = 10) -> list:
    """
    Returns recommendations based on cosine similarity

    Args:
        titlelist (list[str]): List of Game titles

    Returns:
        list: List of Recommendations
    """
    assert count < cos_similar.shape[0] - len(titlelist)
    idxs = []
    for title in titlelist:
        try:
            idxs.append(indices[title])
        except KeyError:  # !
            print('"', title, '" not found, ignoring', sep='')
            continue
    if not idxs:
        print('No valid input, exiting')
        return 0
    similarity_scores = pd.Series(cos_similar[idxs[0]])
    for idx in idxs[1:]:
        similarity_scores += pd.Series(cos_similar[idx])
    similarity_scores.drop(labels=idxs, axis=0, inplace=True)
    similarity_scores.sort_values(ascending=False, inplace=True)
    topRecs = list(similarity_scores.iloc[:count].index)
    return [df_games['name'].loc[rec] for rec in topRecs]


In [14]:
getRecs(["Baldur's Gate III"])


['Divinity: Original Sin II',
 'Pillars of Eternity',
 'The Banner Saga',
 "Baldur's Gate: Enhanced Edition",
 'Dying Light 2 Stay Human',
 'Total War: Warhammer III',
 'Dark and Darker',
 'Remnant: From the Ashes',
 'Dishonored 2',
 'Portal Knights']

In [15]:
df_games.sample(5)

Unnamed: 0,name,count,appid,requiredAge,metacritic,price,windows,mac,linux,releaseDate,acheiveCount,recs,publishers,developers,desc,header_image,genres,categories
676,Zombie Army 4: Dead War,10,694280.0,0,74.0,4999.0,True,False,False,"Feb 18, 2021",70.0,4657.0,[Rebellion],[Rebellion],Hitler’s hordes are back for more in this spin...,https://cdn.akamai.steamstatic.com/steam/apps/...,[Action],"[Single-player, Multi-player, Co-op, Online Co..."
513,Trials Fusion,16,245490.0,0,80.0,1999.0,True,False,False,"Apr 24, 2014",,1911.0,[Ubisoft],"[RedLynx, in collaboration with Ubisoft Shang...",<i>&quot;The most ambitious Trials yet.&quot;<...,https://cdn.akamai.steamstatic.com/steam/apps/...,[Racing],"[Single-player, Multi-player, In-App Purchases..."
354,Transport Fever 2,27,1066780.0,0,76.0,3999.0,True,True,True,"Dec 11, 2019",61.0,15611.0,[Good Shepherd Entertainment],[Urban Games],The classic transport simulation genre has a n...,https://cdn.akamai.steamstatic.com/steam/apps/...,"[Simulation, Strategy]","[Single-player, Steam Achievements, Steam Work..."
408,Golf It!,23,571740.0,0,,899.0,True,True,False,"Feb 17, 2017",41.0,16614.0,[Perfuse Entertainment],[Perfuse Entertainment],<strong>Golf It!</strong> is a multiplayer Min...,https://cdn.akamai.steamstatic.com/steam/apps/...,"[Casual, Indie, Simulation, Sports, Early Access]","[Single-player, Multi-player, PvP, Online PvP,..."
61,Fallout 3,147,22370.0,17,91.0,1999.0,True,False,False,"Dec 17, 2009",,30709.0,[Bethesda Softworks],[Bethesda Game Studios],<strong>Prepare for the Future™</strong><br>\t...,https://cdn.akamai.steamstatic.com/steam/apps/...,[RPG],[Single-player]


In [16]:
import glob

df_vids = pd.concat([pd.read_pickle(i[2:]) for i in glob.glob(
    './pickles/individual/*full.pkl')], ignore_index=True)  # * local
df_vids.dropna(inplace=True)
# minimum number of videos before a game is considered
df_vids = df_vids.groupby("game").filter(lambda x: len(x) >= 5)
df_vids.sample(10)

Unnamed: 0,channel,title,videoID,date,likes,comments,views,game
73171,quill18,The Sims 3 (Laura Bow Livestream) - Part 2,4z6MvYI52hQ,2013-03-06T11:55:30Z,80.0,31.0,6021.0,The Sims 3
40557,Many A True Nerd,FTL: Faster Than Livestream - Hard Time,SdZUIFm-s-s,2017-07-19T19:08:01Z,1571.0,118.0,70849.0,FTL: Faster Than Light
46749,Northernlion,Northernlion Plays - Dark Souls 3 (Again) - Ep...,BPcy3RRq5dY,2020-05-12T01:00:00Z,791.0,61.0,21351.0,Dark Souls III
58179,Northernlion,Let's Play - XCOM: Enemy Within! [Episode 1: R...,4fzugM05Wz4,2013-11-12T20:00:03Z,7582.0,866.0,318029.0,XCOM: Enemy Unknown
6877,Blitz,The Curious Expedition - Ep. 3 - Thief in the ...,Baf_useRivg,2015-04-22T17:00:00Z,211.0,8.0,10505.0,The Curious Expedition
83897,SeaNanners Gaming Channel,SEANANNERS VS GOLDGLOVE (The Hidden),1UwCfw65sUg,2013-02-28T17:01:16Z,20466.0,1100.0,1627595.0,The Hidden
63988,quill18,Rimworld 1.3 Preview - Ep 7,AgYi5LPLnJU,2021-07-17T17:00:14Z,387.0,15.0,9873.0,RimWorld
45804,Northernlion,What Can You Even Say? | Fall Guys #53,cMoLoW7w4D8,2020-09-30T16:00:09Z,2578.0,176.0,35674.0,Fall Guys: Ultimate Knockout
57683,Northernlion,Let's Play - Dark Souls 2 - Episode 11,hDZf97Laav8,2014-03-19T20:00:03Z,2578.0,181.0,85682.0,Dark Souls II
4080,Blitz,I Built The World's Most Dangerous Petting Zoo...,o4xpQQZlxFs,2019-11-05T21:00:07Z,13103.0,459.0,996482.0,Planet Zoo


In [17]:
df_vids[['channel', 'views']].groupby(by=['channel'])['views'].max().sort_values(ascending=False)

channel
Markiplier                   107022934.0
jacksepticeye                 73240479.0
VanossGaming                  64648885.0
DanTDM                        57967453.0
SeaNanners Gaming Channel     34182772.0
theRadBrad                    23324484.0
Smosh Games                   18680940.0
H2ODelirious                  18017591.0
SmallAnt                      15253902.0
AngryJoeShow                   9948133.0
Blitz                          8948332.0
Many A True Nerd               8325810.0
quill18                        6951898.0
Splattercatgaming              2647699.0
Northernlion                   2487590.0
PartyElite                     1536945.0
Mathas                          985625.0
KatherineOfSky                  846443.0
Name: views, dtype: float64

In [18]:
df_vids[['channel', 'views']].groupby(by=['channel'])['views'].mean()

channel
AngryJoeShow                 7.073514e+05
Blitz                        3.143199e+05
DanTDM                       5.191466e+06
H2ODelirious                 1.322796e+06
KatherineOfSky               9.429180e+03
Many A True Nerd             9.626590e+04
Markiplier                   3.397571e+06
Mathas                       1.186248e+04
Northernlion                 6.538259e+04
PartyElite                   2.045515e+04
SeaNanners Gaming Channel    1.523004e+06
SmallAnt                     2.683604e+06
Smosh Games                  9.640398e+05
Splattercatgaming            3.937815e+04
VanossGaming                 9.052538e+06
jacksepticeye                3.217658e+06
quill18                      3.310933e+04
theRadBrad                   8.031627e+05
Name: views, dtype: float64

In [19]:
df_vids.shape

(78359, 8)

In [20]:
pd.set_option('compute.use_numba', False)
df_vids['views_normalized'] = df_vids.groupby(by=['channel']).views.transform(lambda x: (x/x.max()) * 100)

In [21]:
df_vids[['channel', 'views_normalized']].groupby(by=['channel'])['views_normalized'].mean()

channel
AngryJoeShow                  7.110393
Blitz                         3.512608
DanTDM                        8.955829
H2ODelirious                  7.341693
KatherineOfSky                1.113977
Many A True Nerd              1.156235
Markiplier                    3.174619
Mathas                        1.203549
Northernlion                  2.628351
PartyElite                    1.330897
SeaNanners Gaming Channel     4.455471
SmallAnt                     17.592903
Smosh Games                   5.160553
Splattercatgaming             1.487259
VanossGaming                 14.002620
jacksepticeye                 4.393278
quill18                       0.476263
theRadBrad                    3.443432
Name: views_normalized, dtype: float64

In [22]:
df_vids.sample(21)

Unnamed: 0,channel,title,videoID,date,likes,comments,views,game,views_normalized
87561,Smosh Games,RESIDENT EVIL 6 (Backseat Gaming),BnR8A7xQCYw,2012-10-02T19:45:49Z,5866.0,1300.0,537928.0,Resident Evil 6,2.879555
5257,Blitz,Trapping the Neighbor on the Road! - Hello Nei...,eSKOZVFqz54,2017-08-03T16:00:31Z,9167.0,1100.0,1413485.0,Hello Neighbor,15.796072
94084,Splattercatgaming,Let's Play Shadowrun : Dragonfall - Episode 33...,i-9qxE0RnsU,2014-03-29T15:00:07Z,217.0,15.0,9275.0,Shadowrun: Dragonfall,0.350304
11255,DanTDM,"Minecraft | THE CORRUPTION! (NEW Dimension, Bo...",yzSkHrnrg1E,2013-03-08T23:46:49Z,3397.0,240.0,537602.0,Minecraft,0.92742
41618,Many A True Nerd,Ryse: Son of Rome - Part 2 - Beyond The Wall,tx9w501b2pM,2015-09-18T21:42:37Z,1638.0,171.0,69757.0,Ryse: Son of Rome,0.83784
83205,SeaNanners Gaming Channel,FIVE NIGHTS AT FREDDY'S 4 (Garry's Mod Murder),zd1erT5_CoA,2015-03-05T17:00:01Z,32885.0,545.0,2062991.0,Five Nights at Freddy's 4,6.035178
25598,Markiplier,KILL IT WITH FIRE!! | The Forest,MrL2njApYP4,2021-02-28T18:00:10Z,228076.0,8600.0,5814998.0,The Forest,5.433413
58625,Northernlion,Northernlion Plays: Crusader Kings II: The Old...,_Ym7U9HpEBc,2013-06-21T22:00:40Z,1250.0,241.0,39632.0,Crusader Kings II,1.593189
75458,theRadBrad,RATCHET AND CLANK RIFT APART PS5 Walkthrough G...,fCW3cMYcZzc,2021-06-13T00:00:05Z,10741.0,464.0,278344.0,2021,1.193355
52560,Northernlion,Dead Cells - Northernlion Plays - Episode 6,2ikfWEMwB1M,2017-05-20T17:00:00Z,2910.0,350.0,95933.0,Dead Cells,3.856463


In [23]:
df = df_vids.merge(df_games, left_on='game', right_on='name', how='inner')
df = df[['channel', 'game', 'title', 'views_normalized', 'count']]
df.rename(columns={'count':'videosForGame', 'title':'videotitle'}, inplace=True)
df.sample(10)

Unnamed: 0,channel,game,videotitle,views_normalized,videosForGame
24137,Northernlion,Super Auto Pets,Moose me (Super Auto Pets),2.732886,708
25632,Blitz,Goat Simulator,Goat Simulator: Waste of Space DLC - Portal Go...,0.849778,64
38087,Mathas,Project Zomboid,Let's Play Project Zomboid [137] - Magic Tricks,0.108155,762
51574,quill18,Civilization VI,Civilization 6 - A Tutorial for Complete Begin...,62.263917,828
31759,quill18,RimWorld,RimWorld Alpha 10 - ODE TO JOY - Episode 12,0.182684,1346
8749,theRadBrad,Alien: Isolation,Alien Isolation Walkthrough Gameplay Part 3 - ...,5.89529,152
6972,Mathas,2020,Back To Square One | Mount & Blade II: Bannerl...,0.965986,233
45890,Northernlion,The Binding of Isaac,Where's My Chocobo? | Repentance on Stream (Ep...,1.686894,1689
17740,Markiplier,Dead Space 2,Dead Space 2 | Part 7 | DID YOU KILL YOURSELF?,0.359737,94
9369,Mathas,Outward,Outward | The Potential | Let's Play Outward G...,0.900951,20


In [24]:
df.describe()

Unnamed: 0,views_normalized,videosForGame
count,59398.0,59398.0
mean,2.989459,942.901983
std,5.424533,1311.595584
min,0.003107,10.0
25%,0.450173,76.0
50%,1.398706,285.0
75%,3.294635,1548.0
max,100.0,4504.0


In [25]:
pd.set_option('display.precision', 2)

In [26]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 59398 entries, 0 to 59397
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   channel           59398 non-null  object 
 1   game              59398 non-null  object 
 2   videotitle        59398 non-null  object 
 3   views_normalized  59398 non-null  float64
 4   videosForGame     59398 non-null  int64  
dtypes: float64(1), int64(1), object(3)
memory usage: 2.7+ MB


In [27]:
vidcount = df.groupby(by=['channel']).size().reset_index()
vidcount.columns = ['channel', 'videosForChannel']
df = df.merge(vidcount, on='channel')

In [28]:
vidcount = df.groupby(by=['channel']).views_normalized.agg('mean').reset_index()
vidcount.columns = ['channel', 'avgViewsChannel']
df = df.merge(vidcount, on='channel')

vidcountGame = df.groupby(by=['game']).views_normalized.agg('mean').reset_index()
vidcountGame.columns = ['game', 'avgViewsGame']
df = df.merge(vidcountGame, on='game')
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 59398 entries, 0 to 59397
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   channel           59398 non-null  object 
 1   game              59398 non-null  object 
 2   videotitle        59398 non-null  object 
 3   views_normalized  59398 non-null  float64
 4   videosForGame     59398 non-null  int64  
 5   videosForChannel  59398 non-null  int64  
 6   avgViewsChannel   59398 non-null  float64
 7   avgViewsGame      59398 non-null  float64
dtypes: float64(3), int64(2), object(3)
memory usage: 4.1+ MB


In [29]:
df.sample(2)

Unnamed: 0,channel,game,videotitle,views_normalized,videosForGame,videosForChannel,avgViewsChannel,avgViewsGame
27381,Mathas,Project Zomboid,Let's Play Project Zomboid [91] - Architecture,0.19,762,5311,1.26,1.36
44908,KatherineOfSky,My Time at Portia,My Time at Portia Ep 50: FIGHTING FOR LUBRICAN...,0.33,64,3530,1.22,0.69


In [30]:
df['total views'] = (df['videosForChannel']*df['avgViewsChannel'] + df['videosForGame']*df['avgViewsGame'])/(df['videosForGame']+df['videosForChannel'])
df['rating_new'] = df['views_normalized'] - df['total views']
df.sample(5)

Unnamed: 0,channel,game,videotitle,views_normalized,videosForGame,videosForChannel,avgViewsChannel,avgViewsGame,total views,rating_new
55286,KatherineOfSky,Brotato,"Brotato 1 - VAMPIRE MOVE OVER, RELEASE THE POT...",0.24,10,3530,1.22,2.54,1.22,-0.98
40066,Northernlion,The Binding of Isaac,Let's Play - The Binding of Isaac - Episode 63...,1.59,1689,12527,2.75,2.53,2.73,-1.13
659,theRadBrad,Days Gone,DAYS GONE Walkthrough Gameplay Part 54 - CINNA...,2.31,97,4694,3.61,3.54,3.61,-1.3
57621,Northernlion,Super Auto Pets,An unlikely hero appears (Super Auto Pets),4.86,708,12527,2.75,4.11,2.83,2.04
11669,H2ODelirious,For Honor,For Honor - Friendly Duels! 2v2 Matches!,13.67,76,2109,7.68,5.53,7.61,6.06


In [31]:
pivot_table_channel = pd.pivot_table(data=df,values='rating_new',index='channel',columns='game')
pivot_table_channel = pivot_table_channel.fillna(0)
pivot_table_game = pd.pivot_table(data=df,values='views_normalized',index='channel',columns='game')
pivot_table_game = pivot_table_game.fillna(0)

In [32]:
pivot_table_channel

game,1996,20 Minutes Till Dawn,2000,2004,2009,2013,2014,2015,2016,2017,...,Worms: Revolution,Wreckfest,X-Plane,XCOM 2,Yandere Simulator,Z1 Battle Royale,Zafehouse: Diaries,Zombie Army 4: Dead War,theHunter,theHunter: Call of the Wild
channel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AngryJoeShow,0.0,0.0,0.0,-5.78,-5.96,0.0,-5.08,-5.69,-2.66,-2.21,...,0.0,0.0,0.0,3.73,0.0,0.0,0.0,0.0,0.0,0.0
Blitz,0.0,0.0,0.0,0.0,0.0,-3.4,0.0,0.0,-3.23,0.64,...,0.0,-1.23,0.0,0.0,0.0,0.0,0.0,-2.73,-1.05,5.78
DanTDM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.14,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
H2ODelirious,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.46,0.27,...,0.0,0.0,0.0,0.0,0.0,3.84,0.0,-4.81,0.0,0.0
KatherineOfSky,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.75,0.0,0.0,0.0,0.0,0.0,0.0
Many A True Nerd,0.0,0.0,0.0,0.0,0.0,-0.81,0.0,-0.8,-0.5,-0.8,...,0.0,-0.77,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Markiplier,1.45,0.0,0.0,0.0,0.0,0.55,0.0,12.41,0.74,-1.33,...,0.0,1.32,0.0,0.0,8.69,-1.49,0.0,0.0,0.0,0.0
Mathas,0.0,0.0,-0.23,1.02,0.0,-0.86,-0.8,-0.59,-0.2,-0.62,...,-1.17,0.0,0.0,-0.38,2.86,-0.37,-0.66,0.0,0.0,-0.73
Northernlion,0.0,0.87,0.0,0.0,0.0,-0.53,-0.81,-0.38,-0.72,2.25,...,-1.51,-0.95,0.0,-0.32,0.0,0.0,-1.3,0.0,0.0,-1.06
PartyElite,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.16,...,0.0,0.0,0.0,-1.13,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
channel_based_similarity = 1 - pairwise_distances( pivot_table_channel.values, metric="cosine" )
game_based_similarity = 1 - pairwise_distances( pivot_table_game.T.values, metric="cosine" )

In [34]:
channel_based_similarity = pd.DataFrame(channel_based_similarity)
channel_based_similarity.columns = channel_based_similarity.columns+1
channel_based_similarity.index = channel_based_similarity.index+1

game_based_similarity = pd.DataFrame(game_based_similarity)
game_based_similarity.columns = game_based_similarity.columns+1
game_based_similarity.index = game_based_similarity.index+1

In [35]:
def rec_game(gamename):
    game = pivot_table_game.T.index.get_loc(gamename)
    games = game_based_similarity[game].sort_values(ascending = False).index.tolist()[:11]
    
    return [df.game.loc[j] for j in games]
def rec_channel(channelname):
    channel = pivot_table_channel.index.get_loc(channelname)
    us = channel_based_similarity[channel].sort_values(ascending = False).index.tolist()[:101]
    return [df.channel.loc[j] for j in us]

In [36]:
rec_game('Middle-earth: Shadow of War')

['League of Legends',
 'League of Legends',
 'League of Legends',
 'League of Legends',
 'Days Gone',
 'Days Gone',
 '2022',
 'Days Gone',
 'League of Legends',
 'Dead Space',
 'Dead Space']

In [37]:
rec_channel('Many A True Nerd')

['H2ODelirious',
 'Markiplier',
 'Markiplier',
 'Markiplier',
 'Markiplier',
 'Markiplier',
 'AngryJoeShow',
 'AngryJoeShow',
 'Markiplier',
 'AngryJoeShow',
 'Markiplier',
 'Markiplier',
 'AngryJoeShow',
 'Markiplier',
 'Markiplier',
 'Markiplier',
 'Markiplier',
 'Markiplier']

In [38]:
reader = Reader(rating_scale=(0, 100))
sup_data = Dataset.load_from_df(df[['channel', 'game', 'views_normalized']], reader)

In [39]:
algo = NormalPredictor()
cross_validate(algo, sup_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NormalPredictor on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    6.7196  6.9210  6.8883  6.8813  6.9742  6.8769  0.0852  
MAE (testset)     4.3796  4.4578  4.4167  4.4084  4.4034  4.4132  0.0255  
Fit time          0.04    0.04    0.05    0.05    0.05    0.04    0.00    
Test time         0.19    0.04    0.04    0.04    0.04    0.07    0.06    


{'test_rmse': array([6.71962888, 6.92096392, 6.88828727, 6.88133684, 6.97422825]),
 'test_mae': array([4.37963044, 4.45782501, 4.416738  , 4.40840822, 4.40343429]),
 'fit_time': (0.04003548622131348,
  0.04303908348083496,
  0.04504108428955078,
  0.0470430850982666,
  0.048517704010009766),
 'test_time': (0.19017314910888672,
  0.043039560317993164,
  0.04403996467590332,
  0.04403996467590332,
  0.043035030364990234)}

In [40]:


algo = SVD()
cross_validate(algo, sup_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)



Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    4.3492  4.5799  4.3905  4.4839  4.7929  4.5193  0.1583  
MAE (testset)     1.8320  1.8896  1.8122  1.8204  1.8922  1.8493  0.0346  
Fit time          0.38    0.34    0.35    0.33    0.36    0.35    0.02    
Test time         0.07    0.07    0.07    0.07    0.08    0.07    0.00    


{'test_rmse': array([4.34915156, 4.57991456, 4.39049432, 4.48391843, 4.79293927]),
 'test_mae': array([1.83203882, 1.88964517, 1.81218775, 1.8204029 , 1.89223262]),
 'fit_time': (0.3763422966003418,
  0.3392951488494873,
  0.34831738471984863,
  0.33280134201049805,
  0.35675930976867676),
 'test_time': (0.06906318664550781,
  0.06737160682678223,
  0.06906270980834961,
  0.06806159019470215,
  0.07623505592346191)}

In [41]:
algo = KNNBasic(k=5)
cross_validate(algo, sup_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    4.8763  5.2990  5.3802  5.3158  5.2727  5.2288  0.1798  
MAE (testset)     1.9811  2.2339  2.2674  2.2532  2.2517  2.1975  0.1087  
Fit time          1.31    1.35    1.29    1.31    1.30    1.31    0.02    
Test time         3.78    3.82    3.78    3.81    3.64    3.76    0.07    


{'test_rmse': array([4.87631249, 5.29895268, 5.38015929, 5.31577309, 5.2726978 ]),
 'test_mae': array([1.98107756, 2.23394   , 2.26736284, 2.25316659, 2.25174054]),
 'fit_time': (1.3114771842956543,
  1.353473424911499,
  1.2919342517852783,
  1.3122742176055908,
  1.2994954586029053),
 'test_time': (3.778005361557007,
  3.8223137855529785,
  3.7782602310180664,
  3.806466579437256,
  3.6380832195281982)}

In [42]:
algo = NMF()
cross_validate(algo, sup_data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm NMF on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    4.5962  4.4176  4.6514  4.2865  4.5029  4.4909  0.1297  
MAE (testset)     1.7669  1.6994  1.7261  1.7302  1.7461  1.7337  0.0223  
Fit time          0.49    0.49    0.51    0.54    0.51    0.51    0.02    
Test time         0.06    0.21    0.06    0.06    0.06    0.09    0.06    


{'test_rmse': array([4.59619063, 4.41759551, 4.65140688, 4.28654741, 4.50288099]),
 'test_mae': array([1.76688286, 1.69944828, 1.72612139, 1.73018881, 1.74606749]),
 'fit_time': (0.49144744873046875,
  0.4947643280029297,
  0.5128450393676758,
  0.5394909381866455,
  0.5064613819122314),
 'test_time': (0.06005430221557617,
  0.21219348907470703,
  0.06005454063415527,
  0.061055898666381836,
  0.05905318260192871)}

In [43]:


sup_train = sup_data.build_full_trainset()
algo = NMF(n_factors = 200 , n_epochs = 40 )
algo.fit(sup_train)



<surprise.prediction_algorithms.matrix_factorization.NMF at 0x19a378427d0>

In [44]:
def prediction_algo(uid = None , iid = None):
    predictions = []
    if uid is None:
        for ui in sup_train.all_users():
            predictions.append(algo.predict(ui, iid, verbose = False))
        return predictions
    
    if iid is None:
        for ii in sup_train.all_items():
            ii = sup_train.to_raw_iid(ii)
            predictions.append(algo.predict(uid, ii, verbose = False))
        return predictions
    return predictins.append(algo.predict(uid,iid,verbose = False))

In [47]:
predictions = prediction_algo(uid = 19)
predictions.sort(key=lambda x: x.est, reverse=True)
print('#### Best Recommanded Movies are ####')
for pred in predictions[:21]:
#     print('Movie -> {} with Score-> {}'.format(sup_train.to_raw_iid(pred.iid) , pred.est))
    print('Movie -> {} with Score-> {}'.format(pred.iid , pred.est))

#### Best Recommanded Movies are ####
Movie -> Dead Space with Score-> 2.9894585533122617
Movie -> Battlefield 2042 with Score-> 2.9894585533122617
Movie -> League of Legends with Score-> 2.9894585533122617
Movie -> Dark and Darker with Score-> 2.9894585533122617
Movie -> 2022 with Score-> 2.9894585533122617
Movie -> Death Stranding with Score-> 2.9894585533122617
Movie -> God of War with Score-> 2.9894585533122617
Movie -> Days Gone with Score-> 2.9894585533122617
Movie -> Gwent: The Witcher Card Game with Score-> 2.9894585533122617
Movie -> Call of Duty: Modern Warfare 2 with Score-> 2.9894585533122617
Movie -> Mass Effect Legendary Edition with Score-> 2.9894585533122617
Movie -> DAVE THE DIVER with Score-> 2.9894585533122617
Movie -> A Plague Tale: Requiem with Score-> 2.9894585533122617
Movie -> Call of Duty with Score-> 2.9894585533122617
Movie -> Blood Bowl 2 with Score-> 2.9894585533122617
Movie -> Cyberpunk 2077 with Score-> 2.9894585533122617
Movie -> The Mortuary Assistant w