# Video Game Recommender Model
This step takes the data output by the exploration step, groups video games by "like" similarity to recommend similar games, and fine-tunes the recommendations using sentiment scores from the text reviews.

In [1]:
%config Completer.use_jedi = False

In [2]:
import pandas as pd
import numpy as np
import os.path

from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse
from lightfm import LightFM



In [3]:
# Project root. Defaults to the original author's checkout; set the
# STEAM_RECOMMENDER_HOME environment variable to run the notebook from another
# location without editing this cell.
path_steam = os.environ.get("STEAM_RECOMMENDER_HOME", "C:/users/ggibs/steam_game_recommender")
# Intermediate files read and written by this notebook
path_interim = os.path.join(path_steam, "data/interim/")
# Externally sourced inputs such as app_list.csv
path_external = os.path.join(path_steam, "data/external/")

In [4]:
# Lookup table of game titles, indexed by Steam appid
app_list = pd.read_csv(
    os.path.join(path_external, "app_list.csv"),
    index_col="appid",
)
app_list.head()

Unnamed: 0_level_0,name
appid,Unnamed: 1_level_1
10,Counter-Strike
20,Team Fortress Classic
30,Day of Defeat
40,Deathmatch Classic
50,Half-Life: Opposing Force


In [5]:
# Review-level modelling table written to the interim data folder
game_model = pd.read_csv(
    os.path.join(path_interim, "game_model.csv")
)
game_model.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 779826 entries, 0 to 779825
Data columns (total 23 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   appid                           779826 non-null  int64  
 1   recommendationid                779826 non-null  int64  
 2   rescore                         779826 non-null  float64
 3   timestamp_created               779826 non-null  int64  
 4   voted_up                        779826 non-null  bool   
 5   author.steamid                  779826 non-null  int64  
 6   author.num_games_owned          779826 non-null  int64  
 7   author.num_reviews              779826 non-null  int64  
 8   author.playtime_forever         779739 non-null  float64
 9   author.playtime_last_two_weeks  779739 non-null  float64
 10  author.playtime_at_review       779570 non-null  float64
 11  genre                           779374 non-null  object 
 12  Action          

In [6]:
# Rename the 'rescore' column to 'score'
game_model.rename(columns={'rescore': 'score'}, inplace=True)

# Show any repeated (user, game) pairs; an empty frame means no duplicates
game_model[game_model.duplicated(subset=['author.steamid', 'appid'])]

Unnamed: 0,appid,recommendationid,score,timestamp_created,voted_up,author.steamid,author.num_games_owned,author.num_reviews,author.playtime_forever,author.playtime_last_two_weeks,...,Indie,Adventure,RPG,Strategy,Simulation,Casual,Massively,Sports,Racing,Other


No duplicate votes: no user has voted on the same game more than once.

In [7]:
# Encode the vote numerically (True -> 1, False -> -1) so that empty
# user/game cells can later be represented as zero
game_model['voted_up_num'] = np.where(game_model['voted_up'], 1, -1)
game_model['voted_up_num'].value_counts()

 1    693628
-1     86198
Name: voted_up_num, dtype: int64

About 11% of votes are down votes (86,198 of 779,826).

### Start item-item collaborative filtering

In [8]:
# Game neighbors are built from likes only, so user and genre attributes are
# dropped here. Dislikes are kept in a separate frame so they can be excluded
# from a user's recommendations later.
#
# Each frame is built in one chain from an explicit .loc selection:
#   * .assign() works on a new frame, avoiding the SettingWithCopyWarning raised
#     by assigning to a column of a filtered slice;
#   * drop(columns=...) replaces the positional `axis` argument, which pandas
#     2.0+ no longer accepts.
vote_columns = ['appid', 'author.steamid', 'voted_up']
is_like = game_model['voted_up']

likes_only = (
    game_model.loc[is_like, vote_columns]
    .assign(voted_up=1)            # a like is encoded as +1
    .merge(app_list, on='appid')   # attach the game title (default inner join)
    .drop(columns='appid')
)

dislikes_only = (
    game_model.loc[~is_like, vote_columns]
    .assign(voted_up=-1)           # a dislike is encoded as -1
    .merge(app_list, on='appid')
    .drop(columns='appid')
)

In [9]:
# User x game matrices: one row per user, one column per game title, with
# missing user/game combinations filled with 0. The 'name' label left on the
# column axis by the pivot is removed and the user id becomes a regular column.
likes_only_pivot = (
    likes_only
    .pivot_table(index='author.steamid', columns='name', values='voted_up')
    .fillna(0)
    .reset_index()
    .rename_axis(None, axis=1)
    .reset_index(drop=True)
)

dislikes_only_pivot = (
    dislikes_only
    .pivot_table(index='author.steamid', columns='name', values='voted_up')
    .fillna(0)
    .reset_index()
    .rename_axis(None, axis=1)
    .reset_index(drop=True)
)

In [10]:
# Items only: drop the user id so that every remaining column is a game.
# `columns=` is used because pandas 2.0+ rejects a positional `axis` argument.
likes_only_pivot_items = likes_only_pivot.drop(columns='author.steamid')
likes_only_pivot_items.iloc[:5, :10]

Unnamed: 0,12 is Better Than 6,7 Days to Die,A Plague Tale: Innocence,ARMA: Cold War Assault,ASTRONEER,ATLAS,Ace of Spades: Battle Builder,Age of Empires II (2013),Age of Empires II: Definitive Edition,Age of Empires III: Complete Collection
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Items only for the dislikes matrix. `columns=` is used because pandas 2.0+
# rejects a positional `axis` argument.
dislikes_only_pivot_items = dislikes_only_pivot.drop(columns='author.steamid')

In [12]:
# Scale every user's row by its Euclidean length so that high-volume gamers
# do not overwhelm low-volume ones
magnitude = np.sqrt((likes_only_pivot_items ** 2).sum(axis=1))
likes_only_pivot_items = likes_only_pivot_items.div(magnitude, axis=0)

In [13]:
def calculate_similarity(data_items):
    """Compute the pairwise cosine similarity between the columns of a frame.

    ``data_items`` is converted to a SciPy CSR matrix and transposed so that
    each original column becomes a row vector. The similarities are returned
    as a square DataFrame labelled on both axes with ``data_items.columns``.
    """
    labels = data_items.columns
    column_vectors = sparse.csr_matrix(data_items).transpose()
    return pd.DataFrame(
        data=cosine_similarity(column_vectors),
        index=labels,
        columns=labels,
    )

In [14]:
# Build the game-by-game cosine similarity matrix from the likes matrix
gameXgame = calculate_similarity(likes_only_pivot_items)
# Preview the top-left 7x7 corner
gameXgame.iloc[:7, :7]

Unnamed: 0,12 is Better Than 6,7 Days to Die,A Plague Tale: Innocence,ARMA: Cold War Assault,ASTRONEER,ATLAS,Ace of Spades: Battle Builder
12 is Better Than 6,1.0,0.001533,0.006094,0.005461,0.0,0.000687,0.0
7 Days to Die,0.001533,1.0,0.007915,0.007277,0.001113,0.045306,0.003646
A Plague Tale: Innocence,0.006094,0.007915,1.0,0.0,0.0,0.004776,0.000707
ARMA: Cold War Assault,0.005461,0.007277,0.0,1.0,0.0,0.0,0.0
ASTRONEER,0.0,0.001113,0.0,0.0,1.0,0.004892,0.0
ATLAS,0.000687,0.045306,0.004776,0.0,0.004892,1.0,0.0
Ace of Spades: Battle Builder,0.0,0.003646,0.000707,0.0,0.0,0.0,1.0


In [15]:
# For every game, record the titles of its 10 most similar games, ranked 1..10
# (highest similarity first)
game_neighbors = pd.DataFrame(index=gameXgame.columns, columns=range(1, 11))
for position in range(len(gameXgame.columns)):
    ranked = gameXgame.iloc[:, position].sort_values(ascending=False)
    game_neighbors.iloc[position, :10] = ranked.index[:10]

In [17]:
# Preview: one row per game, columns 1..10 hold its ranked nearest neighbors
game_neighbors.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
12 is Better Than 6,12 is Better Than 6,Reus,Prey,Beholder,Guns of Icarus Online,Call of Duty: Ghosts,Mark of the Ninja,The Darkness II,LIMBO,Dark Messiah of Might & Magic
7 Days to Die,7 Days to Die,Terraria,Raft,Space Engineers,ATLAS,Garry's Mod,Counter-Strike: Global Offensive,DayZ,They Are Billions,Left 4 Dead 2
A Plague Tale: Innocence,A Plague Tale: Innocence,Shadow of the Tomb Raider: Definitive Edition,GRIS,STAR WARS Jedi: Fallen Order,Assassin's Creed Odyssey,Resident Evil 2,Ori and the Will of the Wisps,Life is Strange: Before the Storm,Sekiro: Shadows Die Twice,Quantum Break
ARMA: Cold War Assault,ARMA: Cold War Assault,Arma 2: Operation Arrowhead,Day of Defeat,Half-Life 2: Deathmatch,Counter-Strike,Mount & Blade: Warband,POSTAL 2,Counter-Strike: Source,Zombie Army Trilogy,Half-Life
ASTRONEER,ASTRONEER,Terraria,Among Us,The Forest,Oxygen Not Included,The Incredible Adventures of Van Helsing,SUPERHOT,Risk of Rain 2,Counter-Strike: Global Offensive,The Bureau: XCOM Declassified


In [18]:
# Some game series with many editions match mostly to themselves
# (neighbor ranks 2 through 7 are shown; rank 1 is left out)
game_neighbors.loc['Resident Evil':'Resident Evil 6', 2:7]

Unnamed: 0,2,3,4,5,6,7
Resident Evil,Resident Evil 2,Resident Evil 4,Resident Evil 3,Resident Evil Revelations,Resident Evil 5,Resident Evil Revelations 2
Resident Evil 2,Resident Evil 3,Resident Evil,Devil May Cry 5,Resident Evil 4,Sekiro: Shadows Die Twice,DOOM Eternal
Resident Evil 3,Resident Evil 2,Resident Evil,Resident Evil 4,DOOM Eternal,Devil May Cry 5,Resident Evil 5
Resident Evil 4,Resident Evil 5,Resident Evil,Resident Evil 6,Resident Evil 2,Resident Evil Revelations,Resident Evil 3
Resident Evil 5,Resident Evil 6,Resident Evil 4,Resident Evil Revelations,Resident Evil Revelations 2,Resident Evil,Resident Evil 2
Resident Evil 6,Resident Evil 5,Resident Evil Revelations,Resident Evil 4,Resident Evil Revelations 2,Resident Evil 2,Resident Evil


In [19]:
# Tomb Raider did match to Lara Croft
game_neighbors.loc['Tomb Raider II':'Tomb Raider II', 2:5]

Unnamed: 0,2,3,4,5
Tomb Raider II,Tomb Raider: Legend,Tomb Raider: Underworld,Tomb Raider: Anniversary,Lara Croft and the Guardian of Light


In [20]:
# A Chinese-language title matched another Chinese game (Chinese Parents)
game_neighbors.loc['古剑奇谭三(Gujian3)':'古剑奇谭三(Gujian3)', 2:5]

Unnamed: 0,2,3,4,5
古剑奇谭三(Gujian3),Chinese Parents,Conqueror's Blade,Assassin's Creed Odyssey,FINAL FANTASY XV WINDOWS EDITION


### Start user-item collaborative filtering

In [144]:
# Score candidate games for every user from the games they liked, and retain
# each user's 10 best recommendations together with their scores.
#
# Per-user results are collected in plain lists and the two result frames are
# built once after the loop: growing a DataFrame with .append() on every
# iteration is quadratic, and DataFrame.append was removed in pandas 2.0.

# Row position of each user in the dislikes pivot, computed once so the loop
# neither rescans the frame per user nor needs a bare `except` to detect
# users without dislikes.
dislike_row_by_user = {
    steamid: row
    for row, steamid in enumerate(dislikes_only_pivot['author.steamid'])
}

recommended_users = []
recommended_names = []
recommended_scores = []

record = 0

for user_index, user in enumerate(likes_only_pivot['author.steamid']):
    record = user_index + 1
    print("iteration {}".format(record), end='\r')  # display record counter
    user_index_dis = dislike_row_by_user.get(user)

    # Games the user has liked (non-zero entries of the user's row)
    user_row = likes_only_pivot_items.iloc[user_index]
    known_user_likes = user_row[user_row > 0].index.values

    # Games the user has disliked, if the user appears in the dislikes pivot
    if user_index_dis is not None:
        dislike_row = dislikes_only_pivot_items.iloc[user_index_dis]
        known_user_dislikes = dislike_row[dislike_row < 0].index.values
    else:
        known_user_dislikes = []

    # Construct the neighborhood from the most similar items to the ones the
    # user has already liked. dict.fromkeys removes duplicates while keeping
    # first-seen order, so tie-breaking in nlargest() is reproducible between
    # runs (a set of strings has no stable order across interpreter sessions).
    most_similar_to_likes = game_neighbors.loc[known_user_likes]
    similar_list = list(dict.fromkeys(most_similar_to_likes.values.ravel()))
    neighborhood = gameXgame.loc[similar_list, similar_list]

    # A user vector containing only the neighborhood items
    user_vector = user_row.loc[similar_list]

    # Score: similarity-weighted sum of the user's likes, divided by each
    # candidate's total similarity within the neighborhood
    score = neighborhood.dot(user_vector).div(neighborhood.sum(axis=1))

    # Drop the known likes and dislikes so only unseen games are recommended
    score = score.drop(known_user_likes)
    score = score.drop(known_user_dislikes, errors='ignore')

    topscore = score.nlargest(10)

    # Save player ID, game names and game similarity scores
    recommended_users.append(user)
    recommended_names.append(topscore.index.tolist())
    recommended_scores.append(topscore.values.tolist())

# One row per user (indexed by steam id), columns 0..9 in rank order
game_recommend = pd.DataFrame(recommended_names, index=recommended_users)
game_recommend_scores = pd.DataFrame(recommended_scores, index=recommended_users)

iteration 179850

In [145]:
# Recommended game titles per user (rows are steam ids, columns are ranks)
game_recommend.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
76561197960267984,Aliens vs. Predator,Resident Evil 2,Tomb Raider,Amnesia: The Dark Descent,BioShock Infinite,A Plague Tale: Innocence,DOOM Eternal,Halo: The Master Chief Collection,Portal 2,Batman: Arkham Knight
76561197960268765,DOOM Eternal,Resident Evil 2,STAR WARS Jedi: Fallen Order,CODE VEIN,FINAL FANTASY XV WINDOWS EDITION,Risk of Rain 2,Destiny 2,DRAGON BALL FighterZ,MORDHAU,Remnant: From the Ashes
76561197960269155,Total War: WARHAMMER II,Assassin's Creed III Remastered,STAR WARS Jedi: Fallen Order,Total War: ROME II - Emperor Edition,Shadow of the Tomb Raider: Definitive Edition,Far Cry New Dawn,Sekiro: Shadows Die Twice,Planet Zoo,MORDHAU,A Plague Tale: Innocence
76561197960269294,Terraria,Slay the Spire,DRAGON BALL XENOVERSE 2,Risk of Rain,Halo: The Master Chief Collection,Remnant: From the Ashes,Skullgirls,DOOM Eternal,Sekiro: Shadows Die Twice,Devil May Cry 5
76561197960269645,Among Us,Counter-Strike: Global Offensive,Portal 2,Left 4 Dead 2,Bloons TD 6,Ravenfield,The Binding of Isaac: Rebirth,Fallout: New Vegas,Half-Life 2,Fall Guys: Ultimate Knockout


In [146]:
# Scores matching the recommended titles, in the same row/column layout
game_recommend_scores.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
76561197960267984,0.030982,0.022507,0.019024,0.018436,0.018104,0.016352,0.016254,0.015975,0.015852,0.015732
76561197960268765,0.07683,0.067248,0.064066,0.062007,0.053155,0.052918,0.048502,0.047494,0.046875,0.046416
76561197960269155,0.081284,0.052797,0.052104,0.046214,0.04484,0.038948,0.037996,0.037104,0.035048,0.033774
76561197960269294,0.047962,0.045824,0.045516,0.043749,0.039652,0.037697,0.034396,0.033525,0.032844,0.031398
76561197960269645,0.078445,0.078318,0.070763,0.070283,0.070129,0.059886,0.057821,0.055349,0.055053,0.054524


In [152]:
# Persist the recommendations and their scores to the interim data folder
game_recommend.to_csv(os.path.join(path_interim, "game_recommend.csv"))
game_recommend_scores.to_csv(os.path.join(path_interim, "game_recommend_scores.csv"))

In [16]:
# Optional shortcut: uncomment to reload game_recommend.csv and
# game_recommend_scores.csv from the interim folder instead of recomputing them.
# game_recommend = pd.read_csv(os.path.join(path_interim, "game_recommend.csv"), index_col=[0])
# game_recommend_scores = pd.read_csv(os.path.join(path_interim, "game_recommend_scores.csv"), index_col=[0])

### Validation

In [17]:
# Manually estimate recall: take the players with exactly 10 likes, hide their 4 most recent likes,
# and measure what fraction of the hidden likes appears in recommendations built from the remaining 6.

In [20]:
# Sum of likes per user and game; margins=True adds 'All' totals
likes_only_pivot_sum = likes_only.pivot_table(
    index='author.steamid',
    columns='name',
    values='voted_up',
    aggfunc='sum',
    margins=True,
).fillna(0)

In [30]:
# Gamers whose 'All' total is exactly 10 likes
likes10 = likes_only_pivot_sum[likes_only_pivot_sum['All'] == 10].index
len(likes10)

1381

In [112]:
# First 6 likes (ordered by review creation time) of every gamer with 10 likes.
#
# The two filters are combined into a single mask: indexing twice with masks
# built on the full frame makes pandas reindex the second mask and emit a
# "Boolean Series key will be reindexed" warning.
in_validation_set = game_model['author.steamid'].isin(likes10)
likes10_data = game_model[in_validation_set & game_model['voted_up']]

# Sorting on (user, timestamp) gives the same per-user chronological order as
# a groupby/apply sort, without relying on apply keeping the grouping column
# (deprecated in recent pandas).
likes10_sorted = (
    likes10_data
    .sort_values(['author.steamid', 'timestamp_created'])
    .reset_index(drop=True)
)

# Keep each user's 6 earliest likes; the remaining 4 are held out
likes6 = likes10_sorted.groupby('author.steamid').head(6)
likes6.head(18)

  likes10_data = game_model[game_model['author.steamid'].isin(likes10)][game_model['voted_up']==True]


Unnamed: 0,appid,recommendationid,score,timestamp_created,voted_up,author.steamid,author.num_games_owned,author.num_reviews,author.playtime_forever,author.playtime_last_two_weeks,...,Adventure,RPG,Strategy,Simulation,Casual,Massively,Sports,Racing,Other,voted_up_num
0,13520,41999138,0.4064,1526136706,True,76561197960454962,842,49,666.0,0.0,...,0,0,0,0,0,0,0,0,0,1
1,621060,46591479,0.4767,1542983480,True,76561197960454962,842,49,15.0,0.0,...,0,0,0,1,0,0,0,0,0,1
2,814380,50502147,0.7906,1557091921,True,76561197960454962,844,49,2865.0,0.0,...,1,0,0,0,0,0,0,0,0,1
3,692850,53462531,0.875,1562212805,True,76561197960454962,844,49,1067.0,0.0,...,1,1,0,0,0,0,0,0,0,1
4,976730,59500069,0.8898,1575816119,True,76561197960454962,845,49,3319.0,131.0,...,0,0,0,0,0,0,0,0,0,1
5,601150,59855326,0.6369,1576352149,True,76561197960454962,842,49,904.0,0.0,...,0,0,0,0,0,0,0,0,0,1
10,730,47442124,0.8223,1544460206,True,76561197960458629,232,56,20443.0,0.0,...,0,0,0,0,0,0,0,0,0,1
11,10,54623327,-0.6597,1565836735,True,76561197960458629,232,56,31430.0,0.0,...,0,0,0,0,0,0,0,0,0,1
12,240,54656854,-0.5423,1566011517,True,76561197960458629,232,56,1412.0,0.0,...,0,0,0,0,0,0,0,0,0,1
13,204100,60858923,0.4404,1577648234,True,76561197960458629,232,56,560.0,0.0,...,0,0,0,0,0,0,0,0,0,1


In [39]:
# Persist the retained likes to the interim data folder
likes6.to_csv(os.path.join(path_interim, "likes6.csv"))

In [108]:
# Build the same recommender inputs as before, but from the 6 retained likes
# of the validation users (plus all of those users' dislikes).
#
# Changes from the original cell, with the same resulting frames:
#   * filters use one explicit .loc mask instead of chained boolean indexing,
#     which emitted a "Boolean Series key will be reindexed" warning;
#   * .assign() replaces assignment to a column of a filtered slice;
#   * drop(columns=...) replaces the positional `axis` argument that pandas
#     2.0+ rejects;
#   * reindex() replaces the loop that inserted missing columns one at a time.
vote_columns = ['appid', 'author.steamid', 'voted_up']

likes6_only = (
    likes6.loc[likes6['voted_up'], vote_columns]
    .assign(voted_up=1)
    .merge(app_list, on='appid')
    .drop(columns='appid')
)

is_validation_dislike = game_model['author.steamid'].isin(likes10) & ~game_model['voted_up']
dislikes6_only = (
    game_model.loc[is_validation_dislike, vote_columns]
    .assign(voted_up=-1)
    .merge(app_list, on='appid')
    .drop(columns='appid')
)

# Create pivots, with the user id as a regular column and no column-axis name
likes6_only_pivot = (
    likes6_only
    .pivot_table(index='author.steamid', columns='name', values='voted_up')
    .fillna(0)
    .reset_index()
    .rename_axis(None, axis=1)
    .reset_index(drop=True)
)

dislikes6_only_pivot = (
    dislikes6_only
    .pivot_table(index='author.steamid', columns='name', values='voted_up')
    .fillna(0)
    .reset_index()
    .rename_axis(None, axis=1)
    .reset_index(drop=True)
)

# Make same columns as original: 'author.steamid' first, then every game of
# the full likes pivot, with games absent from the 6-like sample filled with 0.0
likes6_only_pivot = likes6_only_pivot.reindex(
    columns=likes_only_pivot.columns, fill_value=0.0
)

# Items only, drop users
likes6_only_pivot_items = likes6_only_pivot.drop(columns='author.steamid')
dislikes6_only_pivot_items = dislikes6_only_pivot.drop(columns='author.steamid')

  dislikes6_only = game_model[['appid', 'author.steamid','voted_up']][game_model['author.steamid'].isin(likes10)][game_model['voted_up']==False]


In [109]:
# Run the 6 retained votes of each validation user through the recommender and
# retain the 10 best recommendations together with their scores.
#
# Per-user results are collected in plain lists and the two result frames are
# built once after the loop: growing a DataFrame with .append() on every
# iteration is quadratic, and DataFrame.append was removed in pandas 2.0.

# Row position of each user in the validation dislikes pivot, computed once so
# the loop neither rescans the frame per user nor needs a bare `except`.
dislike6_row_by_user = {
    steamid: row
    for row, steamid in enumerate(dislikes6_only_pivot['author.steamid'])
}

recommended6_users = []
recommended6_names = []
recommended6_scores = []

for user_index, user in enumerate(likes6_only_pivot['author.steamid']):
    user_index_dis = dislike6_row_by_user.get(user)

    # Games the user has liked (non-zero entries of the user's row)
    user_row = likes6_only_pivot_items.iloc[user_index]
    known_user_likes = user_row[user_row > 0].index.values

    # Games the user has disliked, if the user appears in the dislikes pivot
    if user_index_dis is not None:
        dislike_row = dislikes6_only_pivot_items.iloc[user_index_dis]
        known_user_dislikes = dislike_row[dislike_row < 0].index.values
    else:
        known_user_dislikes = []

    # Construct the neighborhood from the most similar items to the ones the
    # user has already liked. dict.fromkeys removes duplicates while keeping
    # first-seen order, so tie-breaking in nlargest() is reproducible between
    # runs (a set of strings has no stable order across interpreter sessions).
    most_similar_to_likes = game_neighbors.loc[known_user_likes]
    similar_list = list(dict.fromkeys(most_similar_to_likes.values.ravel()))
    neighborhood = gameXgame.loc[similar_list, similar_list]

    # A user vector containing only the neighborhood items
    user_vector = user_row.loc[similar_list]

    # Score: similarity-weighted sum of the user's likes, divided by each
    # candidate's total similarity within the neighborhood
    score = neighborhood.dot(user_vector).div(neighborhood.sum(axis=1))

    # Drop the known likes and dislikes so only unseen games are recommended
    score = score.drop(known_user_likes)
    score = score.drop(known_user_dislikes, errors='ignore')

    topscore = score.nlargest(10)

    # Save player ID, game names and game similarity scores
    recommended6_users.append(user)
    recommended6_names.append(topscore.index.tolist())
    recommended6_scores.append(topscore.values.tolist())

# One row per user (indexed by steam id), columns 0..9 in rank order
game_recommend6 = pd.DataFrame(recommended6_names, index=recommended6_users)
game_recommend6_scores = pd.DataFrame(recommended6_scores, index=recommended6_users)

In [150]:
# Recover the held-out likes: (user, game) pairs present among the 10 likes
# but missing from the 6 that were fed to the recommender
likes10_games = likes10_sorted.merge(app_list, on='appid')[['author.steamid', 'name']]
likes6_games = likes6_only[['author.steamid', 'name']]
likes_merge = (
    likes10_games
    .merge(likes6_games, how='left', indicator=True)
    .sort_values(by=['author.steamid', 'name'])
)
likes_diff = likes_merge[likes_merge['_merge'] != 'both']
likes_diff.head(12)

Unnamed: 0,author.steamid,name,_merge
518,76561197960454962,DOOM Eternal,left_only
723,76561197960454962,Remnant: From the Ashes,left_only
689,76561197960454962,Resident Evil 3,left_only
417,76561197960454962,STAR WARS Jedi: Fallen Order,left_only
1748,76561197960458629,Among Us,left_only
1608,76561197960458629,DEATH STRANDING,left_only
1642,76561197960458629,Fall Guys: Ultimate Knockout,left_only
1329,76561197960458629,Terraria,left_only
2175,76561197960478145,Beat Saber,left_only
2128,76561197960478145,Grand Theft Auto: Vice City,left_only


In [140]:
# Number of retained (user, game) likes
len(likes6_games)

8286

In [139]:
# Number of (user, game) likes before hiding any
len(likes10_games)

13810

In [151]:
# Number of held-out (user, game) likes
len(likes_diff)

5524

In [179]:
# Reshape the recommendations to long form, one (user, rank, game) row per
# recommendation, so they can be matched against the held-out likes
game_recommend6_stack = (
    game_recommend6
    .stack()
    .rename_axis(['author.steamid', 'num'])
    .rename('name')
    .reset_index()
)
game_recommend6_stack.head(20)

Unnamed: 0,author.steamid,num,name
0,76561197960454962,0,CODE VEIN
1,76561197960454962,1,Resident Evil 2
2,76561197960454962,2,DOOM Eternal
3,76561197960454962,3,STAR WARS Jedi: Fallen Order
4,76561197960454962,4,FINAL FANTASY XV WINDOWS EDITION
5,76561197960454962,5,Ori and the Will of the Wisps
6,76561197960454962,6,DRAGON BALL FighterZ
7,76561197960454962,7,Risk of Rain 2
8,76561197960454962,8,Remnant: From the Ashes
9,76561197960454962,9,Resident Evil 3


In [180]:
# Match held-out likes to recommendations on the shared columns and count the
# pairs found on both sides
recall_merge = likes_diff[['author.steamid', 'name']].merge(
    game_recommend6_stack,
    indicator=True,
)
len(recall_merge.loc[recall_merge['_merge'] == 'both'])

1431

In [182]:
# Recall = held-out likes that were recommended / all held-out likes.
# The hits are counted explicitly via the merge indicator, so the numerator
# stays correct even if recall_merge is later built with a non-inner join.
recall_hits = len(recall_merge[recall_merge['_merge'] == 'both'])
print("Recall rate: ", recall_hits/len(likes_diff))

Recall rate:  0.25905141202027515
