In [2]:
import numpy as np
import pandas as pd
from collections import defaultdict

In [5]:
# load the three pickled inputs in one step
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source
dists, orig_profiles, personalities = (
    pd.read_pickle('./src/dists.pkl'),
    pd.read_pickle('./src/orig_profiles.pkl'),
    pd.read_pickle('./src/personalities.pkl'),
)

In [9]:
# resurface the recommender function
def get_friends(screen_name, num_recs=3):
    """
    Outputs a list of friend recommendations based on cosine similarity of the input screen name.
    ---
    :param screen_name: Screen name of the user with which to get friend recommendations.
        Looked up as a column label of the module-level `dists` frame.
    :param num_recs: The number of friend recommendations to return.
    :return: List of the highest-scoring other users (row labels of `dists`), best match
        first, at most `num_recs` long. Empty list if `screen_name` is not a column of `dists`.
    """
    # Without this guard an unknown name selects zero columns, every row then
    # scores 0, and the function would hand back arbitrary users as "matches".
    if screen_name not in dists.columns:
        return []

    # The list selector always yields a DataFrame (even if the label is duplicated),
    # so the row-wise sum collapses it to one score per user without a Python-level apply.
    scores = dists[[screen_name]].sum(axis=1).sort_values(ascending=False)

    # A user should never be recommended to themselves.
    ranked_users = scores.index[~scores.index.isin([screen_name])].tolist()

    return ranked_users[:num_recs]

In [10]:
# smoke-test the recommender on one known screen name (default of 3 recommendations)
get_friends(screen_name='FlipCeej')

['GuppyRoc', 'iRawn1c', 'annachoiii12']

In [11]:
# map every screen name in the index of dists to its top 10 recommendations
recs = defaultdict(list)
recs.update((name, get_friends(name, num_recs=10)) for name in dists.index)

In [27]:
# tabulate the recommendations: one row per user, one rec_<rank> column per
# recommendation, with the dict keys moved out of the index into screen_name
friend_recs = (
    pd.DataFrame.from_dict(
        recs,
        orient='index',
        columns=['rec_' + str(rank) for rank in range(10)],
    )
    .reset_index()
    .rename(columns={'index': 'screen_name'})
)
friend_recs.head()

Unnamed: 0,screen_name,rec_0,rec_1,rec_2,rec_3,rec_4,rec_5,rec_6,rec_7,rec_8,rec_9
0,baka_brooks,justin_quion,nathanmpark,vadimyb,MahchuLapuz,notwhitemichael,albedzxo,speak_ezy23,WorddART,jamesezie,ItMeeNickLee
1,Kwammentary,emfairmill,wokebich,tonytonymarlow,_ProjectKO_,karma1097,Michael_Lapuz,HollyHalligan,Maramore,stealsdeals12,cassandratle
2,D1_data,hemmo199666,TKTalksTech,eledwrites,campuscookieuva,ceugenelee,thejennaayyy,KevinCPadilla,m4nang,nikitasneg,Bunsunkun
3,lolegra,largierthanlife,Kimmy_Gene,helloitscamille,albedzxo,ashlee_ong,MahchuLapuz,esm404,lhousj,WorddART,stinefication
4,stealsdeals12,Michael_Lapuz,karma1097,emfairmill,MeggzW,wokebich,taytastic,Kwammentary,mhr_minaz,_ProjectKO_,smasongames


In [20]:
# preview the first two profile rows to see which columns are available for joining
orig_profiles.iloc[:2]

Unnamed: 0,user_id,screen_name,followers_count,friends_count,favourites_count,statuses_count,follower_ids,friend_ids
0,2649540547,baka_brooks,234,257,4931,218,"[1094750013304029187, 1235337664083222528, 133...","[1235337664083222528, 14372486, 11598303501027..."
1,1094750013304029187,Kwammentary,534,1748,698,210,"[2831103656, 262279955, 1067652594884009984, 1...","[1053664338958016512, 1117781049411624961, 291..."


In [21]:
# preview the first two personality rows to see which columns are available for joining
personalities.iloc[:2]

Unnamed: 0,user_id,all_tweets,Openness,Conscientiousness,Extraversion,Agreeableness,Emotional range
0,2649540547,"@nbcsnl ok this is legendary, 𝗙𝗹𝗶𝗽𝗽𝗲𝗱 𝘁𝗵𝗲 𝘀𝘄𝗶𝘁...",0.730236,0.334933,0.411628,0.106015,0.552724
1,1094750013304029187,We in the Bay Area should do this too https://...,0.906036,0.550105,0.648426,0.297694,0.500949


In [37]:
# attach profile info: inner-join the first six profile columns on screen_name,
# then keep only the first merged row for each screen name
stats_recs = (
    friend_recs
    .merge(orig_profiles.iloc[:, :6], on='screen_name')
    .drop_duplicates(subset='screen_name', keep='first')
)
print(stats_recs.shape)
stats_recs.head()

(179, 16)


Unnamed: 0,screen_name,rec_0,rec_1,rec_2,rec_3,rec_4,rec_5,rec_6,rec_7,rec_8,rec_9,user_id,followers_count,friends_count,favourites_count,statuses_count
0,baka_brooks,justin_quion,nathanmpark,vadimyb,MahchuLapuz,notwhitemichael,albedzxo,speak_ezy23,WorddART,jamesezie,ItMeeNickLee,2649540547,234,257,4931,218
3,Kwammentary,emfairmill,wokebich,tonytonymarlow,_ProjectKO_,karma1097,Michael_Lapuz,HollyHalligan,Maramore,stealsdeals12,cassandratle,1094750013304029187,534,1748,698,210
4,D1_data,hemmo199666,TKTalksTech,eledwrites,campuscookieuva,ceugenelee,thejennaayyy,KevinCPadilla,m4nang,nikitasneg,Bunsunkun,1235337664083222528,5,64,7,1
5,lolegra,largierthanlife,Kimmy_Gene,helloitscamille,albedzxo,ashlee_ong,MahchuLapuz,esm404,lhousj,WorddART,stinefication,1333491954,169,346,2614,2647
6,stealsdeals12,Michael_Lapuz,karma1097,emfairmill,MeggzW,wokebich,taytastic,Kwammentary,mhr_minaz,_ProjectKO_,smasongames,1219308741369253894,93,1150,1130,878


In [42]:
# attach the personality columns (all_tweets is left out) by inner-joining on
# user_id, then drop user_id since it is only needed as the join key
full_user_recs = (
    stats_recs
    .merge(personalities.drop(columns=['all_tweets']), on='user_id')
    .drop(columns=['user_id'])
)
full_user_recs.head()

Unnamed: 0,screen_name,rec_0,rec_1,rec_2,rec_3,rec_4,rec_5,rec_6,rec_7,rec_8,rec_9,followers_count,friends_count,favourites_count,statuses_count,Openness,Conscientiousness,Extraversion,Agreeableness,Emotional range
0,baka_brooks,justin_quion,nathanmpark,vadimyb,MahchuLapuz,notwhitemichael,albedzxo,speak_ezy23,WorddART,jamesezie,ItMeeNickLee,234,257,4931,218,0.730236,0.334933,0.411628,0.106015,0.552724
1,Kwammentary,emfairmill,wokebich,tonytonymarlow,_ProjectKO_,karma1097,Michael_Lapuz,HollyHalligan,Maramore,stealsdeals12,cassandratle,534,1748,698,210,0.906036,0.550105,0.648426,0.297694,0.500949
2,D1_data,hemmo199666,TKTalksTech,eledwrites,campuscookieuva,ceugenelee,thejennaayyy,KevinCPadilla,m4nang,nikitasneg,Bunsunkun,5,64,7,1,0.708852,0.768717,0.294999,0.15403,0.112169
3,lolegra,largierthanlife,Kimmy_Gene,helloitscamille,albedzxo,ashlee_ong,MahchuLapuz,esm404,lhousj,WorddART,stinefication,169,346,2614,2647,0.518856,0.069776,0.637403,0.241616,0.757079
4,stealsdeals12,Michael_Lapuz,karma1097,emfairmill,MeggzW,wokebich,taytastic,Kwammentary,mhr_minaz,_ProjectKO_,smasongames,93,1150,1130,878,0.46632,0.706029,0.187577,0.058168,0.317718


In [46]:
# export to an Excel workbook (single sheet named 'data', row index not written)
full_user_recs.to_excel(
    excel_writer='full_user_recs.xlsx',
    sheet_name='data',
    index=False,
)