In [1]:
#import libraries
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds

In [2]:
# Load the cleaned user-activity log: only the first 1.5M rows and the three
# identifier columns are read. Every logged row is flagged as one view.
con_dat = pd.read_csv('processeddata_neural.csv', nrows = 1500000,
                      usecols = ['UsrAgentTokenAnum','RssAnum','RssTagAnum'])
con_dat['viewed'] = 1

# Load the wallpaper metadata and rename its columns to match the activity log.
# The path is written with '/' instead of '\': '\c' is an invalid escape
# sequence in a normal string literal (it triggers a warning on modern Python)
# and a backslash separator only resolves on Windows, whereas '/' works on
# every platform.
wallp_df = pd.read_csv('Raw/content-wallpaper.csv',
                       usecols = ['rssanum','UniqueID','Tag','Viewed']).rename(columns = {'rssanum' : 'RssAnum'
                                                                                          ,'Viewed' : 'totalView'})
# Wallpaper lookup without the totalView column (used when attaching metadata
# to a single user's history).
wallpaper_df = wallp_df[['RssAnum','UniqueID','Tag']]

In [3]:
# ---- Tunable settings for the recommendation run ----

# userId (1-based) of the user to build recommendations for
userID = 3012

# k passed to the truncated SVD (suggested range 20 ~ 100)
top_n_matrix = 100

# Number of recommendations requested from the per-genre listing
recommendations_num = 2

# Predictions below this value are discarded
pred_cutoff = 0

In [4]:
# Preview the first rows of the wallpaper lookup table.
wallpaper_df.head()

Unnamed: 0,RssAnum,UniqueID,Tag
0,1886709,WallPaper-Abstract-054,Abstract
1,1886710,WallPaper-Animal-035,Animal
2,1886711,WallPaper-Pattern-041,Pattern
3,1886724,WallPaper-Abstract-055,Abstract
4,1886609,WallPaper-Pattern-040,Pattern


In [5]:
# Assign a sequential 1-based userId to every distinct UsrAgentTokenAnum
# (in order of first appearance) and attach it to each activity row.
f1 = pd.DataFrame({'UsrAgentTokenAnum': con_dat['UsrAgentTokenAnum'].unique()})
f1['userId'] = np.arange(1, len(f1) + 1)

m1 = con_dat.merge(f1, on='UsrAgentTokenAnum', how='left')


In [6]:
# Attach wallpaper metadata to every activity row (left join on RssAnum),
# then keep only the columns used downstream, in a fixed order.
merged_data = m1.merge(wallp_df, on='RssAnum', how='left')[
    ['UsrAgentTokenAnum', 'userId', 'RssAnum', 'totalView', 'RssTagAnum', 'Tag']
]

In [7]:
# Preview the first two rows of the merged activity + wallpaper table.
merged_data.head(2)

Unnamed: 0,UsrAgentTokenAnum,userId,RssAnum,totalView,RssTagAnum,Tag
0,231951314,1,1886709,2100,93216,Abstract
1,232555868,2,1886709,2100,93216,Abstract


In [8]:
# Number of activity rows for every (userId, RssAnum) pair.
t_df = (
    merged_data
    .groupby(['userId', 'RssAnum'])
    .size()
    .reset_index(name='useridWallpHit')
)

In [9]:
# Preview the first two per-user, per-wallpaper hit counts.
t_df.head(2)

Unnamed: 0,userId,RssAnum,useridWallpHit
0,1,1886709,1
1,2,1886709,1


In [10]:
# (rows, columns) of the hit-count table.
t_df.shape

(1127467, 3)

In [11]:
# User x wallpaper matrix of hit counts (useridWallpHit); pairs that never
# occur in t_df are filled with 0.
v_df = (
    t_df
    .pivot_table(index='userId', columns='RssAnum', values='useridWallpHit')
    .fillna(0)
)

In [12]:
# Preview the first two rows of the user x wallpaper pivot.
v_df.head(2)

RssAnum,1885584,1885604,1885605,1885606,1885607,1885842,1885843,1885844,1885845,1885846,...,2431854,2431862,2431865,2431871,2431876,2431877,2431880,2431882,2431884,2431886
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Drop the intermediate frames that are no longer needed to free memory.
del con_dat, f1, m1, t_df

In [14]:
# Raw array behind the user x wallpaper pivot.
V = v_df.values

# Mean hit count of each user, taken across all wallpaper columns.
user_viewed_mean = V.mean(axis=1)

# Centre every user's row on that user's own mean; the mean is turned into a
# column vector so it broadcasts across the wallpaper columns.
V_demeaned = V - user_viewed_mean[:, np.newaxis]


In [15]:
##### Singular Value Decomposition #####
# Truncated SVD of the mean-centred matrix: V_demeaned ~ U . diag(sigma) . Vt
# U is the user "features" matrix
# sigma holds the k singular values (essentially weights); svds returns them as a 1-D array
# Vt is the wallpaper "features" matrix
# k = number of singular values/vectors kept for the lower-rank approximation

U, sigma, Vt = svds(V_demeaned, k = top_n_matrix )

In [16]:
# The dense input matrices are no longer needed once the SVD is computed.
del V, V_demeaned

In [17]:
# svds returns the singular values as a 1-D array; expand them into a (k x k)
# diagonal matrix so they can be used in the U . sigma . Vt product.
# The ndim guard keeps this cell idempotent: np.diag applied to an already
# 2-D matrix would extract its diagonal back into a 1-D array on a re-run.
if np.ndim(sigma) == 1:
    sigma = np.diag(sigma)

In [18]:
# Rebuild the low-rank approximation of the mean-centred matrix from the three
# SVD factors. The per-user mean is not added back to the result.
all_user_predicted_wallp = U.dot(sigma).dot(Vt)
# One row per user (default integer index), one column per wallpaper.
preds_df = pd.DataFrame(data=all_user_predicted_wallp, columns=v_df.columns)

In [19]:
# Only preds_df is needed from here on; free the SVD factors and the pivot.
del user_viewed_mean, U, sigma, Vt, all_user_predicted_wallp, v_df

In [20]:
# Preview the predicted scores of the first two users.
preds_df.head(2)

RssAnum,1885584,1885604,1885605,1885606,1885607,1885842,1885843,1885844,1885845,1885846,...,2431854,2431862,2431865,2431871,2431876,2431877,2431880,2431882,2431884,2431886
0,0.000618,0.001291,0.001192,-0.001834,0.000794,0.001514,0.001176,-0.00203,0.00143,0.001386,...,0.001083,0.001168,0.001062,-0.002954,0.001215,0.000676,0.001242,0.001255,-0.003809,0.00135
1,0.002005,0.002629,0.002492,-0.000528,0.001954,0.00272,0.00241,-0.004086,0.002684,0.004307,...,0.001967,0.002228,0.002845,-0.001946,0.002273,0.00236,0.002397,0.002336,-0.008906,0.002415


In [21]:
def sorted_user_prediction(predictions_df, uid):
    """Return one user's predicted scores sorted from highest to lowest.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        One row per user and one column per wallpaper. The user is looked up
        by row position (``uid - 1``), not by index label.
    uid : int
        1-based user id.

    Returns
    -------
    pandas.DataFrame
        Default integer index and two columns: the wallpaper identifier
        (named after ``predictions_df.columns.name``, or ``'index'`` when the
        columns are unnamed) and ``'Predictions'``, sorted descending.

    Raises
    ------
    IndexError
        If ``uid`` is not within ``1 .. len(predictions_df)``.
    """
    n_users = len(predictions_df)
    # Reject uid <= 0 explicitly: a zero/negative position would otherwise
    # wrap around and silently address rows from the end of the frame.
    if not 1 <= uid <= n_users:
        raise IndexError(
            'uid {0} is out of range; expected 1..{1}'.format(uid, n_users))

    user_row_number = uid - 1  # UserID starts at 1, not 0
    user_scores = predictions_df.iloc[user_row_number]

    # Name the score column directly instead of renaming by row label, so the
    # result does not depend on predictions_df having a default integer index.
    ranked = user_scores.to_frame(name='Predictions').sort_values(
        by='Predictions', ascending=False)

    return ranked.reset_index()

# Ranked predictions for the configured userID.
s_u_p = sorted_user_prediction(preds_df, userID)


In [22]:
# Preview the two highest-ranked predictions for the selected user.
s_u_p.head(2)

Unnamed: 0,RssAnum,Predictions
0,1886612,0.991986
1,1969353,0.104515


In [23]:
def user_viewed_wallp(uid, wallpap_df, original_user_df):
    """Return the activity rows of one user with wallpaper metadata attached.

    Parameters
    ----------
    uid : int
        Value matched against ``original_user_df['userId']``.
    wallpap_df : pandas.DataFrame
        Wallpaper metadata keyed by ``'RssAnum'``.
    original_user_df : pandas.DataFrame
        Activity rows with at least ``'userId'`` and ``'RssAnum'`` columns.

    Returns
    -------
    pandas.DataFrame
        The user's rows left-joined with ``wallpap_df`` on ``'RssAnum'``.
        When both inputs carry a ``'Tag'`` column, the one from
        ``wallpap_df`` is kept.
    """
    user_data = original_user_df[original_user_df['userId'] == uid]

    # Drop the activity-side Tag before merging so the wallpaper-side Tag wins.
    # Doing it up front (instead of dropping 'Tag_x' after the merge) gives the
    # same result when both frames have a Tag column, and no longer raises
    # KeyError when only one of them does.
    if 'Tag' in user_data.columns and 'Tag' in wallpap_df.columns:
        user_data = user_data.drop('Tag', axis=1)

    return user_data.merge(wallpap_df, how='left', on='RssAnum')

# Activity rows of the configured userID joined with the wallpaper lookup table.
user_already_viewed = user_viewed_wallp(userID, wallpaper_df, merged_data)

In [24]:
# Display every wallpaper row recorded for the selected user.
user_already_viewed

Unnamed: 0,UsrAgentTokenAnum,userId,RssAnum,totalView,RssTagAnum,UniqueID,Tag
0,266133909,3012,1886710,7480,43076,WallPaper-Animal-035,Animal
1,266133909,3012,1886612,15257,93213,WallPaper-Nature-083,Nature
2,266133909,3012,1927198,6238,43076,WallPaper-Animal-071,Animal


In [25]:
def wallp_recommendation(wallp_raw, user_full, sorted_user_predictions, cutoff=None):
    """Split the catalogue into predicted tags and wallpapers not yet viewed.

    Parameters
    ----------
    wallp_raw : pandas.DataFrame
        Wallpaper catalogue with at least ``'RssAnum'`` and ``'Tag'`` columns.
    user_full : pandas.DataFrame
        Rows the user has already viewed; only ``'RssAnum'`` is read.
    sorted_user_predictions : pandas.DataFrame
        ``'RssAnum'`` / ``'Predictions'`` pairs for the user.
    cutoff : number, optional
        Minimum prediction value to keep (inclusive). When omitted, the
        notebook-level ``pred_cutoff`` setting is used, as before.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``pred_wallpRaw``: ``'Tag'`` and ``'Predictions'`` for every prediction
        at or above the cutoff, in the order of ``sorted_user_predictions``.
        ``wallp_not_seen``: rows of ``wallp_raw`` whose ``'RssAnum'`` does not
        appear in ``user_full``.
    """
    if cutoff is None:
        # Fall back to the notebook-wide setting so existing calls keep working.
        cutoff = pred_cutoff

    # Keep only predictions at or above the cutoff.
    kept_predictions = sorted_user_predictions[
        sorted_user_predictions['Predictions'] >= cutoff]

    # Catalogue rows the user has not viewed yet.
    already_viewed = wallp_raw['RssAnum'].isin(user_full['RssAnum'])
    wallp_not_seen = wallp_raw[~already_viewed]

    # Look up the tag of every kept prediction (left join keeps prediction order).
    pred_wallpRaw = kept_predictions.merge(wallp_raw, how='left', on='RssAnum')
    pred_wallpRaw = pred_wallpRaw[['Tag', 'Predictions']]
    return pred_wallpRaw, wallp_not_seen

# predicted_tag: Tag/Predictions pairs that pass the cutoff; not_seen: catalogue rows this user has not viewed.
predicted_tag, not_seen = wallp_recommendation(wallp_df, user_already_viewed, s_u_p)

In [26]:
# Preview the first five wallpapers the selected user has not viewed.
not_seen.head(5)

Unnamed: 0,RssAnum,UniqueID,totalView,Tag
0,1886709,WallPaper-Abstract-054,2100,Abstract
2,1886711,WallPaper-Pattern-041,1851,Pattern
3,1886724,WallPaper-Abstract-055,4192,Abstract
4,1886609,WallPaper-Pattern-040,2092,Pattern
6,1886613,WallPaper-Nature-084,3161,Nature


In [27]:
# List the most-viewed wallpapers for each predicted genre (Tag)

def list_top_wallpFromEachGenre(recom_tag, recom_wallp_list, recom_count=5):
    """Pick the most-viewed wallpapers for every predicted tag.

    Parameters
    ----------
    recom_tag : pandas.DataFrame
        Must contain a ``'Tag'`` column; its unique values (in order of first
        appearance) are the genres to recommend from.
    recom_wallp_list : pandas.DataFrame
        Candidate wallpapers with at least ``'Tag'`` and ``'totalView'``.
    recom_count : int, default 5
        Upper bound on wallpapers returned per tag. The effective number is
        the smaller of this value and a limit based on the number of tags
        (fewer than 3 tags: 5, fewer than 7 tags: 3, otherwise 2). With the
        default of 5 the tag-based limit alone decides.

    Returns
    -------
    pandas.DataFrame
        Rows of ``recom_wallp_list``, grouped by tag in ``recom_tag`` order and
        sorted by ``'totalView'`` descending within each tag, with a fresh
        0..n-1 index. Empty (same columns) when there are no tags.
    """
    pred_Tag = recom_tag['Tag'].unique()
    tag_count = len(pred_Tag)

    # The more genres are predicted, the fewer wallpapers are taken from each.
    if tag_count < 3:
        tag_based_limit = 5
    elif tag_count < 7:
        tag_based_limit = 3
    else:
        tag_based_limit = 2
    # Honour the caller's recom_count as a cap instead of discarding it.
    per_tag = min(recom_count, tag_based_limit)

    # Filter on the frame that was passed in (not on a notebook global), so the
    # boolean mask always lines up with the rows being selected.
    top_per_tag = [
        recom_wallp_list[recom_wallp_list['Tag'] == tag]
        .sort_values('totalView', ascending=False)
        .head(per_tag)
        for tag in pred_Tag
    ]

    if not top_per_tag:
        # pd.concat rejects an empty list; return an empty frame with the same columns.
        return recom_wallp_list.iloc[0:0].reset_index(drop=True)

    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(top_per_tag).reset_index(drop=True)

# Most-viewed unseen wallpapers for each predicted tag.
top_wallpFromEachGenre = list_top_wallpFromEachGenre(predicted_tag, not_seen, recommendations_num)    

In [28]:
# Display the final per-genre recommendations.
top_wallpFromEachGenre

Unnamed: 0,RssAnum,UniqueID,totalView,Tag
0,1969328,WallPaper-Nature-146,23561,Nature
1,1869193,WallPaper-Nature-045,22099,Nature
2,1927203,WallPaper-Animal-072,35850,Animal
3,2009932,WallPaper-Animal-078,29549,Animal
4,2170975,WallPaper-Celebrity-019,6570,Celebrity
5,2170941,WallPaper-Celebrity-007,6361,Celebrity
6,1862890,WallPaper-Emotion-021,22385,Emotion
7,1862692,WallPaper-Emotion-003,21493,Emotion
8,1969323,WallPaper-Abstract-104,19372,Abstract
9,1856593,WallPaper-Abstract-001,18494,Abstract


In [31]:
# NOTE(review): repeats the display of top_wallpFromEachGenre from the cell above; the output is identical.
top_wallpFromEachGenre

Unnamed: 0,RssAnum,UniqueID,totalView,Tag
0,1969328,WallPaper-Nature-146,23561,Nature
1,1869193,WallPaper-Nature-045,22099,Nature
2,1927203,WallPaper-Animal-072,35850,Animal
3,2009932,WallPaper-Animal-078,29549,Animal
4,2170975,WallPaper-Celebrity-019,6570,Celebrity
5,2170941,WallPaper-Celebrity-007,6361,Celebrity
6,1862890,WallPaper-Emotion-021,22385,Emotion
7,1862692,WallPaper-Emotion-003,21493,Emotion
8,1969323,WallPaper-Abstract-104,19372,Abstract
9,1856593,WallPaper-Abstract-001,18494,Abstract


In [None]:
#Rough
# NOTE(review): scratch cell with no execution count. It reads V and
# V_demeaned, which an earlier "Releasing Memory" cell deletes, so on a fresh
# top-to-bottom run this cell raises NameError.
# Masks NaN entries of V so they are ignored when averaging.
masked_data = np.ma.masked_array(V, np.isnan(V))
# NOTE(review): ten hard-coded equal weights with axis=1 - presumably only
# valid when V has exactly ten columns; confirm before reusing.
weights = [1, 1, 1,1,1,1,1,1,1,1]
average = np.ma.average(masked_data, axis=1, weights=weights)
# Fully-masked rows become NaN in the resulting per-row mean.
user_viewed_mean = average.filled(np.nan)

#user_viewed_mean = np.mean(V[~np.isnan(V)], axis=1)
#A[~np.isnan(A)].mean()
# Replace NaN entries of the de-meaned matrix with 0 (in place).
V_demeaned[np.isnan(V_demeaned)] = 0