In [1]:
from keras.models import load_model
from keras import regularizers
import pandas as pd
import numpy as np
from keras.models import Sequential, Model
from keras.layers import Embedding, Reshape, Activation, Input, Dense, Flatten, Dropout
from keras.layers.merge import Dot, multiply, concatenate
from keras.utils import np_utils
from keras.utils.data_utils import get_file
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import skipgrams
from collections import defaultdict
import pandas as pd
from sklearn.model_selection import train_test_split
import numba as nb


Using TensorFlow backend.


In [2]:
class read_train_model():
    """Load user/post interaction data, train a Keras recommender and score it.

    Typical usage is ``read_data()`` followed by ``train_model()`` and then
    ``get_estimation_data()``.

    Attributes set by ``read_data()``:
        df_in:    full frame with ``UserId``/``PostId`` replaced by integer
                  category codes and a ``Rating`` column.
        df_train: training part of ``df_in``.
        df_test:  held-out part of ``df_in``.
        n_users:  number of distinct user codes in ``df_in``.
        n_posts:  number of distinct post codes in ``df_in``.
    """

    # Interaction counters that are binarised and summed into ``Rating``.
    INTERACTION_COLUMNS = ('Likes', 'Comments', 'Shares', 'Downloads', 'Views')
    # Parameters of the train/test split; shared by read_data() and
    # user_post_ids() so both produce the same row partition.
    TEST_SIZE = 0.1
    RANDOM_STATE = 42
    # Network hyper-parameters.
    EMBEDDING_DIM = 10
    HIDDEN_UNITS = 10
    N_HIDDEN_LAYERS = 4
    DROPOUT_RATE = 0.4
    L2_PENALTY = 0.001
    # Default location of the saved model.
    MODEL_PATH = 'recommender_model.h5'

    def __init__(self, data_path):
        """Remember the CSV location; no I/O happens until ``read_data()``.

        Args:
            data_path: path of the '|'-separated interaction CSV.
        """
        self.data_path = data_path
        self.df_in = pd.DataFrame()
        self.df_train = pd.DataFrame()
        self.df_test = pd.DataFrame()
        # Previously only ``n_user`` was initialised here while every other
        # method used ``n_users``; the canonical name is now set up front.
        self.n_users = 0
        self.n_posts = 0

    @property
    def n_user(self):
        """Legacy alias of ``n_users`` kept for backward compatibility."""
        return self.n_users

    @n_user.setter
    def n_user(self, value):
        self.n_users = value

    def _load_ratings(self):
        """Read the CSV and collapse the interaction counters into ``Rating``.

        Every interaction column has its positive values replaced by 1, the
        columns are summed row-wise into ``Rating`` and then dropped.

        Returns:
            A new DataFrame with the original id columns plus ``Rating``.
        """
        frame = pd.read_csv(self.data_path, sep='|')
        columns = list(self.INTERACTION_COLUMNS)
        for column in columns:
            frame.loc[frame[column] > 0, column] = 1
        # skipna=False keeps the semantics of the explicit ``a + b + ...`` sum.
        frame['Rating'] = frame[columns].sum(axis=1, skipna=False)
        return frame.drop(columns, axis=1)

    def _split(self, frame):
        """Split ``frame`` into (train, test) with the class-wide settings."""
        train, test = train_test_split(
            frame, test_size=self.TEST_SIZE, random_state=self.RANDOM_STATE)
        return train, test

    def read_data(self):
        """Load the data, encode the ids as integer codes and split it.

        Populates ``df_in``, ``df_train``, ``df_test``, ``n_users`` and
        ``n_posts``.
        """
        frame = self._load_ratings()
        # Category codes give contiguous integer ids usable as embedding indices.
        frame['UserId'] = frame['UserId'].astype('category').cat.codes.values
        frame['PostId'] = frame['PostId'].astype('category').cat.codes.values

        self.df_in = frame
        self.df_train, self.df_test = self._split(frame)
        self.n_users = frame['UserId'].nunique()
        self.n_posts = frame['PostId'].nunique()

    def user_post_ids(self):
        """Return the (train, test) split with the *original* user/post ids.

        The file is read again and split with the same settings as
        ``read_data()``, but the id columns are left un-encoded.

        Returns:
            Tuple ``(train, test)`` of DataFrames.
        """
        return self._split(self._load_ratings())

    def _embedding_tower(self, n_ids, input_name, embedding_name, flatten_name):
        """Build one id -> embedding -> dense -> dropout -> flatten branch.

        Returns:
            Tuple ``(input_tensor, flattened_vector)``.
        """
        ids = Input(shape=[1], name=input_name)
        embedded = Embedding(n_ids + 1, self.EMBEDDING_DIM, name=embedding_name)(ids)
        hidden = Dense(
            self.HIDDEN_UNITS,
            activation='relu',
            kernel_regularizer=regularizers.l2(self.L2_PENALTY),
        )(embedded)
        # The original code instantiated Dropout without calling it on a
        # tensor, so it never became part of the graph; it is applied here.
        hidden = Dropout(self.DROPOUT_RATE)(hidden)
        return ids, Flatten(name=flatten_name)(hidden)

    def define_model(self):
        """Build and compile the recommender network.

        Two embedding branches (post and user) are combined by a dot product
        and followed by a stack of dense layers ending in a single linear
        output. The model takes ``[user_ids, post_ids]`` as input and is
        compiled with MSE loss, the Adam optimizer and an MAE metric.

        Returns:
            The compiled Keras ``Model``.
        """
        post_input, post_vec = self._embedding_tower(
            self.n_posts, "post-Input", "post-Embedding", "Flatten-post")
        user_input, user_vec = self._embedding_tower(
            self.n_users, "User-Input", "User-Embedding", "Flatten-Users")

        hidden = Dot(name="Dot-Product", axes=1)([post_vec, user_vec])
        for _ in range(self.N_HIDDEN_LAYERS):
            hidden = Dense(self.HIDDEN_UNITS, activation='relu')(hidden)
        output = Dense(1, activation='linear')(hidden)

        model = Model([user_input, post_input], output)
        model.compile(loss='mse', optimizer='adam', metrics=["mae"])
        return model

    def train_model(self, epochs=1, validation_split=0.1, model_path=None):
        """Fit a freshly defined model on ``df_train`` and save it to disk.

        Args:
            epochs: number of training epochs (default 1, as before).
            validation_split: fraction of the training data passed to Keras
                as ``validation_split`` (default 0.1, as before).
            model_path: where to save the model; defaults to ``MODEL_PATH``.

        Returns:
            The Keras ``History`` object returned by ``fit``.
        """
        model = self.define_model()
        history = model.fit(
            [self.df_train.UserId.values, self.df_train.PostId.values],
            self.df_train.Rating.values,
            validation_split=validation_split,
            epochs=epochs,
            verbose=1,
        )
        model.save(model_path or self.MODEL_PATH)
        return history

    def get_model(self, model_path=None):
        """Load the saved model from ``model_path`` (default ``MODEL_PATH``)."""
        model = load_model(model_path or self.MODEL_PATH)
        print('model loaded')
        return model

    def get_estimation_data(self):
        """Score every (user, post) pair that can be formed from the test split.

        Predictions are made on the encoded ids in ``df_test`` and labelled
        with the original ids obtained from ``user_post_ids()``. The two are
        matched by position, which relies on both splits being produced from
        the same file with the same split settings.

        Returns:
            DataFrame with columns ``estimation``, ``post_id`` and ``user_id``,
            ordered by ``user_id`` and, within each user, by descending
            ``estimation``.

        Raises:
            ValueError: if the encoded and raw test splits do not contain the
                same number of distinct users or posts.
        """
        _, raw_test = self.user_post_ids()

        encoded_posts = np.asarray(self.df_test.PostId.unique())
        encoded_users = np.asarray(self.df_test.UserId.unique())
        raw_posts = np.asarray(raw_test.PostId.unique())
        raw_users = np.asarray(raw_test.UserId.unique())

        if len(raw_posts) != len(encoded_posts) or len(raw_users) != len(encoded_users):
            raise ValueError(
                'encoded and raw test splits disagree; call read_data() again '
                'on the current data file')

        n_posts = len(encoded_posts)
        n_users = len(encoded_users)

        # Cartesian product: each user is repeated once per post while the
        # post list is cycled once per user.
        user_input = np.repeat(encoded_users, n_posts)
        post_input = np.tile(encoded_posts, n_users)

        model = self.get_model()
        estimations = np.asarray(model.predict([user_input, post_input])).reshape(-1)

        scored = pd.DataFrame({
            'estimation': estimations,
            'post_id': np.tile(raw_posts, n_users),
            'user_id': np.repeat(raw_users, n_posts),
        })
        # A single sort replaces groupby().apply(sort_values), whose treatment
        # of the grouping column differs between pandas versions.
        return scored.sort_values(
            ['user_id', 'estimation'], ascending=[True, False]
        ).reset_index(drop=True)



In [3]:
# NOTE(review): machine-specific absolute path; point this at your own copy of
# the '|'-separated interaction CSV before running.
DATA_PATH = '/home/gaurav/Desktop/RecEng/ML-DataSet/Wall_Activity_User_Post.csv'

rec_model = read_train_model(DATA_PATH)
rec_model.read_data()
# train_model() builds its own network through define_model(), so the extra
# stand-alone define_model() call (whose result was discarded) is not needed.
rec_model.train_model()


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 174008 samples, validate on 19335 samples
Epoch 1/1


<keras.callbacks.History at 0x7ffb1796de10>

In [12]:
# Score every known user against every known post with the saved model.
posts = rec_model.df_in['PostId'].unique()
users = rec_model.df_in['UserId'].unique()
model = rec_model.get_model()

# Predictions are collected per post and turned into a frame once at the end;
# inserting one column at a time into a DataFrame gets slower as it widens.
estimates_by_post = {}
for i, pid in enumerate(posts, start=1):
    post = np.full(shape=len(users), fill_value=pid)
    est = model.predict([users, post])
    # predict() returns one column per sample; flatten it to a 1-D column.
    estimates_by_post[pid] = np.asarray(est).ravel()
    print(str(i)+':'+str(pid),end=',')

# Rows are users, columns are posts (both as encoded ids).
final_df = pd.DataFrame(estimates_by_post, index=users)


model loaded
1:0,2:1,3:2,4:3,5:4,6:5,7:6,8:7,9:8,10:9,11:11,12:12,13:13,14:14,15:15,16:16,17:17,18:18,19:19,20:20,21:21,22:22,23:23,24:24,25:25,26:27,27:28,28:29,29:30,30:33,31:34,32:35,33:36,34:37,35:38,36:40,37:41,38:42,39:43,40:45,41:46,42:48,43:49,44:51,45:52,46:53,47:54,48:55,49:56,50:57,51:58,52:59,53:60,54:61,55:62,56:63,57:64,58:65,59:66,60:67,61:68,62:69,63:70,64:71,65:72,66:73,67:74,68:75,69:76,70:77,71:78,72:79,73:80,74:81,75:82,76:83,77:84,78:85,79:86,80:87,81:90,82:91,83:92,84:93,85:94,86:95,87:96,88:97,89:98,90:99,91:100,92:101,93:103,94:104,95:105,96:106,97:107,98:108,99:109,100:110,101:111,102:112,103:113,104:115,105:116,106:117,107:118,108:119,109:120,110:121,111:122,112:124,113:125,114:126,115:127,116:128,117:129,118:130,119:131,120:132,121:133,122:134,123:135,124:136,125:137,126:138,127:139,128:140,129:141,130:142,131:143,132:144,133:145,134:146,135:147,136:148,137:149,138:150,139:151,140:152,141:153,142:154,143:155,144:156,145:157,146:158,147:159,148:160,149:161,150

In [13]:
# Preview only the first rows; the full user x post matrix is too large to dump.
final_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,123,88,211,212,253,296,305,315,440,519
1954,1.416018,1.216452,1.125131,1.393466,1.306524,1.350726,1.289001,1.218770,1.117405,1.372332,...,1.139622,1.326808,1.242005,1.165760,1.359068,1.335479,1.236794,1.271505,1.371594,1.300078
2030,1.970233,1.727143,1.602214,1.943622,1.836700,1.890423,1.821577,1.726952,1.587900,1.917575,...,1.623768,1.864184,1.757711,1.658721,1.897050,1.870282,1.752426,1.791571,1.914366,1.828867
2395,1.253507,1.081069,1.028525,1.233978,1.158582,1.197510,1.140959,1.083475,1.025118,1.215994,...,1.035678,1.176336,1.100933,1.049552,1.205654,1.184843,1.096679,1.127396,1.215941,1.152753
2443,1.690927,1.465134,1.354754,1.665938,1.567162,1.617219,1.550355,1.466305,1.343573,1.641961,...,1.373165,1.591701,1.493886,1.404375,1.624852,1.599262,1.488484,1.526279,1.639980,1.559863
2571,1.624745,1.404949,1.299807,1.600393,1.504627,1.553257,1.487531,1.406393,1.289811,1.577153,...,1.316657,1.528264,1.433030,1.346364,1.561026,1.536036,1.427657,1.464927,1.575472,1.497497
2802,1.947845,1.713705,1.593863,1.922235,1.818869,1.870904,1.804508,1.713381,1.580033,1.897155,...,1.614458,1.845757,1.742866,1.647914,1.877194,1.851455,1.737865,1.775402,1.893980,1.811354
2895,1.298254,1.109775,1.044011,1.277400,1.196954,1.238452,1.178713,1.112646,1.040003,1.258089,...,1.052216,1.215904,1.134633,1.068128,1.246855,1.224809,1.129367,1.163595,1.257900,1.190715
3388,1.104170,1.010452,0.979976,1.091348,1.048265,1.067124,1.038710,1.011848,0.978761,1.079822,...,0.983152,1.056746,1.019947,0.991912,1.073476,1.061053,1.017802,1.033754,1.080163,1.045599
3403,1.280848,1.097507,1.037019,1.260394,1.181479,1.222208,1.163345,1.099997,1.033239,1.241495,...,1.044849,1.200054,1.120213,1.059652,1.230569,1.208875,1.115050,1.148782,1.241369,1.175359
3714,1.852745,1.616146,1.497767,1.826776,1.722417,1.774998,1.706600,1.616521,1.484829,1.801375,...,1.517894,1.748857,1.645898,1.551116,1.782103,1.755609,1.640575,1.679113,1.798652,1.714818


34680