In [1]:
from keras.models import load_model
from keras import regularizers
import pandas as pd
import numpy as np
from keras.models import Sequential, Model
from keras.layers import Embedding, Reshape, Activation, Input, Dense, Flatten, Dropout
from keras.layers.merge import Dot, multiply, concatenate
from keras.utils import np_utils
from keras.utils.data_utils import get_file
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import skipgrams
from collections import defaultdict
import pandas as pd
from sklearn.model_selection import train_test_split
import numba as nb


Using TensorFlow backend.


In [2]:
class read_train_model():
    """Load a user/post interaction log, train a Keras model on it and score pairs.

    The input is a pipe-separated CSV with the columns ``UserId``, ``PostId``,
    ``Likes``, ``Comments``, ``Shares``, ``Downloads`` and ``Views``.  Each
    engagement column is clipped to 0/1 and the five flags are summed into a
    single ``Rating`` column.

    Typical use: ``read_data()`` -> ``train_model()`` -> ``get_estimation_data()``.
    """

    # Engagement columns that are binarised and summed into ``Rating``.
    _SIGNAL_COLUMNS = ('Likes', 'Comments', 'Shares', 'Downloads', 'Views')
    # Split parameters shared by read_data() and user_post_ids().  They must be
    # identical: get_estimation_data() relies on both splits selecting the same
    # rows so that encoded ids and raw ids line up position by position.
    _TEST_SIZE = 0.1
    _SPLIT_SEED = 42

    def __init__(self, data_path, model_path='recommender_model.h5'):
        """
        Parameters
        ----------
        data_path : str
            Path of the pipe-separated interaction CSV.
        model_path : str, optional
            File the trained model is saved to and loaded from.
        """
        self.data_path = data_path
        self.model_path = model_path
        self.df_in = pd.DataFrame()
        self.df_train = pd.DataFrame()
        self.df_test = pd.DataFrame()
        # Previously only ``n_user`` was initialised here while every other
        # method used ``n_users``; the attribute now exists from construction.
        self.n_users = 0
        self.n_posts = 0

    @property
    def n_user(self):
        """Alias of ``n_users`` kept for code that used the old attribute name."""
        return self.n_users

    @n_user.setter
    def n_user(self, value):
        self.n_users = value

    def _load_interactions(self):
        """Read the CSV and collapse the engagement columns into ``Rating``.

        Returns a new DataFrame holding the remaining columns plus ``Rating``;
        ``UserId``/``PostId`` are left exactly as they appear in the file.
        """
        frame = pd.read_csv(self.data_path, sep='|')
        signals = list(self._SIGNAL_COLUMNS)
        for column in signals:
            # Any positive count becomes 1; other values are left untouched.
            frame.loc[frame[column] > 0, column] = 1
        # skipna=False keeps the NaN propagation of the original chained "+".
        frame['Rating'] = frame[signals].sum(axis=1, skipna=False)
        return frame.drop(columns=signals)

    def _require_split(self):
        """Raise a clear error when read_data() has not populated the split yet."""
        if self.df_train.empty or self.df_test.empty:
            raise RuntimeError('no train/test data available; call read_data() first')

    def read_data(self):
        """Load the CSV, integer-encode the ids and create the train/test split.

        Populates ``df_in``, ``df_train``, ``df_test``, ``n_users`` and ``n_posts``.
        """
        frame = self._load_interactions()
        # Replace raw ids by their category codes so they can index embeddings.
        frame['UserId'] = frame['UserId'].astype('category').cat.codes.values
        frame['PostId'] = frame['PostId'].astype('category').cat.codes.values

        self.df_in = frame
        self.df_train, self.df_test = train_test_split(
            frame, test_size=self._TEST_SIZE, random_state=self._SPLIT_SEED)

        self.n_users = len(frame['UserId'].unique())
        self.n_posts = len(frame['PostId'].unique())

    def user_post_ids(self):
        """Return the ``(train, test)`` split of the data with the raw, un-encoded ids.

        The CSV is read again because ``read_data()`` overwrites the id columns
        of ``df_in`` with category codes.
        """
        raw = self._load_interactions()
        train, test = train_test_split(
            raw, test_size=self._TEST_SIZE, random_state=self._SPLIT_SEED)
        return train, test

    def define_model(self):
        """Build and compile the network.

        Each id goes through a 10-dimensional embedding, an L2-regularised
        Dense layer and dropout; the two flattened vectors are combined with a
        dot product and passed through four Dense(10) layers to one linear
        output.  Inputs are ordered ``[user, post]``.
        """
        post_input = Input(shape=[1], name="post-Input")
        post_embedding = Embedding(self.n_posts+1, 10, name="post-Embedding")(post_input)
        post_hidden = Dense(10, activation='relu',
                            kernel_regularizer=regularizers.l2(0.001))(post_embedding)
        # The dropout layer used to be created and thrown away; it is now
        # actually applied to the branch.
        post_hidden = Dropout(0.4)(post_hidden)
        post_vec = Flatten(name="Flatten-post")(post_hidden)

        user_input = Input(shape=[1], name="User-Input")
        user_embedding = Embedding(self.n_users+1, 10, name="User-Embedding")(user_input)
        user_hidden = Dense(10, activation='relu',
                            kernel_regularizer=regularizers.l2(0.001))(user_embedding)
        user_hidden = Dropout(0.4)(user_hidden)
        user_vec = Flatten(name="Flatten-Users")(user_hidden)

        hidden = Dot(name="Dot-Product", axes=1)([post_vec, user_vec])
        for _ in range(4):
            hidden = Dense(10, activation='relu')(hidden)
        output_connected_layer = Dense(1, activation='linear')(hidden)

        model = Model([user_input, post_input], output_connected_layer)
        model.compile(loss='mse', optimizer='adam', metrics=["mae"])
        return model

    def train_model(self, epochs=1):
        """Fit a fresh model on ``df_train`` and save it to ``model_path``.

        Parameters
        ----------
        epochs : int, optional
            Number of training epochs.

        Returns
        -------
        The history object returned by ``model.fit``.

        Raises
        ------
        RuntimeError
            If ``read_data()`` has not been called yet.
        """
        self._require_split()
        model = self.define_model()
        history = model.fit([self.df_train.UserId, self.df_train.PostId],
                            self.df_train.Rating,
                            validation_split=0.1, epochs=epochs, verbose=1)
        model.save(self.model_path)
        return history

    def get_model(self):
        """Load and return the model stored at ``model_path``."""
        model = load_model(self.model_path)
        print('model loaded')
        return model

    def get_estimation_data(self):
        """Score every (user, post) combination that occurs in the test split.

        Predictions are made on the encoded ids from ``df_test`` and reported
        against the raw ids from ``user_post_ids()``.  Both splits cover the
        same rows in the same order, so the ``unique()`` arrays of encoded and
        raw ids correspond element by element.

        Returns
        -------
        pandas.DataFrame
            Columns ``estimation``, ``post_id``, ``user_id``; ordered by
            ``user_id`` and, within a user, by descending ``estimation``.

        Raises
        ------
        RuntimeError
            If ``read_data()`` has not been called yet.
        """
        self._require_split()
        encoded_test = self.df_test
        _, raw_test = self.user_post_ids()

        post_codes = encoded_test['PostId'].unique()
        user_codes = encoded_test['UserId'].unique()
        post_ids = raw_test['PostId'].unique()
        user_ids = raw_test['UserId'].unique()
        n_posts, n_users = len(post_codes), len(user_codes)

        model = self.get_model()
        # Cartesian product: each user repeated for the full post list.
        estimations = model.predict([np.repeat(user_codes, n_posts),
                                     np.tile(post_codes, n_users)])

        data = pd.DataFrame({
            'estimation': np.asarray(estimations).ravel(),
            'post_id': np.tile(post_ids, n_users),
            'user_id': np.repeat(user_ids, n_posts),
        })
        # A two-key sort gives the same ordering as grouping by user and
        # sorting each group, without relying on groupby().apply() keeping the
        # grouping column.
        return data.sort_values(['user_id', 'estimation'],
                                ascending=[True, False]).reset_index(drop=True)



In [3]:
# Train the recommender on the wall-activity export.
# NOTE: this is a machine-specific absolute path; point DATA_PATH at your own
# copy of the CSV before re-running the notebook.
DATA_PATH = '/home/gaurav/Desktop/RecEng/Wall_Activity_User_Post.csv'

rec_model = read_train_model(DATA_PATH)
rec_model.read_data()
# train_model() builds its own network through define_model(), so the extra
# stand-alone define_model() call (whose result was discarded) is not needed.
rec_model.train_model()


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 174008 samples, validate on 19335 samples
Epoch 1/1


<keras.callbacks.History at 0x7f7e5c940dd8>

In [21]:
import numba as nb

# Encoded ids present in the full interaction table.
posts = rec_model.df_in['PostId'].unique()
users = rec_model.df_in['UserId'].unique()
n_posts = len(posts)

# One row per post; fill_df adds one column of scores per user.
final_df = pd.DataFrame(index=posts)

# Constant array holding the value 28 once per post (not used further in this cell).
user = np.full(n_posts, 28)

# Trained network as saved by train_model().
model = rec_model.get_model()

# Starting value of the progress counter printed by fill_df.
i = 1

def fill_df(users,posts,model,final_df,i):
    """Add one column of predicted scores per user to ``final_df``.

    Parameters
    ----------
    users : iterable
        User ids to score; each id becomes a column label of ``final_df``.
    posts : array-like
        Post ids to score for every user; must have as many entries as
        ``final_df`` has rows.
    model : object
        Anything exposing ``predict([user_ids, post_ids])`` that returns one
        score per input pair.
    final_df : pandas.DataFrame
        Frame that receives the columns; modified in place.
    i : int
        Starting value of the progress counter that is printed.

    Returns
    -------
    pandas.DataFrame
        The same ``final_df`` object.
    """
    post_ids = np.asarray(posts)
    for uid in users:
        if post_ids.size:
            # One batched predict per user instead of one call per
            # (user, post) pair, which made the loop impractically slow.
            user_ids = np.full(post_ids.shape, uid)
            scores = np.asarray(model.predict([user_ids, post_ids]), dtype=float).ravel()
        else:
            scores = np.empty(0, dtype=float)
        final_df[uid] = scores
        print(str(i)+':'+str(uid),end=',')
        i += 1
    return final_df

def fill_df_nb(users,posts,model,final_df,i):
    """Add one column of predicted scores per user to ``final_df``.

    The ``@nb.jit(nopython=True)`` decorator was removed: nopython mode cannot
    type a Keras model or a pandas DataFrame, so the decorated function would
    fail as soon as it was called.  The speed-up is obtained by predicting all
    posts of a user in one batch instead.  The name is kept so existing calls
    continue to work.

    Parameters
    ----------
    users : iterable
        User ids to score; each id becomes a column label of ``final_df``.
    posts : array-like
        Post ids to score for every user; must have as many entries as
        ``final_df`` has rows.
    model : object
        Anything exposing ``predict([user_ids, post_ids])`` that returns one
        score per input pair.
    final_df : pandas.DataFrame
        Frame that receives the columns; modified in place.
    i : int
        Starting value of the progress counter that is printed.

    Returns
    -------
    pandas.DataFrame
        The same ``final_df`` object.
    """
    post_ids = np.asarray(posts)
    for uid in users:
        if post_ids.size:
            user_ids = np.full(post_ids.shape, uid)
            scores = np.asarray(model.predict([user_ids, post_ids]), dtype=float).ravel()
        else:
            scores = np.empty(0, dtype=float)
        final_df[uid] = scores
        print(str(i)+':'+str(uid))
        i += 1
    return final_df
# Score every post for every user; the returned frame is the cell's output.
fill_df(users=users, posts=posts, model=model, final_df=final_df, i=i)

model loaded


1:1954,2:2030,3:2395,4:2443,5:2571,6:2802,7:2895,8:3388,9:3403,10:3714,11:5499,12:5593,13:5736,14:5766,15:5996,16:6003,17:6122,18:6181,19:6198,20:6231,21:6268,22:6337,23:7181,24:7311,25:7513,26:7559,27:7594,28:7759,29:7905,30:8092,31:8117,32:8155,33:8162,34:8180,35:8250,36:8492,37:8637,38:8737,39:9001,40:9099,41:9131,42:9214,43:9502,44:9510,45:9585,46:9620,47:9713,48:9761,49:9951,50:10101,51:10239,52:10375,53:10429,54:10465,55:10539,56:10827,57:11269,58:11378,59:11952,60:12003,61:12338,62:12418,63:12625,64:12724,65:12729,66:12995,67:13268,68:13353,69:13471,70:13490,71:13570,72:13760,73:14007,74:14084,75:14088,76:14181,77:14284,78:14371,79:14438,80:14512,81:14556,82:14615,83:14658,84:14739,85:14745,86:14851,87:14854,88:14980,89:15001,90:15046,91:15071,92:15245,93:15307,94:15337,95:15405,96:15423,97:15447,98:15448,99:15497,100:15622,101:15645,102:15685,103:15696,104:15729,105:15907,106:15915,107:16085,108:16128,109:16257,110:16304,111:16307,112:16329,113:16559,114:16746,115:16747,116:167

864:10927,865:5288,866:5382,867:5422,868:5729,869:5932,870:6113,871:6798,872:6952,873:7338,874:2256,875:2536,876:2568,877:2715,878:2844,879:2937,880:5052,881:5606,882:5731,883:7200,884:7386,885:8103,886:8125,887:8572,888:8883,889:9557,890:9757,891:10105,892:11310,893:13229,894:13403,895:13864,896:13937,897:14500,898:14574,899:14747,900:14759,901:14767,902:14981,903:15021,904:15043,905:15100,906:15104,907:15189,908:15223,909:15753,910:16192,911:16291,912:16617,913:16999,914:17315,915:17539,916:17691,917:17738,918:18210,919:18211,920:18249,921:18372,922:18395,923:18409,924:18417,925:18634,926:18999,927:19170,928:12367,929:13754,930:2705,931:2730,932:3449,933:562,934:1945,935:2840,936:2904,937:4195,938:4294,939:4314,940:4961,941:5630,942:6009,943:6841,944:7351,945:7789,946:8520,947:8898,948:9003,949:10427,950:10857,951:11298,952:11874,953:12130,954:13070,955:13722,956:13895,957:13919,958:14297,959:14347,960:14555,961:15261,962:15303,963:15476,964:15876,965:17613,966:3624,967:2724,968:1735

KeyboardInterrupt: 