In [14]:
from keras.models import load_model
from keras import regularizers
import pandas as pd
import numpy as np
from keras.models import Sequential, Model
from keras.layers import Embedding, Reshape, Activation, Input, Dense, Flatten, Dropout
from keras.layers.merge import Dot, multiply, concatenate
from keras.utils import np_utils
from keras.utils.data_utils import get_file
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import skipgrams
from collections import defaultdict
import pandas as pd
from sklearn.model_selection import train_test_split
import numba as nb


In [15]:
class read_train_model():
    
    def __init__(self,data_path):
        self.data_path=data_path
        self.df_in=pd.DataFrame()
        self.df_train=pd.DataFrame()
        self.df_test=pd.DataFrame()
        self.n_user=0
        self.n_posts=0
        
    def read_data(self):
        self.df_in = pd.read_csv(self.data_path)
        self.df_in.loc[self.df_in.Likes>0,'Likes']=1
        self.df_in.loc[self.df_in.Comments>0,'Comments']=1
        self.df_in.loc[self.df_in.Shares>0,'Shares']=1
        self.df_in.loc[self.df_in.Downloads>0,'Downloads']=1
        self.df_in.loc[self.df_in.Views>0,'Views']=1

        self.df_in['Rating']=self.df_in['Likes']+self.df_in['Comments']+self.df_in['Shares']+self.df_in['Downloads']+self.df_in['Views']
        self.df_in.drop(['Likes','Comments','Shares','Downloads','Views'],axis=1,inplace=True)
    
        self.df_in.UserId = self.df_in.UserId.astype('category').cat.codes.values
        self.df_in.PostId = self.df_in.PostId.astype('category').cat.codes.values

        self.df_train,self.df_test =train_test_split(self.df_in, test_size = 0.1,random_state = 42 )

        self.n_users = len(self.df_in.UserId.unique()) 
        self.n_posts = len(self.df_in.PostId.unique())
        
        return self.n_users,self.n_posts,self.df_train,self.df_test
    
    
    def user_post_ids(self):
        dff = pd.read_csv(self.data_path)
        dff.loc[dff.Likes>0,'Likes']=1
        dff.loc[dff.Comments>0,'Comments']=1
        dff.loc[dff.Shares>0,'Shares']=1
        dff.loc[dff.Downloads>0,'Downloads']=1
        dff.loc[dff.Views>0,'Views']=1

        dff['Rating']=dff['Likes']+dff['Comments']+dff['Shares']+dff['Downloads']+dff['Views']
        dff.drop(['Likes','Comments','Shares','Downloads','Views'],axis=1,inplace=True)
        trainn,testt = train_test_split(dff,test_size = 0.1,random_state =42)
        
        return trainn, testt


    def define_model(self):
       
        post_input = Input(shape=[1], name="post-Input")
        post_embedding = Embedding(self.n_posts+1,10,  name="post-Embedding")(post_input)
        lp = Dense(10,activation = 'relu',kernel_regularizer=regularizers.l2(0.001),)(post_embedding)
        Dropout(0.4)
        post_vec = Flatten(name="Flatten-post")(lp)

        user_input = Input(shape=[1], name="User-Input")
        user_embedding = Embedding(self.n_users+1, 10, name="User-Embedding")(user_input)
        l2 = Dense(10,activation = 'relu',kernel_regularizer=regularizers.l2(0.001))(user_embedding)
        Dropout(0.4)
        user_vec = Flatten(name="Flatten-Users")(l2)

        product_layer = Dot(name="Dot-Product", axes=1)([post_vec, user_vec])

        fully_connected_layer = Dense(10,activation ='relu')(product_layer)
        fully_connected_layer_2 = Dense(10,activation ='relu')(fully_connected_layer)
        fully_connected_layer_3 = Dense(10,activation ='relu')(fully_connected_layer_2)
        fully_connected_layer_4 = Dense(10,activation ='relu')(fully_connected_layer_3)


        output_connected_layer = Dense(1,activation ='linear')(fully_connected_layer_4)

        model = Model([user_input, post_input],output_connected_layer)
        model.compile(loss='mse', optimizer='adam', metrics=["mae"])
        return model
    
    def train_model(self):
        model =self.define_model()
        history = model.fit([self.df_train.UserId, self.df_train.PostId], self.df_train.Rating,validation_split=0.1 , epochs= 1, verbose=1)
        model.save('recommender_model.h5')
        return history
    
    def get_model(self):
        model = load_model('recommender_model.h5')
        print('model loaded')
        return model
    
    
    def get_estimation_data(self):
        def duplicate(testList,n ): 
            return list(testList*n)
                
        n_users,n_posts,train,test=self.n_user,self.n_posts,self.df_train,self.df_test
        trainn,testt=self.user_post_ids()
        len_post = len(test.PostId.unique())
        len_user= len(testt.UserId.unique())
        p = test.PostId.unique()
        unique_postids = p.tolist()
        upids=duplicate(unique_postids,len_user) #post_ids_looped


        u =test.UserId.unique()
        unique_userids =u.tolist()
        un = np.array(unique_userids)
        user_loop =np.repeat(unique_userids,len_post) #user_ids_looped
        ttpids = testt['PostId'].unique()
        ttuid = testt['UserId'].unique()
        pp = testt.PostId.unique()
        uunique_postids = pp.tolist()
        uupids=duplicate(uunique_postids,len_user) #post_ids_looped


        uu =testt.UserId.unique()
        uunique_userids =uu.tolist()
        uun = np.array(uunique_userids)
        uuser_loop =np.repeat(uunique_userids,len_post) #user_ids_looped
        post_data = np.array(upids)
        user = np.array(user_loop)
        model=self.get_model()
        estimations = model.predict([user, post_data]) #predictions
   
        pid =pd.DataFrame(uupids)  #forming dataframes
        uid =pd.DataFrame(uuser_loop)
        estimation =pd.DataFrame(estimations)
        dataa = pd.merge(estimation,pid,left_index =True,right_index = True)
        data = pd.merge(dataa,uid,left_index = True, right_index= True)
        data.rename(columns={'0_x':'estimation','0_y':'post_id',0:'user_id'},inplace = True)
        final_data_sorted = data.groupby(["user_id"]).apply(lambda x: x.sort_values(["estimation"], ascending = False)).reset_index(drop=True)
        return final_data_sorted



In [16]:
model1=read_train_model('/home/msf/RecEng/Wall_Activity_User_Post.csv')
n_users,n_posts,train,test =model1.read_data()
#es = model1.get_estimation_data()

In [17]:
#model1=read_train_model('/home/msf/RecEng//ML-DataSet/Wall_Activity_User_Post.csv')
model1.read_data()
model1.define_model()
model1.train_model()


Train on 1108501 samples, validate on 123167 samples
Epoch 1/1


<keras.callbacks.History at 0x7fb817e99a20>

In [None]:
'''
test_userids = test.UserId.unique()
uidz = list(test_userids)
for u in uidz:
def rec(u):
    final_df=pd.DataFrame(index=users)
    
    post_data = np.array(list(set(test.PostId)))
    user = np.array([u for i in range(len(post_data))])
    predictions = model.predict([user, post_data])
    predictions = np.array([a[0] for a in predictions])
    recommended_post_ids = (-predictions).argsort()[:5]
    final_df[pid]=predictions
    print(predictions[recommended_post_ids])
    return predictions[recommended_post_ids],recommended_post_ids'''

In [8]:
posts=model1.df_in['PostId'].unique()
users=model1.df_in['UserId'].unique()

model=model1.get_model()
splits=len(posts)//4+1
posts_split_array=np.array_split(posts,splits)
estimations=np.empty([0,1])
repeat_count=len(splits)
posts_array=np.repeat(splits,len(users))
users_array=np.tile(users,repeat_count)
len(users_array),len(posts_array)

model loaded


TypeError: object of type 'int' has no len()

In [19]:
import time
posts=model1.df_in['PostId'].unique()
users=model1.df_in['UserId'].unique()

model=model1.get_model()
splits=len(posts)//4+1
posts_split_array=np.array_split(posts,splits)
estimations=np.empty([0,1])

i=1
start = time.time()
for splits in posts_split_array:
    repeat_count=len(splits)
    posts_array=np.repeat(splits,len(users))
    users_array=np.tile(users,repeat_count)
    est=model.predict([users_array,posts_array])
    estimations=np.append(estimations,est)
    print(str(i)+':'+str(splits),end=',')
    i+=1
users_index=np.repeat(users,len(posts))
posts_index=np.tile(posts,len(users))

final_df=pd.DataFrame({'Users':users_index,'Posts':posts_index,'Rating':estimations})
print("Complete")
end = time.time()
print('total time (s)= ' + str(end-start))


#returnk(pos)

model loaded


1:[0 1 3 4],2:[5 6 7 8],3:[ 9 10 11 12],4:[13 14 15 16],5:[17 18 19 20],6:[21 22 23 24],7:[25 26 27 28],8:[29 30 31 32],9:[33 34 35 36],10:[37 38 39 40],11:[41 42 43 44],12:[45 46 47 48],13:[49 50 51 52],14:[53 54 55 56],15:[57 58 59 60],16:[61 62 63 64],17:[65 66 67 69],18:[70 71 72 73],19:[74 75 76 77],20:[79 80 81 82],21:[83 84 85 86],22:[87 88 89 90],23:[91 92 93 94],24:[95 96 97 98],25:[100 101 102 103],26:[104 105 106 107],27:[108 109 110 111],28:[112 113 114 115],29:[116 117 118 119],30:[120 121 123 124],31:[125 126 127 128],32:[129 130 131 132],33:[133 134 135 136],34:[137 138 139 140],35:[141 142 143 144],36:[145 146 147 148],37:[149 150 151 152],38:[153 154 155 156],39:[157 158 159 160],40:[161 162 163 164],41:[165 166 167 168],42:[169 170 171 172],43:[173 174 175 176],44:[177 178 179 180],45:[181 182 183 184],46:[185 186 187 188],47:[189 190 191 192],48:[193 194 195 196],49:[197 198 199 200],50:[201 202 203 204],51:[205 206 207 208],52:[209 210 211 212],53:[213 214 215 216],

363:[1471 1472 1473 1474],364:[1475 1476 1477 1478],365:[1479 1482 1483 1484],366:[1485 1486 1487 1488],367:[1489 1490 1491 1492],368:[1493 1494 1495 1496],369:[1497 1498 1499 1500],370:[1501 1502 1504 1505],371:[1508 1509 1510 1511],372:[1512 1513 1514 1515],373:[1516 1517 1518 1519],374:[1520 1521 1522 1523],375:[1524 1525 1526 1527],376:[1528 1529 1530 1531],377:[1532 1533 1534 1535],378:[1536 1537 1538 1539],379:[1540 1541 1542 1543],380:[1544 1545 1546 1547],381:[1548 1549 1550 1551],382:[1552 1553 1554 1555],383:[1556 1557 1558 1559],384:[1560 1563 1564 1566],385:[1567 1568 1569 1570],386:[1572 1573 1574 1575],387:[1576 1577 1578 1579],388:[1580 1581 1582 1583],389:[1584 1585 1586 1587],390:[1588 1589 1590 1591],391:[1592 1593 1594 1595],392:[1596 1598 1600 1601],393:[1602 1603 1604 1605],394:[1606 1607 1608 1609],395:[1610 1611 1612 1613],396:[1614 1615 1616 1617],397:[1618 1619 1620 1621],398:[1622 1623 1624 1625],399:[1626 1627 1628 1629],400:[1630 1631 1632 1633],401:[1634 16

679:[2822 2823 2824 2825],680:[2826 2827 2828 2829],681:[2830 2831 2832 2833],682:[2834 2835 2836 2837],683:[2838 2839 2840 2841],684:[2842 2843 2844 2845],685:[2846 2847 2848 2849],686:[2850 2851 2852 2853],687:[2854 2855 2856 2857],688:[2858 2859 2860 2861],689:[2862 2863 2864 2866],690:[2867 2868 2869 2870],691:[2871 2872 2873 2874],692:[2875 2876 2877 2878],693:[2879 2880 2881 2882],694:[2883 2884 2885 2886],695:[2887 2888 2889 2891],696:[2892 2893 2894 2895],697:[2897 2898 2899 2900],698:[2901 2902 2903 2904],699:[2905 2906 2907 2908],700:[2909 2910 2911 2912],701:[2913 2914 2915 2916],702:[2917 2918 2919 2920],703:[2921 2922 2923 2924],704:[2925 2926 2927 2928],705:[2929 2930 2931 2932],706:[2933 2934 2935 2936],707:[2937 2938 2939 2940],708:[2941 2942 2943 2944],709:[2945 2946 2947 2948],710:[2949 2950 2951 2952],711:[2953 2954 2955 2956],712:[2957 2958 2959 2960],713:[2961 2962 2963 2964],714:[2965 2966 2967 2968],715:[2969 2970 2971 2972],716:[2973 2974 2975 2977],717:[2978 29

MemoryError: 

In [22]:
posts


array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  27,
        28,  29,  30,  33,  34,  35,  36,  37,  38,  40,  41,  42,  43,
        45,  46,  48,  49,  51,  52,  53,  54,  55,  56,  57,  58,  59,
        60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,
        73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,
        86,  87,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100,
       101, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 115,
       116, 117, 118, 119, 120, 121, 122, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169,
       170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182,
       183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 19

In [12]:
from numba import njit, jit
posts=model1.df_in['PostId'].unique()
users=model1.df_in['UserId'].unique()
final_df=pd.DataFrame(index=users)
model=model1.get_model()
i=1

@njit
def make_final_df(posts,users,final_df,model,i):
    
    for pid in posts:
        rec_posts_uid=[]
        post=np.full(shape=len(users),fill_value=pid)
        est=model.predict([users,post])
        final_df[pid]=est
        #print(str(i)+':'+str(pid),end=',')
        i+=1
    
make_final_df(posts,users,final_df,model,i)


model loaded


TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Internal error at <numba.typeinfer.ArgConstraint object at 0x7fdc1ce07630>:
--%<----------------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/msf/anaconda3/lib/python3.6/site-packages/numba/errors.py", line 627, in new_error_context
    yield
  File "/home/msf/anaconda3/lib/python3.6/site-packages/numba/typeinfer.py", line 201, in __call__
    assert ty.is_precise()
AssertionError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/msf/anaconda3/lib/python3.6/site-packages/numba/typeinfer.py", line 144, in propagate
    constraint(typeinfer)
  File "/home/msf/anaconda3/lib/python3.6/site-packages/numba/typeinfer.py", line 202, in __call__
    typeinfer.add_type(self.dst, ty, loc=self.loc)
  File "/home/msf/anaconda3/lib/python3.6/contextlib.py", line 99, in __exit__
    self.gen.throw(type, value, traceback)
  File "/home/msf/anaconda3/lib/python3.6/site-packages/numba/errors.py", line 635, in new_error_context
    six.reraise(type(newerr), newerr, tb)
  File "/home/msf/anaconda3/lib/python3.6/site-packages/numba/six.py", line 659, in reraise
    raise value
numba.errors.InternalError: [1m[1m[0m
[0m[1m[1] During: typing of argument at <ipython-input-12-c1e59cc03d20> (11)[0m
--%<----------------------------------------------------------------------------

[1m
File "<ipython-input-12-c1e59cc03d20>", line 11:[0m
[1mdef make_final_df(posts,users,final_df,model,i):
    <source elided>
    
[1m    for pid in posts:
[0m    [1m^[0m[0m

This error may have been caused by the following argument(s):
- argument 2: [1mcannot determine Numba type of <class 'pandas.core.frame.DataFrame'>[0m
- argument 3: [1mcannot determine Numba type of <class 'keras.engine.training.Model'>[0m

This is not usually a problem with Numba itself but instead often caused by
the use of unsupported features or an issue in resolving types.

To see Python/NumPy features supported by the latest release of Numba visit:
http://numba.pydata.org/numba-doc/dev/reference/pysupported.html
and
http://numba.pydata.org/numba-doc/dev/reference/numpysupported.html

For more information about typing errors and how to debug them visit:
http://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#my-code-doesn-t-compile

If you think your code should work with Numba, please report the error message
and traceback, along with a minimal reproducer at:
https://github.com/numba/numba/issues/new


In [26]:
#final_df

In [22]:
df1=pd.read_csv('/home/msf/RecEng/RecOut.csv')

In [24]:
df1['estimation'].unique()

array([1.2004685])

Unnamed: 0.1,Unnamed: 0,estimation,post_id,user_id
0,0,1.200468,2899,75
1,1,1.200468,3016,75
2,2,1.200468,2884,75
3,3,1.200468,2617,75
4,4,1.200468,2470,75
5,5,1.200468,2764,75
6,6,1.200468,2995,75
7,7,1.200468,3021,75
8,8,1.200468,2848,75
9,9,1.200468,2496,75


In [14]:
import time
from multiprocessing import Pool

def f(x):
    for i in range(x):
        for j in range(x):
            return print(x*x)
f(100000)


10000000000


In [None]:

if __name__ == '__main__':
    p = Pool(5)
    d = range(100000)
    start = time.time()
    print(p.map(f, d))
    end=time.time()
    print('total time (s)= ' + str(end-start))