In [36]:
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"]="3"
import tensorflow as tf
from keras import backend as K
from keras.layers import Embedding, Reshape, merge, Dropout, Dense,concatenate,Input,add,dot,Merge,Flatten
from keras.models import Sequential,Model
import pandas as pd
from keras.optimizers import Adam
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint

In [47]:
def create_model(n_age,n_gender, n_items, k_factors):
    x1=Input((1,),name='age')
    x2=Input((1,),name='gender')
    x3=Input((1,),name='movie')
    x1_1 = Embedding(output_dim=k_factors, input_dim=n_age+1, input_length=1,name='age_embed')(x1)
    x2_1 = Embedding(output_dim=k_factors, input_dim=n_gender+1, input_length=1,name='gender_embed')(x2)
    x3_1 = Embedding(output_dim=k_factors, input_dim=n_items+1, input_length=1,name='movie_embed')(x3)
    
    age_vecs = Flatten()(x1_1)
    gen_vecs = Flatten()(x2_1)
    item_vecs = Flatten()(x3_1)
    y1=dot([age_vecs,item_vecs],axes=1,name='age_out')
    y2=dot([gen_vecs,item_vecs],axes=1,name='gender_out')
    model = Model(inputs=[x1,x2,x3], outputs=[y1,y2])
    model.compile(loss='mse', optimizer=Adam(lr=1e-5))
    return model

In [38]:
K_FACTORS = 120
RNG_SEED = 1446557
MODEL_WEIGHTS_FILE = 'movielens_model_adam.h5'

In [4]:
training=pd.read_csv("/home/dhara/movielens/ml-1m/balance_movielens.csv",engine='python')

In [7]:
max_age = training['age'].drop_duplicates().max()
max_gender_enc = training['gender_enc'].drop_duplicates().max()
max_movieid = training['movieId'].drop_duplicates().max()
print len(training), 'ratings loaded.',max_age,max_gender_enc,max_movieid

597824 ratings loaded. 61 1 176271


In [8]:
shuffled_ratings = training.sample(frac=1., random_state=RNG_SEED)
Users = shuffled_ratings['user_emb_id'].values
print 'Users:', Users, ', shape =', Users.shape
Movies = shuffled_ratings['movie_emb_id'].values
print 'Movies:', Movies, ', shape =', Movies.shape
Ratings = shuffled_ratings['rating'].values
print 'Ratings:', Ratings, ', shape =', Ratings.shape

Users: [4386 4226 3937 ..., 1398 4627 3630] , shape = (597824,)
Movies: [123287    545    159 ...,   4898   1393   2002] , shape = (597824,)
Ratings: [ 5.   1.   1.  ...,  2.5  4.   2. ] , shape = (597824,)


In [9]:
print max_age,max_gender_enc,max_movieid

61 1 176271


In [48]:
model_new=create_model(n_age=max_age,n_gender=max_gender_enc,n_items=max_movieid,k_factors=K_FACTORS)
print model_new.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
age (InputLayer)                (None, 1)            0                                            
__________________________________________________________________________________________________
movie (InputLayer)              (None, 1)            0                                            
__________________________________________________________________________________________________
gender (InputLayer)             (None, 1)            0                                            
__________________________________________________________________________________________________
age_embed (Embedding)           (None, 1, 120)       7440        age[0][0]                        
__________________________________________________________________________________________________
movie_embe

In [31]:
Age = shuffled_ratings['age_emb_id'].values
print 'Ages:', Age, ', shape =', Age.shape
Gender = shuffled_ratings['gender_enc'].values
print 'Gender:', Gender, ', shape =', Gender.shape
Movies = shuffled_ratings['movie_emb_id'].values
print 'Movies:', Movies, ', shape =', Movies.shape
Ratings = shuffled_ratings['rating'].values
print 'Ratings:', Ratings, ', shape =', Ratings.shape

Ages: [22 26 29 ..., 18 56 23] , shape = (597824,)
Gender: [1 0 0 ..., 0 1 0] , shape = (597824,)
Movies: [123287    545    159 ...,   4898   1393   2002] , shape = (597824,)
Ratings: [ 5.   1.   1.  ...,  2.5  4.   2. ] , shape = (597824,)


In [None]:
callbacks = [EarlyStopping('val_age_out_loss', patience=5), 
             ModelCheckpoint(MODEL_WEIGHTS_FILE, save_best_only=True)]
history = model_new.fit([Age,Gender, Movies], [Ratings,Ratings], epochs=30, validation_split=.1,batch_size=20000, verbose=2, callbacks=callbacks)

In [None]:
MODEL_WEIGHTS_FILE="movielens_model_adam_2.h5"
callbacks = [EarlyStopping('val_age_out_loss', patience=5), 
             ModelCheckpoint(MODEL_WEIGHTS_FILE, save_best_only=True)]
history = model_new.fit([Age,Gender, Movies], [Ratings,Ratings], epochs=50, validation_split=.1,batch_size=20000, verbose=2, callbacks=callbacks)

In [None]:
MODEL_WEIGHTS_FILE="movielens_model_adam_3.h5"
callbacks = [EarlyStopping('val_age_out_loss', patience=5), 
              ModelCheckpoint(MODEL_WEIGHTS_FILE, save_best_only=True)]
# callbacks = [ModelCheckpoint(MODEL_WEIGHTS_FILE, save_best_only=True)]
history = model_new.fit([Age,Gender, Movies], [Ratings,Ratings], epochs=200, validation_split=.1,batch_size=20000, verbose=2, callbacks=callbacks)

In [50]:
MODEL_WEIGHTS_FILE="movielens_model_adam_5.h5"
callbacks = [EarlyStopping('val_age_out_loss', patience=5), 
              ModelCheckpoint(MODEL_WEIGHTS_FILE, save_best_only=True)]
history = model_new.fit([Age,Gender, Movies], [Ratings,Ratings], epochs=200, validation_split=.1,batch_size=20000, verbose=2, callbacks=callbacks)

Train on 538041 samples, validate on 59783 samples
Epoch 1/200
 - 1s - loss: 1.6956 - age_out_loss: 0.8460 - gender_out_loss: 0.8495 - val_loss: 1.9955 - val_age_out_loss: 0.9974 - val_gender_out_loss: 0.9981
Epoch 2/200
 - 1s - loss: 1.6955 - age_out_loss: 0.8460 - gender_out_loss: 0.8495 - val_loss: 1.9955 - val_age_out_loss: 0.9974 - val_gender_out_loss: 0.9981
Epoch 3/200
 - 1s - loss: 1.6954 - age_out_loss: 0.8459 - gender_out_loss: 0.8495 - val_loss: 1.9955 - val_age_out_loss: 0.9974 - val_gender_out_loss: 0.9981
Epoch 4/200
 - 1s - loss: 1.6954 - age_out_loss: 0.8459 - gender_out_loss: 0.8495 - val_loss: 1.9955 - val_age_out_loss: 0.9974 - val_gender_out_loss: 0.9981
Epoch 5/200
 - 1s - loss: 1.6954 - age_out_loss: 0.8459 - gender_out_loss: 0.8494 - val_loss: 1.9955 - val_age_out_loss: 0.9974 - val_gender_out_loss: 0.9981
Epoch 6/200
 - 1s - loss: 1.6953 - age_out_loss: 0.8459 - gender_out_loss: 0.8494 - val_loss: 1.9955 - val_age_out_loss: 0.9974 - val_gender_out_loss: 0.9981
E

In [161]:
def rate( model,age,gender, item_id):
        return model.predict([np.array([age]),np.array([gender]), np.array([item_id])])

In [49]:
model_new.load_weights("movielens_model_adam_4.h5")

In [162]:
output=rate(model_new,0,0,4223)

In [163]:
print output[1][0][0]

2.54702


In [55]:
users=df = pd.read_csv('/home/dhara/movielens/ml-1m/users.dat', sep='::', header=None, skiprows=0,engine='python', encoding='latin-1',
                    names=['userid', 'gender', 'age', 'occupation', 'zipcode'])

In [57]:
users[users['userid'] == 3000]

Unnamed: 0,userid,gender,age,occupation,zipcode
2999,3000,M,25,4,55408


In [167]:
user_ratings = training[(training['age'] == 4)][['userid', 'movieId', 'rating','age','gender']]

In [169]:
user_ratings['prediction'] = user_ratings.apply(lambda x: rate(model_new,x['age']-1,0, x['movieId']), axis=1)

In [171]:
def get_scalar(x):
    return x[0][0][0]

In [173]:
def get_scalar_gender(x):
    return x[1][0][0]

In [172]:
user_ratings['prediction_age']=user_ratings.apply(lambda x:get_scalar(x['prediction']),axis=1)

In [174]:
user_ratings['prediction_gender']=user_ratings.apply(lambda x:get_scalar_gender(x['prediction']),axis=1)

In [67]:
movies=pd.read_csv("/home/dhara/movielens/ml-latest/movies.csv")

In [176]:
user_rating_top=user_ratings.merge(movies, on='movieId', 
                                                how='inner', 
                                                suffixes=['_u', '_m']).sort_values(by='rating', ascending=False)

In [177]:
user_rating_top.head(10)

Unnamed: 0,userid,movieId,rating,age,gender,prediction,prediction_age,prediction_gender,title,genres
332,3087,92259,5.0,4,F,"[[[-0.0291892]], [[-0.0324015]]]",-0.029189,-0.032401,Intouchables (2011),Comedy|Drama
129,2133,356,5.0,4,F,"[[[3.64925]], [[3.64552]]]",3.649249,3.645521,Forrest Gump (1994),Comedy|Drama|Romance|War
131,1948,356,5.0,4,M,"[[[3.64925]], [[3.64552]]]",3.649249,3.645521,Forrest Gump (1994),Comedy|Drama|Romance|War
970,2853,340,5.0,4,M,"[[[3.01725]], [[3.00979]]]",3.017253,3.009794,"War, The (1994)",Adventure|Drama|War
973,5953,4499,5.0,4,M,"[[[3.87309]], [[3.84984]]]",3.873091,3.849835,Dirty Rotten Scoundrels (1988),Comedy
1481,4294,1569,5.0,4,M,"[[[2.8912]], [[2.88032]]]",2.891201,2.880322,My Best Friend's Wedding (1997),Comedy|Romance
977,1926,56174,5.0,4,F,"[[[0.00869921]], [[-0.00798006]]]",0.008699,-0.00798,I Am Legend (2007),Action|Horror|Sci-Fi|Thriller|IMAX
1478,5953,5902,5.0,4,M,"[[[3.84291]], [[3.84085]]]",3.842914,3.840845,Adaptation (2002),Comedy|Drama|Romance
983,2853,4,5.0,4,M,"[[[3.05206]], [[3.03969]]]",3.052064,3.039692,Waiting to Exhale (1995),Comedy|Drama|Romance
470,5255,1073,5.0,4,M,"[[[0.0848734]], [[0.101768]]]",0.084873,0.101768,Willy Wonka & the Chocolate Factory (1971),Children|Comedy|Fantasy|Musical


In [None]:
recommendations=training[(training['movieId'].isin(user_rating_top['movieId']) == False)][['movieId']].drop_duplicates()
recommendations['prediction'] = recommendations.apply(lambda x: rate(model_new,0,1,x['movieId']), axis=1)


In [179]:
recommendations_new=recommendations.merge(training,on='movieId',how='inner')
recommendations_new=recommendations_new[(recommendations_new['age']>1)&(recommendations_new['age']<18)]

In [180]:
recommendations_new=recommendations_new.drop_duplicates(subset=['movieId'], keep=False)

In [182]:
recommendations_new['prediction_age']=recommendations_new.apply(lambda x:get_scalar(x['prediction']),axis=1)

In [183]:
recommendations_new['prediction_gender']=recommendations_new.apply(lambda x:get_scalar_gender(x['prediction']),axis=1)

In [184]:
output_new=recommendations_new.merge(movies,on='movieId',how='inner',suffixes=['_u', '_m']).sort_values(by='prediction_age', 
                         ascending=False).head(10)


In [185]:
output_new

Unnamed: 0,movieId,prediction,userid,rating,timestamp,user_emb_id,movie_emb_id,gender,age,occupation,zipcode,age_desc,occ_desc,gender_enc,age_emb_id,prediction_age,prediction_gender,title,genres
219,3807,"[[[4.93749]], [[4.98937]]]",4294,4.0,989501369,4293,3806,M,2,10,75633,Under 18,K-12 student,0,1,4.937489,4.989367,Sinbad and the Eye of the Tiger (1977),Adventure|Fantasy
841,27368,"[[[4.70033]], [[4.75029]]]",468,3.5,1250792643,467,27367,F,7,10,55082,Under 18,K-12 student,1,6,4.700326,4.750285,Asterix & Obelix: Mission Cleopatra (Astérix &...,Adventure|Comedy|Fantasy
210,4383,"[[[4.58429]], [[4.60608]]]",3537,4.5,1231336587,3536,4382,M,2,10,97402,Under 18,K-12 student,0,1,4.584289,4.606077,"Crimson Rivers, The (Rivières pourpres, Les) (...",Crime|Drama|Mystery|Thriller
1931,7095,"[[[4.56822]], [[4.62915]]]",3604,1.5,1075942035,3603,7094,F,14,10,02879,Under 18,K-12 student,1,13,4.568215,4.629147,Looking for Mr. Goodbar (1977),Drama
1869,2936,"[[[4.46585]], [[4.48463]]]",3604,4.5,1185985127,3603,2935,F,14,10,02879,Under 18,K-12 student,1,13,4.46585,4.484628,Sullivan's Travels (1941),Adventure|Comedy|Romance
1037,27833,"[[[4.45969]], [[4.47077]]]",3780,0.5,1129494696,3779,27832,M,9,0,46979,Under 18,other or not specified,0,8,4.45969,4.47077,Silver City (2004),Comedy|Drama|Mystery|Thriller
872,6126,"[[[4.45092]], [[4.48002]]]",5662,4.0,1232310502,5661,6125,M,7,10,07960,Under 18,K-12 student,0,6,4.450917,4.480024,"Veronika Voss (Sehnsucht der Veronika Voss, Di...",Drama
700,6525,"[[[4.40742]], [[4.43849]]]",3780,3.5,1109257948,3779,6524,M,6,0,46979,Under 18,other or not specified,0,5,4.407422,4.438488,Nuts (1987),Drama
2167,7938,"[[[4.3895]], [[4.39616]]]",1673,4.0,1272356277,1672,7937,M,16,10,98043-3621,Under 18,K-12 student,0,15,4.389503,4.396155,Winter Light (Nattvardsgästerna) (1963),Drama
1563,7937,"[[[4.3546]], [[4.3961]]]",5662,4.0,1154285246,5661,7936,M,12,10,07960,Under 18,K-12 student,0,11,4.3546,4.396101,"Silence, The (Tystnaden) (1963)",Drama
