In [None]:
# Notebook-wide imports and configuration.
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from scipy.sparse import csr_matrix
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# `tf` is only a local alias created by `import tensorflow as tf`; it is not an
# importable package name, so `from tf.... import ...` fails on a fresh kernel.
# The real package path `tensorflow....` must be spelled out in `from` imports.
from tensorflow.python.keras.optimizers import Adam, RMSprop
from tensorflow.python.keras.layers import Input, Dense, Flatten, Dropout, Activation
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.keras import backend as K
from tensorflow.python.keras import regularizers
from tensorflow.keras.models import model_from_json
from tensorflow.keras.utils import plot_model

# Placeholder written into the user x movie rating matrix for missing ratings
# (`fillna(fill_value)`); the masked loss functions exclude entries equal to it.
fill_value = 2.5

In [None]:

def masked_se(y_true, y_pred):
    """Masked sum of squared errors along the last axis.

    Entries of ``y_true`` equal to the module-level ``fill_value`` are treated
    as missing: their error is multiplied by zero before squaring, so they do
    not contribute to the sum.
    """
    # 1.0 where a real target is present, 0.0 where y_true holds the placeholder.
    observed = K.cast(K.not_equal(y_true, fill_value), K.floatx())
    squared_residual = K.square(observed * (y_true - y_pred))
    return K.sum(squared_residual, axis=-1)

def masked_mse(y_true, y_pred):
    """Masked mean squared error along the last axis.

    Entries of ``y_true`` equal to the module-level ``fill_value`` are treated
    as missing and excluded from both the error sum and the count. The count is
    floored at 1 so a slice with no observed entries yields 0 instead of
    dividing by zero.
    """
    # 1.0 where a real target is present, 0.0 where y_true holds the placeholder.
    observed = K.cast(K.not_equal(y_true, fill_value), K.floatx())
    squared_residual = K.square(observed * (y_true - y_pred))
    observed_count = K.maximum(K.sum(observed, axis=-1), 1)
    return K.sum(squared_residual, axis=-1) / observed_count
    
def masked_rmse(y_true, y_pred):
    """Masked root mean squared error along the last axis.

    Entries of ``y_true`` equal to the module-level ``fill_value`` are treated
    as missing and excluded from both the error sum and the count. The count is
    floored at 1 to avoid dividing by zero when nothing is observed.
    """
    # 1.0 where a real target is present, 0.0 where y_true holds the placeholder.
    observed = K.cast(K.not_equal(y_true, fill_value), K.floatx())
    squared_residual = K.square(observed * (y_true - y_pred))
    observed_count = K.maximum(K.sum(observed, axis=-1), 1)
    mean_squared = K.sum(squared_residual, axis=-1) / observed_count
    return K.sqrt(mean_squared)

    
def masked_rmse_clip(y_true, y_pred):
    """Masked RMSE along the last axis, with predictions clipped to [1, 5].

    Same computation as a masked root mean squared error, except that
    ``y_pred`` is first clipped to the interval [1, 5]. Entries of ``y_true``
    equal to the module-level ``fill_value`` are treated as missing; the
    observed count is floored at 1 to avoid dividing by zero.
    """
    # 1.0 where a real target is present, 0.0 where y_true holds the placeholder.
    observed = K.cast(K.not_equal(y_true, fill_value), K.floatx())
    clipped_pred = K.clip(y_pred, 1, 5)
    squared_residual = K.square(observed * (y_true - clipped_pred))
    observed_count = K.maximum(K.sum(observed, axis=-1), 1)
    mean_squared = K.sum(squared_residual, axis=-1) / observed_count
    return K.sqrt(mean_squared)
  

In [None]:

from io import StringIO

from ibm_botocore.client import Config
import ibm_boto3

# Read the movies and ratings datasets from IBM Cloud Object Storage using the
# IBM Cloud access credentials below.
# NOTE(review): the credential values are redacted here; prefer loading real
# values from the environment or a secrets store rather than committing them.

credentials_movies = {
    'IAM_SERVICE_ID': '***',
    'IBM_API_KEY_ID': '***',
    'ENDPOINT': '***',
    'IBM_AUTH_ENDPOINT': '***',
    'BUCKET': '***',
    'FILE': 'movies.dat'
}

credentials_ratings = {
    'IAM_SERVICE_ID': '***',
    'IBM_API_KEY_ID': '***',
    'ENDPOINT': '***',
    'IBM_AUTH_ENDPOINT': '***',
    'BUCKET': '***',
    'FILE': 'ratings.dat'
}


# One object-storage client per credential set.
cos_ratings = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=credentials_ratings['IBM_API_KEY_ID'],
    ibm_service_instance_id=credentials_ratings['IAM_SERVICE_ID'],
    ibm_auth_endpoint=credentials_ratings['IBM_AUTH_ENDPOINT'],
    config=Config(signature_version='oauth'),
    endpoint_url=credentials_ratings['ENDPOINT'])

cos_movies = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=credentials_movies['IBM_API_KEY_ID'],
    ibm_service_instance_id=credentials_movies['IAM_SERVICE_ID'],
    ibm_auth_endpoint=credentials_movies['IBM_AUTH_ENDPOINT'],
    config=Config(signature_version='oauth'),
    endpoint_url=credentials_movies['ENDPOINT'])

# Download both files as text; bucket and key come from the credential dicts so
# they are defined in exactly one place.
csv_body = cos_ratings.get_object(
    Bucket=credentials_ratings['BUCKET'],
    Key=credentials_ratings['FILE'])['Body'].read().decode(encoding='latin-1')
movies_body = cos_movies.get_object(
    Bucket=credentials_movies['BUCKET'],
    Key=credentials_movies['FILE'])['Body'].read().decode(encoding='latin-1')

# `pd.compat.StringIO` was removed from pandas; `io.StringIO` is the
# standard-library equivalent. The multi-character separator "::" requires the
# python parsing engine.
# NOTE(review): the first line of each file is consumed as a header row. If the
# .dat files have no header line, pass `header=None` so the first record is not
# silently dropped - confirm against the stored files.
df = pd.read_csv(StringIO(csv_body), sep="::", engine="python")

movies_df = pd.read_csv(StringIO(movies_body), sep="::", engine="python")

# Name the columns BEFORE anything looks them up by name: the original read
# df['userId'] / df['movieId'] ahead of this assignment.
df.columns = ['userId', 'movieId', 'rating', 'timestamp']
movies_df.columns = ['movieId', 'title', 'genres']

num_users = df['userId'].unique().max() + 1
num_movies = df['movieId'].unique().max() + 1


# Aliases of the frames above (same objects, not copies).
df_mov = movies_df
df_user_ratings = df

df_mov.set_index("movieId", drop=True, inplace=True)


# Attach title/genres to every rating; ratings for movies absent from the
# movies table are dropped by the inner join.
df_user_ratings = df_user_ratings.merge(df_mov, how="inner", on="movieId")

# Number of ratings received by each movie, as a single `num_ratings` column
# indexed by movieId.
df_num = (
    df_user_ratings[["movieId", "userId"]]
    .groupby(by="movieId")
    .count()
    .rename(columns={"userId": "num_ratings"})
)

df_user_ratings = df_user_ratings.merge(df_num, how="inner", on="movieId")


min_ratings = 40
# The model was trained with `> 40` (rather than `>=`), so the saved model JSON
# and the hd5 weights contain a model trained with an input dimension of 2653
# features (the number of movies considered).
df_user_ratings = df_user_ratings[df_user_ratings["num_ratings"] > min_ratings]


# User x movie rating matrix. Pivot once, then fill the missing ratings two
# ways: with the masked-loss placeholder and with zeros.
ratings_pivot = pd.pivot_table(df_user_ratings, index="userId", columns="movieId", values="rating")
ratings_mat = ratings_pivot.fillna(fill_value)
ratings_mat_with_zeros = ratings_pivot.fillna(0)


# Autoencoder hyper-parameters.
dropout = 0.8

activation = 'selu'
last_activation = 'relu'
regularizer_encode = 0.001
regularizer_decode = 0.001


# Autoencoder: n_movies -> 512 -> 256 -> dropout -> 512 -> n_movies.
# Layer names are plain literals; the original called `.format(i)` on them with
# an undefined `i`, which raised NameError (the strings have no placeholders).
input_layer = x = Input(shape=(ratings_mat.shape[1],), name='UserRating')
encoder = Dense(512, activation=activation,
                name='EncLayer1', kernel_regularizer=regularizers.l2(regularizer_encode))(input_layer)
encoder = Dense(256, activation=activation,
                name='EncLayer2', kernel_regularizer=regularizers.l2(regularizer_encode))(encoder)
encoder = Dropout(rate=dropout)(encoder)
decoder = Dense(512, activation=activation,
                name='DecLayer3', kernel_regularizer=regularizers.l2(regularizer_decode))(encoder)
output_layer = Dense(ratings_mat.shape[1], activation=last_activation,
                     name='UserScorePred', kernel_regularizer=regularizers.l2(regularizer_decode))(decoder)

model = Model(input_layer, output_layer)


model.compile(optimizer=Adam(lr=0.0001), loss=masked_rmse, metrics=[masked_rmse_clip])
model.summary()


# Train the network to reconstruct its own input (x == y).
model.fit(x=ratings_mat, y=ratings_mat,
          epochs=300,
          batch_size=256,
          verbose=1)


# Upload the architecture (JSON) to the ratings bucket.
cos_ratings.put_object(Body=model.to_json(), Bucket=credentials_ratings['BUCKET'], Key="deep_ae_9_rmse.json")

# Save the weights locally, then upload the file's bytes to the same bucket.
model.save_weights("ae_model_weights_9_rmse.h5")

with open("ae_model_weights_9_rmse.h5", "rb") as fp:
    data = fp.read()
    cos_ratings.put_object(Body=data, Bucket=credentials_ratings['BUCKET'], Key="ae_model_weights_saved_9_rmse.hd5")




In [None]:
from tensorflow.keras.models import model_from_json
import json
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np

from sklearn.model_selection import train_test_split
from tensorflow.python.keras.optimizers import Adam, RMSprop
from tensorflow.python.keras.layers import Input, Dense, Flatten, Dropout, Activation
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.regularizers import l2
from tensorflow.python.keras import backend as K
from tensorflow.python.keras import regularizers
from tensorflow.keras.models import model_from_json
import sys, json

from scipy.sparse import csr_matrix
import tensorflow as tf

from sklearn import preprocessing
from tensorflow.keras.utils import plot_model

import tensorflow.keras

# Load the locally saved model: architecture from the JSON file, then weights.

# A context manager closes the file even if read() raises.
with open('./deep_ae_model/deep_ae.json', 'r') as model_json_file:
    model_json = model_json_file.read()

model = model_from_json(model_json)

model.load_weights("./deep_ae_model/deep_ae_weights.hd5")
# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()