In [1]:
# Mount Google Drive at /content/drive so the model and CSV paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from keras.layers import Layer
from keras.saving import register_keras_serializable

@register_keras_serializable()
class DebugLayer(Layer):
    """Pass-through layer: `call` returns its inputs unchanged.

    Registered with Keras serialization and supplied via `custom_objects`
    when the saved model is loaded.
    """

    def __init__(self, **kwargs):
        # Forward every keyword argument to the base Layer constructor.
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return `inputs` as-is."""
        return inputs

In [3]:
import tensorflow as tf

# Load the saved model; DebugLayer is passed in custom_objects so Keras can
# resolve the custom layer by name.
model_path = '/content/drive/My Drive/CMPE256_project/FMachine.keras'
model = tf.keras.models.load_model(
    model_path,
    custom_objects={'DebugLayer': DebugLayer},
)
model.summary()

In [4]:
import pandas as pd
# Training data; later cells read the User_ID, Movie_ID, Rating and movie-property columns.
train_csv_path = '/content/drive/MyDrive/CMPE256_project/train_final.csv'
train = pd.read_csv(train_csv_path)

In [5]:
# For every user, collect the set of Movie_IDs that appear with that user in `train`.
movie_ids_by_user = train.groupby('User_ID')['Movie_ID']
user_to_movie = movie_ids_by_user.apply(set)
print(user_to_movie)

User_ID
6          {1542, 528, 1561, 30, 4127, 1571, 3624, 2095, ...
7          {1542, 2577, 1043, 2580, 1046, 535, 3611, 4123...
79         {1542, 3082, 528, 2580, 1046, 1561, 3610, 4123...
97         {1542, 4123, 3612, 2077, 3106, 4131, 550, 4135...
134        {1027, 1542, 1035, 1558, 1561, 3610, 3617, 259...
                                 ...                        
2649370    {1665, 4356, 3333, 1798, 2186, 2192, 3216, 386...
2649378    {1408, 3840, 4227, 900, 4356, 1542, 1798, 2831...
2649388    {4227, 1542, 4479, 4488, 2953, 2955, 908, 4364...
2649426    {4356, 2699, 17, 273, 2580, 406, 1046, 2200, 1...
2649429    {4356, 3333, 1542, 1798, 1289, 2186, 3466, 143...
Name: Movie_ID, Length: 143458, dtype: object


In [6]:
# Per-movie feature columns, in the order they are fed to preprocessing.
movie_properties = [
    # numeric
    'Year', 'runtimeMinutes',
    # flags: adult + title type
    'Adult', 'movie', 'short', 'tvEpisode', 'tvMiniSeries',
    'tvMovie', 'tvSeries', 'tvSpecial', 'video',
    # flags: genres
    'Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
    'Documentary', 'Drama', 'Family', 'Fantasy', 'Film-Noir',
    'History', 'Horror', 'Music', 'Musical', 'Mystery', 'News',
    'Reality-TV', 'Romance', 'Sci-Fi', 'Short', 'Sport',
    'Talk-Show', 'Thriller', 'War', 'Western',
]

# One row per Movie_ID (first occurrence wins), turned into
# {Movie_ID: {property: value, ...}}.
movies = (
    train.drop_duplicates(subset=['Movie_ID'])[['Movie_ID'] + movie_properties]
    .set_index('Movie_ID')
    .to_dict(orient='index')
)
print(movies)

{3: {'Year': 1997, 'runtimeMinutes': 122, 'Adult': False, 'movie': True, 'short': False, 'tvEpisode': False, 'tvMiniSeries': False, 'tvMovie': False, 'tvSeries': False, 'tvSpecial': False, 'video': False, 'Action': False, 'Adventure': False, 'Animation': False, 'Biography': False, 'Comedy': False, 'Crime': True, 'Documentary': False, 'Drama': True, 'Family': False, 'Fantasy': False, 'Film-Noir': False, 'History': False, 'Horror': False, 'Music': False, 'Musical': False, 'Mystery': True, 'News': False, 'Reality-TV': False, 'Romance': False, 'Sci-Fi': False, 'Short': False, 'Sport': False, 'Talk-Show': False, 'Thriller': False, 'War': False, 'Western': False}, 16: {'Year': 1996, 'runtimeMinutes': 108, 'Adult': False, 'movie': True, 'short': False, 'tvEpisode': False, 'tvMiniSeries': False, 'tvMovie': False, 'tvSeries': False, 'tvSpecial': False, 'video': False, 'Action': False, 'Adventure': False, 'Animation': False, 'Biography': False, 'Comedy': False, 'Crime': False, 'Documentary': Fal

In [7]:
# Raw ID columns wrapped as pandas Series.
user_ids = pd.Series(train['User_ID'].values)
movie_ids = pd.Series(train['Movie_ID'].values)

# Map each distinct raw ID to a contiguous 0-based index, numbered in the
# order returned by Series.unique().
unique_user_ids = user_ids.unique()
unique_movie_ids = movie_ids.unique()
user_id_mapping = dict(zip(unique_user_ids, range(len(unique_user_ids))))
movie_id_mapping = dict(zip(unique_movie_ids, range(len(unique_movie_ids))))

In [16]:
# Gather (user, movie) pairs where the movie is not in the user's set, walking
# users and movies in order. Collection stops as soon as the counter exceeds
# 1000, so 1001 pairs are kept.
users_to_predict, movies_to_predict, other_features = [], [], []
i = 0  # number of pairs collected so far
for user, seen_movies in user_to_movie.items():
  unseen_movies = (movie for movie in movies if movie not in seen_movies)
  for movie in unseen_movies:
    users_to_predict.append(user)
    movies_to_predict.append(movie)
    other_features.append(movies[movie])
    i += 1
    if i > 1000:
      break
  if i > 1000:
    break

In [21]:
from sklearn.preprocessing import StandardScaler
# Module-level scaler; preprocess_data calls scaler.fit_transform on the
# 'Year' and 'runtimeMinutes' columns.
scaler = StandardScaler()
# Preprocessing functions (adapt as needed for your new dataset)
def preprocess_data(user_ids, movie_ids, other_features, user_id_mapping, movie_id_mapping,
                    feature_scaler=None):
    """Re-index raw IDs and scale the numeric movie features.

    Parameters
    ----------
    user_ids, movie_ids : pandas.Series
        Raw user / movie IDs.
    other_features : pandas.DataFrame
        Feature frame containing at least 'Year' and 'runtimeMinutes'; every
        other column is passed through unscaled.
    user_id_mapping, movie_id_mapping : dict
        Raw ID -> index lookups. IDs absent from a mapping come back as NaN
        (behaviour of ``Series.map``).
    feature_scaler : object with ``fit_transform``, optional
        Scaler applied to the numeric columns. Defaults to the module-level
        ``scaler``.

    Returns
    -------
    tuple
        ``(user_ids_reindexed, movie_ids_reindexed, other_features_processed)``.
        The processed frame has 'Year' and 'runtimeMinutes' first, followed by
        the remaining columns, on the same index as ``other_features``; any
        NaN is replaced by 0.
    """
    if feature_scaler is None:
        feature_scaler = scaler

    # Re-index user and movie IDs
    user_ids_reindexed = user_ids.map(user_id_mapping)
    movie_ids_reindexed = movie_ids.map(movie_id_mapping)

    # Normalize numeric features.
    # NOTE(review): fit_transform re-fits the scaler on whatever frame is passed
    # in; if the model was trained with a scaler fitted on the training set,
    # that fitted scaler should probably be reused instead - confirm.
    numeric_columns = ['Year', 'runtimeMinutes']
    numeric_features_scaled = pd.DataFrame(
        feature_scaler.fit_transform(other_features[numeric_columns]),
        columns=numeric_columns,
        # Keep the caller's index: concat(axis=1) aligns on index labels, so a
        # fresh RangeIndex here would misalign rows against binary_features and
        # the fillna(0) below would silently hide the resulting NaNs.
        index=other_features.index,
    )

    # Concatenate scaled numeric and binary features
    binary_features = other_features.drop(columns=numeric_columns)
    other_features_processed = pd.concat(
        [numeric_features_scaled, binary_features], axis=1
    ).fillna(0)

    return user_ids_reindexed, movie_ids_reindexed, other_features_processed

In [18]:
import numpy as np

# Wrap the collected lists in pandas containers and run the preprocessing.
# Note: `other_features` is rebound here to the processed frame returned by
# preprocess_data, so re-running this cell feeds that frame back in.
users_reindexed, movies_reindexed, other_features = preprocess_data(
    user_ids=pd.Series(users_to_predict),
    movie_ids=pd.Series(movies_to_predict),
    other_features=pd.DataFrame(other_features),
    user_id_mapping=user_id_mapping,
    movie_id_mapping=movie_id_mapping,
)

In [19]:
# Sanity check: range of the re-indexed IDs (max then min, users first),
# followed by the processed feature frame.
for reindexed_ids in (users_reindexed, movies_reindexed):
    print(reindexed_ids.max())
    print(reindexed_ids.min())
print(other_features)

71877
52136
1057
0
          Year  runtimeMinutes  Adult  movie  short  tvEpisode  tvMiniSeries  \
0     0.446610        0.804466  False   True  False      False         False   
1     0.381856        0.336752  False   True  False      False         False   
2     0.964635       -0.064146  False  False  False      False         False   
3     0.252350        0.771058  False   True  False      False         False   
4     0.899882       -0.331412  False   True  False      False         False   
...        ...             ...    ...    ...    ...        ...           ...   
996   0.317103       -0.264595  False   True  False      False         False   
997   0.835129       -0.431636  False   True  False      False         False   
998   0.381856        0.704242  False   True  False      False         False   
999   0.252350        0.971507  False   True  False      False         False   
1000  0.640869       -0.431636  False   True  False      False         False   

      tvMovie  tvSer

In [25]:
# Score every collected (user, movie) pair with the loaded model.
model_inputs = [
    users_reindexed,
    movies_reindexed,
    pd.DataFrame(other_features),
]
predictions = model.predict(model_inputs)

# Linearly rescale the raw predictions onto the span between the smallest and
# largest Rating in `train`.
rating_min = train['Rating'].min()
rating_max = train['Rating'].max()
predictions_rescaled = predictions * (rating_max - rating_min) + rating_min
print(predictions_rescaled)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[[2.9354832]
 [3.1227403]
 [2.937117 ]
 ...
 [4.03127  ]
 [3.7018569]
 [3.6479323]]
