In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from IPython import get_ipython
from IPython.display import display


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
file_path = '/content/drive/MyDrive/film_oneri/movies_dataset.csv'
df = pd.read_csv(file_path)

features = ['vote_average', 'vote_count', 'popularity', 'runtime', 'budget', 'revenue']
df_features = df[features].fillna(0)

scaler = StandardScaler()
X = scaler.fit_transform(df_features)

In [None]:
from tensorflow.keras import layers, models

input_dim = X.shape[1]
encoding_dim = 16  # Latent boyut

autoencoder = models.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(encoding_dim, activation='relu', name="encoded"),
    layers.Dense(32, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(input_dim, activation='linear')
])

autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()


In [None]:
autoencoder.fit(X, X, epochs=50, batch_size=32, shuffle=True, validation_split=0.1)

Epoch 1/50
[1m20329/20329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 3ms/step - loss: 0.2281 - val_loss: 1.0589
Epoch 2/50
[1m20329/20329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 3ms/step - loss: 0.1032 - val_loss: 1.6800
Epoch 3/50
[1m20329/20329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 3ms/step - loss: 0.2088 - val_loss: 1.0959
Epoch 4/50
[1m20329/20329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 3ms/step - loss: 0.1683 - val_loss: 0.8388
Epoch 5/50
[1m20329/20329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 3ms/step - loss: 0.2771 - val_loss: 1.7609
Epoch 6/50
[1m20329/20329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 3ms/step - loss: 0.2347 - val_loss: 0.6239
Epoch 7/50
[1m20329/20329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 3ms/step - loss: 0.0357 - val_loss: 0.7635
Epoch 8/50
[1m20329/20329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 3ms/step - loss: 0.2067 - val_loss: 1.5786


<keras.src.callbacks.history.History at 0x7d0ca6bd43d0>

In [None]:
from tensorflow.keras import layers, models

encoder = models.Model(inputs=autoencoder.layers[0].input, outputs=autoencoder.get_layer("encoded").output)
X_encoded = encoder.predict(X)

[1m22588/22588[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 1ms/step


In [None]:

def recommend_from_two_autoencoder(title1, title2, top_n=5):
    idx1 = df[df["title"] == title1].index[0]
    idx2 = df[df["title"] == title2].index[0]

    vec1 = X_encoded[idx1]
    vec2 = X_encoded[idx2]
    combined_vec = (vec1 + vec2) / 2

    sims = cosine_similarity([combined_vec], X_encoded)[0]

    similar_indices = sims.argsort()[::-1]
    recs = []
    for idx in similar_indices:
        if idx not in [idx1, idx2]:
            recs.append((df.iloc[idx]["title"], round(sims[idx], 3)))
        if len(recs) >= top_n:
            break
    return recs

recommend_from_two_autoencoder("Interstellar", "Inception")

[('Now You See Me', np.float32(0.999)),
 ('Bridge of Spies', np.float32(0.999)),
 ('Sin City', np.float32(0.998)),
 ('The Fast and the Furious', np.float32(0.998)),
 ('Iron Man', np.float32(0.998))]

In [None]:
autoencoder.save("autoencoder_model.h5")



In [9]:
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Define the path to save and load the model on Google Drive
model_path = '/content/drive/MyDrive/film_oneri/autoencoder_model.h5'

# Load the model from the specified path on Google Drive
autoencoder = load_model(model_path, compile=False)

In [11]:
X_pred = autoencoder.predict(X)  # X zaten dense numpy array

from sklearn.metrics import mean_squared_error, mean_absolute_error

mse = mean_squared_error(X, X_pred)
mae = mean_absolute_error(X, X_pred)

print("🔹 MSE:", round(mse, 6))
print("🔹 MAE:", round(mae, 6))

[1m22588/22588[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 1ms/step
🔹 MSE: 0.05979
🔹 MAE: 0.009582
