## Importing needed libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
from API.datebase import create_df

## Loading data

In [2]:
df = create_df()
df.head()

[{'id': 1, 'name': 'Akcja', 'id_generes': 28}, {'id': 1, 'name': 'Akcja', 'id_generes': 28}, {'id': 1, 'name': 'Akcja', 'id_generes': 28}, {'id': 1, 'name': 'Akcja', 'id_generes': 28}, {'id': 3, 'name': 'Przygodowy', 'id_generes': 12}, {'id': 3, 'name': 'Przygodowy', 'id_generes': 12}, {'id': 3, 'name': 'Przygodowy', 'id_generes': 12}, {'id': 3, 'name': 'Przygodowy', 'id_generes': 12}, {'id': 16, 'name': 'Sci-Fi', 'id_generes': 878}, {'id': 16, 'name': 'Sci-Fi', 'id_generes': 878}, {'id': 16, 'name': 'Sci-Fi', 'id_generes': 878}, {'id': 16, 'name': 'Sci-Fi', 'id_generes': 878}]
{'id': 5, 'title': 'Furiosa: Saga Mad Max', 'original_title': 'Furiosa: A Mad Max Saga', 'original_language': 'en', 'overview': 'Kiedy świat upada, młoda Furiosa zostaje uprowadzona z Zielonego Miejsca Wielu Matek. Wpada w ręce potężnej Hordy Bikerów, której przewodzi watażka Dementus. Po przebyciu Pustkowi porywacze docierają do Cytadeli, gdzie rządzi Wieczny Joe. Dwóch tyranów zaczyna walkę o władzę, Furiosa z

Unnamed: 0,id,title,original_title,original_language,overview,popularity,release_date,vote_average,vote_count,keywords,poster_path,genres
0,5,Furiosa: Saga Mad Max,Furiosa: A Mad Max Saga,en,"Kiedy świat upada, młoda Furiosa zostaje uprow...",3815.308,2024-05-22,7.696,1718,"furiosa, bikerów, cytadeli, dementus, docieraj...",/eLFOjrfPs61kaSvbBej5wZfaHtL.jpg,"Akcja, Akcja, Akcja, Akcja, Przygodowy, Przygo..."
1,6,Petricore,Petricore,it,,1.386,2024-06-15,0.0,0,,,"Tajemnica, Tajemnica, Tajemnica"
2,7,Rattle,Rattle,en,,0.981,2024-06-01,0.0,0,,,
3,8,The Killing Spree: Hunting Joanne Dennehy,The Killing Spree: Hunting Joanne Dennehy,en,,1.4,2024-06-18,0.0,0,,,"Dokumentalny, Dokumentalny"
4,9,Cadillac: Go Louder,Cadillac: Go Louder,en,,1.4,2024-06-28,0.0,0,,/d6Rtw6rDQgTWmH23BIOhLdBjXzR.jpg,"Fantasy, Fantasy"


## Creating a movie recommendation system

### Data preparation

Assigning grade ranges

In [3]:
df_copy = df.copy()
df_copy.loc[(df_copy['vote_average'] >= 0) & (df_copy['vote_average'] <= 1), 'rating_between'] = "between 0 and 1"
df_copy.loc[(df_copy['vote_average'] > 1) & (df_copy['vote_average'] <= 2), 'rating_between'] = "between 1 and 2"
df_copy.loc[(df_copy['vote_average'] > 2) & (df_copy['vote_average'] <= 3), 'rating_between'] = "between 2 and 3"
df_copy.loc[(df_copy['vote_average'] > 3) & (df_copy['vote_average'] <= 4), 'rating_between'] = "between 3 and 4"
df_copy.loc[(df_copy['vote_average'] > 4) & (df_copy['vote_average'] <= 5), 'rating_between'] = "between 4 and 5"
df_copy.loc[(df_copy['vote_average'] > 5) & (df_copy['vote_average'] <= 6), 'rating_between'] = "between 5 and 6"
df_copy.loc[(df_copy['vote_average'] > 6) & (df_copy['vote_average'] <= 7), 'rating_between'] = "between 6 and 7"
df_copy.loc[(df_copy['vote_average'] > 7) & (df_copy['vote_average'] <= 8), 'rating_between'] = "between 7 and 8"
df_copy.loc[(df_copy['vote_average'] > 8) & (df_copy['vote_average'] <= 9), 'rating_between'] = "between 8 and 9"
df_copy.loc[(df_copy['vote_average'] > 9) & (df_copy['vote_average'] <= 10), 'rating_between'] = "between 9 and 10"

Filling in missing values

In [4]:
df_copy['genres'] = df_copy['genres'].fillna('no category')

Creating categorical variables

In [5]:
rating_df = pd.get_dummies(df_copy['rating_between'])
language_df = pd.get_dummies(df_copy['original_language'])

Extracting the main category

In [6]:
df_copy['main_category'] = df_copy['genres'].apply(lambda x: x.split('-')[0])
genres_df = pd.get_dummies(df_copy.main_category)

## Creating features

In [7]:
features = pd.concat([rating_df, language_df, genres_df, df_copy['vote_average'], df_copy['vote_count']], axis=1)

### Feature scaling

In [8]:
min_max_scaler = MinMaxScaler()
features = min_max_scaler.fit_transform(features)

## Division into training and test set

In [9]:
X = features
y = df_copy[['title', 'main_category']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Creating a KNN model

In [10]:
n_neighbors = 10
knn = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto').fit(X_train)

## Recommender function

In [11]:
def recommend_movies(title, n_recommendations=5):
    if title not in y_train['title'].values:
        return "Film nie znaleziony w bazie treningowej."
    
    idx = y_train[y_train['title'] == title].index[0]
    movie_features = X_train[idx]
    
    # Zmiana kształtu na (1, -1)
    movie_features = movie_features.reshape(1, -1)
    
    distances, indices = knn.kneighbors(movie_features, n_neighbors=n_recommendations + 1)
    
    recommendations = []
    for i in range(1, len(indices[0])):
        recommendations.append(y_train.iloc[indices[0][i]])
    
    return recommendations

## Testing the model

In [19]:
recommendations = recommend_movies("We Dream of Eden", n_recommendations=5)
for title, genres in recommendations:
    print(f"Film: {title}, Kategorie: {genres}")

Film: GAZA | La franja del exterminio, Kategorie: Dokumentalny
Film: يومين, Kategorie: Komedia, Dramat, Familijny
Film: Delirio, Kategorie: Dramat, Thriller
Film: We Dream of Eden, Kategorie: Dramat, Muzyczny
Film: Nowhere Near, Kategorie: Dramat


In [17]:
recommendations = recommend_movies("Komedia", n_recommendations=30)
for title, genres in recommendations:
    print(f"Film: {title}, Kategorie: {genres}")

ValueError: not enough values to unpack (expected 2, got 1)

In [53]:
def to_dict(df):
    row_dict ={}
    for index, row in df.iterrows():
        for column in df.columns:
            row_dict[column] = row[column] 
    return row_dict

In [59]:
def recommend_movies2(title, n_recommendations=5):
    if title not in y_train['title'].values:
        return "Film nie znaleziony w bazie treningowej."
    
    idx = y_train[y_train['title'] == title].index[0]
    movie_features = X_train[idx]
    
    # Zmiana kształtu na (1, -1)
    movie_features = movie_features.reshape(1, -1)
    
    distances, indices = knn.kneighbors(movie_features, n_neighbors=n_recommendations + 1)
    recommendations = []
    all_data = []
    for i in range(1, len(indices[0])):
        recommendations.append(y_train.iloc[indices[0][i]])
        recommended_title = y.iloc[indices[0][i]]['title']
        data = df[df['title'] == recommended_title]
        data = data.fillna("")
        all_data.append(to_dict(data))
    return all_data
    # return recommendations

In [60]:
recommendations = recommend_movies2("We Dream of Eden", n_recommendations=5)

In [61]:
recommendations

[{'id': 80,
  'title': 'Bad Shabbos',
  'original_title': 'Bad Shabbos',
  'original_language': 'en',
  'overview': '',
  'popularity': 5.66,
  'release_date': '2024-06-10',
  'vote_average': 6.0,
  'vote_count': 2,
  'keywords': '',
  'poster_path': '/84cEj8nVFIlkiWruoe11NMYaKIN.jpg',
  'genres': 'Komedia'},
 {'id': 164,
  'title': 'Shift',
  'original_title': 'Shift',
  'original_language': 'es',
  'overview': '',
  'popularity': 1.97,
  'release_date': '2024-06-21',
  'vote_average': 0.0,
  'vote_count': 0,
  'keywords': '',
  'poster_path': '/oWBq8LFrW2vzjdTebI1m6wEuzJv.jpg',
  'genres': 'Dramat'},
 {'id': 27,
  'title': 'Cadillac: Go Louder',
  'original_title': 'Cadillac: Go Louder',
  'original_language': 'en',
  'overview': '',
  'popularity': 1.4,
  'release_date': '2024-06-28',
  'vote_average': 0.0,
  'vote_count': 0,
  'keywords': '',
  'poster_path': '/d6Rtw6rDQgTWmH23BIOhLdBjXzR.jpg',
  'genres': ''},
 {'id': 28,
  'title': '7G: The Fight Against Phones',
  'original_titl