## Importing needed libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors

## Loading data

In [20]:
df = pd.read_csv('./data/movies.csv')
df.head()

Unnamed: 0,id,title,genres,original_language,overview,popularity,production_companies,release_date,budget,revenue,runtime,status,tagline,vote_average,vote_count,credits,keywords,poster_path,backdrop_path,recommendations
0,823464,Godzilla x Kong: The New Empire,Science Fiction-Action-Adventure,en,Following their explosive showdown Godzilla an...,10484.676,Legendary Pictures-Warner Bros. Pictures,2024-03-27,150000000.0,558503759.0,115.0,Released,Rise together or fall alone.,7.246,1878.0,Rebecca Hall-Brian Tyree Henry-Dan Stevens-Kay...,giant monster-sequel-dinosaur-kaiju-fantasy wo...,/v4uvGFAkKuYfyKLGZnYj6l47ERQ.jpg,/j3Z3XktmWB1VhsS8iXNcrR86PXi.jpg,1011985-1275232-359410-601796-967847-653346-69...
1,615656,Meg 2: The Trench,Action-Science Fiction-Horror,en,An exploratory dive into the deepest depths of...,8763.998,Apelles Entertainment-Warner Bros. Pictures-di...,2023-08-02,129000000.0,352056482.0,116.0,Released,Back for seconds.,7.079,1365.0,Jason Statham-Wu Jing-Shuya Sophia Cai-Sergio ...,based on novel or book-sequel-kaiju,/4m1Au3YkjqsxF8iwQy0fPYSxE0h.jpg,/qlxy8yo5bcgUw2KAmmojUKp4rHd.jpg,1006462-298618-569094-1061181-346698-1076487-6...
2,758323,The Pope's Exorcist,Horror-Mystery-Thriller,en,Father Gabriele Amorth Chief Exorcist of the V...,5953.227,Screen Gems-2.0 Entertainment-Jesus & Mary-Wor...,2023-04-05,18000000.0,65675816.0,103.0,Released,Inspired by the actual files of Father Gabriel...,7.433,545.0,Russell Crowe-Daniel Zovatto-Alex Essoe-Franco...,spain-rome italy-vatican-pope-pig-possession-c...,/9JBEPLTPSm0d1mbEcLxULjJq9Eh.jpg,/hiHGRbyTcbZoLsYYkO4QiCLYe34.jpg,713704-296271-502356-1076605-1084225-1008005-9...
3,667538,Transformers: Rise of the Beasts,Action-Adventure-Science Fiction,en,When a new threat capable of destroying the en...,5409.104,Skydance-Paramount-di Bonaventura Pictures-Bay...,2023-06-06,200000000.0,407045464.0,127.0,Released,Unite or fall.,7.34,1007.0,Anthony Ramos-Dominique Fishback-Luna Lauren V...,peru-alien-end of the world-based on cartoon-b...,/gPbM0MK8CP8A174rmUwGsADNYKD.jpg,/woJbg7ZqidhpvqFGGMRhWQNoxwa.jpg,496450-569094-298618-385687-877100-598331-4628...
4,693134,Dune: Part Two,Science Fiction-Adventure,en,Follow the mythic journey of Paul Atreides as ...,4742.163,Legendary Pictures,2024-02-27,190000000.0,683813734.0,167.0,Released,Long live the fighters.,8.3,2770.0,Timothée Chalamet-Zendaya-Rebecca Ferguson-Jav...,epic-based on novel or book-fight-sandstorm-sa...,/czembW0Rk1Ke7lCJGahbOhdCuhV.jpg,/xOMo8BRK7PfcJv9JCnx7s5hj0PX.jpg,438631-763215-792307-1011985-467244-634492-359...


## Creating a movie recommendation system

### Data preparation

Assigning grade ranges

In [21]:
df_copy = df.copy()
df_copy.loc[(df_copy['vote_average'] >= 0) & (df_copy['vote_average'] <= 1), 'rating_between'] = "between 0 and 1"
df_copy.loc[(df_copy['vote_average'] > 1) & (df_copy['vote_average'] <= 2), 'rating_between'] = "between 1 and 2"
df_copy.loc[(df_copy['vote_average'] > 2) & (df_copy['vote_average'] <= 3), 'rating_between'] = "between 2 and 3"
df_copy.loc[(df_copy['vote_average'] > 3) & (df_copy['vote_average'] <= 4), 'rating_between'] = "between 3 and 4"
df_copy.loc[(df_copy['vote_average'] > 4) & (df_copy['vote_average'] <= 5), 'rating_between'] = "between 4 and 5"
df_copy.loc[(df_copy['vote_average'] > 5) & (df_copy['vote_average'] <= 6), 'rating_between'] = "between 5 and 6"
df_copy.loc[(df_copy['vote_average'] > 6) & (df_copy['vote_average'] <= 7), 'rating_between'] = "between 6 and 7"
df_copy.loc[(df_copy['vote_average'] > 7) & (df_copy['vote_average'] <= 8), 'rating_between'] = "between 7 and 8"
df_copy.loc[(df_copy['vote_average'] > 8) & (df_copy['vote_average'] <= 9), 'rating_between'] = "between 8 and 9"
df_copy.loc[(df_copy['vote_average'] > 9) & (df_copy['vote_average'] <= 10), 'rating_between'] = "between 9 and 10"

Filling in missing values

In [22]:
df_copy['genres'] = df_copy['genres'].fillna('no category')

Creating categorical variables

In [23]:
rating_df = pd.get_dummies(df_copy['rating_between'])
language_df = pd.get_dummies(df_copy['original_language'])

Extracting the main category

In [24]:
df_copy['main_category'] = df_copy['genres'].apply(lambda x: x.split('-')[0])
genres_df = pd.get_dummies(df_copy.main_category)

## Creating features

In [25]:
features = pd.concat([rating_df, language_df, genres_df, df_copy['vote_average'], df_copy['vote_count']], axis=1)

### Feature scaling

In [26]:
min_max_scaler = MinMaxScaler()
features = min_max_scaler.fit_transform(features)

## Division into training and test set

In [27]:
X = features
y = df_copy[['title', 'main_category']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Creating a KNN model

In [28]:
n_neighbors = 10
knn = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto').fit(X_train)

## Recommender function

In [29]:
def recommend_movies(title, n_recommendations=5):
    if title not in y_train['title'].values:
        return "Film nie znaleziony w bazie treningowej."
    
    idx = y_train[y_train['title'] == title].index[0]
    movie_features = X_train[idx]
    
    # Zmiana kształtu na (1, -1)
    movie_features = movie_features.reshape(1, -1)
    
    distances, indices = knn.kneighbors(movie_features, n_neighbors=n_recommendations + 1)
    
    recommendations = []
    for i in range(1, len(indices[0])):
        recommendations.append(y_train.iloc[indices[0][i]])
    
    return recommendations

## Testing the model

In [30]:
recommendations = recommend_movies("Toy Story", n_recommendations=5)
for title, genres in recommendations:
    print(f"Film: {title}, Kategorie: {genres}")

Film: Cardinal, Kategorie: no category
Film: Ellombara, Kategorie: no category
Film: Smiling Addiction, Kategorie: no category
Film: ROH & NJPW: Honor Rising Japan - Night 1, Kategorie: no category
Film: Make Ready To Survive - Shelter, Fire, Water, Kategorie: no category


In [32]:
recommendations = recommend_movies("Music", n_recommendations=30)
for title, genres in recommendations:
    print(f"Film: {title}, Kategorie: {genres}")

Film: Almost There, Kategorie: no category
Film: The Girls of St. Mary's, Kategorie: no category
Film: Ellombara, Kategorie: no category
Film: Round & Round, Kategorie: no category
Film: The Key and the Frame, Kategorie: no category
Film: Make Ready To Survive - Shelter, Fire, Water, Kategorie: no category
Film: Gd, Kategorie: no category
Film: His Last Burglary, Kategorie: no category
Film: Hunting Evil, Kategorie: no category
Film: Lola & Virginia (volume 2), Kategorie: no category
Film: VLOG The Movie, Kategorie: no category
Film: Wolves Unleashed, Kategorie: no category
Film: My Mama Wears Timbs, Kategorie: no category
Film: Cardinal, Kategorie: no category
Film: 2006200000, Kategorie: no category
Film: Out of Office, Kategorie: no category
Film: Prima névé, Kategorie: no category
Film: Cinescape, Kategorie: no category
Film: Et steinkast unna, Kategorie: no category
Film: Defective Detectives, Kategorie: no category
Film: Sen, Kategorie: no category
Film: Braaap 9, Kategorie: no c