In [19]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.impute import SimpleImputer


url = 'movies 2.csv'
movies_df = pd.read_csv(url)


movies_df = movies_df.dropna(subset=['Title', 'Rating', 'Genre', 'Budget'])


movies_df['Genre'] = movies_df['Genre'].apply(lambda x: x.split(','))


mlb = MultiLabelBinarizer()
genre_matrix = mlb.fit_transform(movies_df['Genre'])
genre_df = pd.DataFrame(genre_matrix, columns=mlb.classes_, index=movies_df.index)

movies_features = pd.concat([movies_df[['Title', 'Rating', 'Budget']], genre_df], axis=1)


movies_features['Rating'] = pd.to_numeric(movies_features['Rating'], errors='coerce')
movies_features['Budget'] = pd.to_numeric(movies_features['Budget'].replace('[\$,]', '', regex=True), errors='coerce')


imputer = SimpleImputer(strategy='mean')
movies_features[['Rating', 'Budget']] = imputer.fit_transform(movies_features[['Rating', 'Budget']])

normalized_features = (movies_features[['Rating', 'Budget']] - movies_features[['Rating', 'Budget']].mean()) / movies_features[['Rating', 'Budget']].std()

final_features = pd.concat([normalized_features, genre_df], axis=1)

similarity_matrix = cosine_similarity(final_features)


def get_similar_movies(query_title, top_n=10):
    query_index = movies_features[movies_features['Title'] == query_title].index[0]
    similarity_scores = list(enumerate(similarity_matrix[query_index]))

    sorted_movies = sorted(similarity_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]

    similar_movie_indices = [i[0] for i in sorted_movies]
    similar_movie_titles = movies_features['Title'].iloc[similar_movie_indices].tolist()
    similar_movie_scores = [i[1] for i in sorted_movies]

    return list(zip(similar_movie_titles, similar_movie_scores))

query_movie = "Spider-Man: Into the Spider-Verse"
similar_movies = get_similar_movies(query_movie)
print(f"Top 10 movies similar to '{query_movie}':")
for title, score in similar_movies:
    print(f"{title}: {score:.2f}")

query_movie = "The Dark Knight"
similar_movies = get_similar_movies(query_movie)
print(f"\nTop 10 movies similar to '{query_movie}':")
for title, score in similar_movies:
    print(f"{title}: {score:.2f}")

query_movie = "Top Gun: Maverick"
similar_movies = get_similar_movies(query_movie)
print(f"\nTop 10 movies similar to '{query_movie}':")
for title, score in similar_movies:
    print(f"{title}: {score:.2f}")


Top 10 movies similar to 'Spider-Man: Into the Spider-Verse':
Paddington: 0.98
The Muppets: 0.98
Elf: 0.95
Noelle: 0.91
Paddington 2: 0.90
Charlotte's Web: 0.89
Night at the Museum: 0.84
The Christmas Chronicles: Part Two: 0.82
Bedtime Stories: 0.81
Mary Poppins Returns: 0.81

Top 10 movies similar to 'The Dark Knight':
Three Billboards Outside Ebbing, Missouri: 1.00
Knives Out: 0.98
Glass Onion: A Knives Out Mystery: 0.98
Bad Santa: 0.93
Tangerine: 0.92
Little Miss Sunshine: 0.90
Birdman or (The Unexpected Virtue of Ignorance): 0.90
Burn After Reading: 0.90
Captain Fantastic: 0.90
Lost in Translation: 0.90

Top 10 movies similar to 'Top Gun: Maverick':
The Last Samurai: 0.99
The Dark Knight Rises: 0.96
The Dark Knight: 0.96
Batman Begins: 0.94
Dune: 0.93
Blade Runner 2049: 0.93
The Batman: 0.92
The Revenant: 0.91
Dawn of the Planet of the Apes: 0.90
Avengers: Endgame: 0.88
