In [1]:
import os
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns


In [2]:
pip install fuzzywuzzy

Note: you may need to restart the kernel to use updated packages.


**FuzzyWuzzy is a Python library used for string matching. 
It is particularly useful for comparing and matching strings based on their similarity,
even when the strings are not exactly the same. This can be useful in a variety of applications, 
such as data cleaning, deduplication, and record linkage.**

In [3]:
# Folder that holds the CSV files. It defaults to the original download location;
# set the MOVIE_DATA_DIR environment variable to run the notebook on another machine.
DATA_DIR = Path(os.environ.get("MOVIE_DATA_DIR", r"C:\Users\Asus\Downloads"))

# Movie catalogue: one row per movie with its id, title and pipe-separated genres.
movies = pd.read_csv(DATA_DIR / "movies.csv")
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [4]:
# Folder that holds the CSV files. It defaults to the original download location;
# set the MOVIE_DATA_DIR environment variable to run the notebook on another machine.
DATA_DIR = Path(os.environ.get("MOVIE_DATA_DIR", r"C:\Users\Asus\Downloads"))

# Rating events: one row per (userId, movieId) pair with the rating and a timestamp.
ratings = pd.read_csv(DATA_DIR / "ratings.csv")
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [5]:
# (rows, columns) of the ratings table
ratings.shape


(100836, 4)

In [6]:
# (rows, columns) of the movie catalogue
movies.shape

(9742, 3)

In [7]:
# Build the movie x user rating matrix (missing ratings become 0).
# pivot() only creates rows for movies that occur in `ratings`, ordered by movieId,
# so its row positions are not guaranteed to match the row positions of `movies`.
# The recommender looks movies up by position in both objects, so reindex to
# `movies['movieId']`: row i of the matrix then describes the movie in row i of
# `movies`, and movies without any rating become all-zero rows.
movies_users = (
    ratings.pivot(index='movieId', columns='userId', values='rating')
    .reindex(index=movies['movieId'])
    .fillna(0)
)
movies_users.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
from scipy.sparse import csr_matrix


**A sparse matrix is a matrix in which most of the elements are zero. These matrices are common in various fields such as scientific computing, machine learning, and network analysis, where the data naturally has many zero or non-existent elements.**

### Characteristics of Sparse Matrices
- **High proportion of zeros:** A sparse matrix has a large number of zero entries compared to the number of non-zero entries.
- **Efficient storage:** Storing sparse matrices in memory-efficient formats reduces the amount of memory required.
- **Specialized operations:** Sparse matrices often require specialized algorithms for matrix operations to take advantage of the sparsity.

In [9]:
# Store the movie x user table in SciPy's compressed sparse row format.
mat_movies = csr_matrix(movies_users.to_numpy())

In [10]:
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
# KMeans settings gathered in one place; random_state is fixed so repeated runs
# start from the same initialisation.
kmeans_settings = {
    'n_clusters': 4,
    'init': 'k-means++',
    'max_iter': 300,
    'n_init': 10,
    'random_state': 0,
}
model = KMeans(**kmeans_settings)
model.fit(mat_movies)


In [11]:
from fuzzywuzzy import process



In [12]:
def recommender(movie_name,data,n):
  """Print up to ``n`` movies whose rating vectors are nearest to the matched title.

  The title in the global ``movies`` frame that best matches ``movie_name`` is
  found with fuzzy matching. A cosine nearest-neighbour search is then run over
  the rows of ``data`` and the titles of the closest rows are printed.

  The previous version called ``model.kneighbors`` on the notebook-level
  ``model``, which is a KMeans estimator and has no such method. A
  NearestNeighbors index is therefore fitted locally instead.

  Parameters:
    movie_name: free-text title to search for.
    data: rating matrix; row ``i`` is expected to describe the movie stored
      under index ``i`` of ``movies``.
    n: maximum number of recommendations to print.

  Returns None; results are printed.
  """
  match=process.extractOne(movie_name,movies['title'])
  if match is None:
    # extractOne returns None when there is no candidate title to score
    print('No movie title matches:',movie_name)
    return
  idx=match[2]
  print('Movie Selected:',movies['title'][idx],'Index: ',idx)
  print('Searching for recommendations......')

  knn=NearestNeighbors(metric='cosine',algorithm='brute')
  knn.fit(data)

  # The selected movie is normally its own nearest neighbour, so ask for one
  # extra row and drop the movie itself from the result.
  n_query=min(n+1,data.shape[0])
  distance,indices=knn.kneighbors(data[idx],n_neighbors=n_query)
  neighbours=[i for i in indices.flatten() if i!=idx][:n]
  for i in neighbours:
    print(movies['title'][i])





In [13]:
def recommender(movie_name, data, n, model, movies):
    """Print up to ``n`` movies that share a cluster with the best title match.

    The title closest to ``movie_name`` is found with fuzzy matching, its rating
    vector is assigned to a cluster with ``model.predict``, and the first ``n``
    other rows carrying that cluster label are printed. Cluster members are
    taken in row order; they are not ranked by similarity to the selected movie.

    Parameters
    ----------
    movie_name : str
        Free-text title to look up in ``movies['title']``.
    data : matrix
        Rating matrix the model was fitted on. Row ``i`` must describe the movie
        stored under index ``i`` of ``movies``.
    n : int
        Maximum number of titles to print.
    model : fitted clustering estimator
        Must expose ``predict`` and ``labels_``.
    movies : pandas.DataFrame
        Movie catalogue with a ``title`` column.

    Returns
    -------
    None
        Results are printed.
    """
    # Find the index of the movie using fuzzy matching
    match = process.extractOne(movie_name, movies['title'])
    if match is None:
        # extractOne returns None when there is no candidate title to score
        print('No movie title matches:', movie_name)
        return
    idx = match[2]
    print('Movie Selected:', movies['title'][idx], 'Index:', idx)
    print('Searching for recommendations...')

    # The same integer addresses a row of `data` and a row of `movies`; differing
    # row counts mean the two cannot be aligned, so titles may be wrong.
    if data.shape[0] != len(movies):
        warnings.warn(
            f"rating matrix has {data.shape[0]} rows but `movies` has {len(movies)}; "
            "rows may not correspond to the printed titles",
            stacklevel=2,
        )

    # Predict the cluster for the selected movie
    movie_cluster = model.predict(data[idx].reshape(1, -1))[0]

    # Rows with the same cluster label, excluding the selected movie itself
    cluster_indices = np.flatnonzero(model.labels_ == movie_cluster)
    cluster_indices = cluster_indices[cluster_indices != idx]

    # Keep the first n cluster members in row order (no similarity ranking)
    recommended_indices = cluster_indices[:n]

    print('Recommendations:')
    for movie in movies['title'][recommended_indices]:
        print(movie)

Function Purpose
The `recommender` function recommends movies that fall in the same KMeans cluster as the selected movie.
It uses fuzzy string matching to find the closest match for the input movie title, predicts the cluster of that movie, and
prints the first `n` other movies from the same cluster. The movies are listed in row order; they are not ranked by similarity to the selected movie.

In [14]:
import numpy as np
# Ask for five movies from the same cluster as the best match for "Toy Story".
recommender(
    movie_name='Toy Story',
    data=mat_movies,
    n=5,
    model=model,
    movies=movies,
)

Movie Selected: Toy Story (1995) Index: 0
Searching for recommendations...
Recommendations:
Jumanji (1995)
Heat (1995)
GoldenEye (1995)
Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
Babe (1995)


In [18]:
# Source of the stand-alone Streamlit app, written to disk below.
# Changes compared with the earlier version of the generated app:
#   * the rating matrix is reindexed to the order of `movies`, so a row number
#     means the same movie in both objects;
#   * one extra neighbour is requested because the selected movie is dropped from
#     the result, so the slider value is the number of titles actually shown;
#   * the run command names the file that is really written (m1app.py).
streamlit_code = """
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from fuzzywuzzy import process
import streamlit as st

# Load the data
movies = pd.read_csv("C:/Users/Asus/Downloads/movies.csv")
ratings = pd.read_csv("C:/Users/Asus/Downloads/ratings.csv")

# Prepare the data.
# pivot() only yields rows for movies that were rated, so reindex to the order of
# `movies`: row i of the matrix then describes the movie in row i of `movies`
# (movies without ratings become all-zero rows).
movies_users = (
    ratings.pivot(index='movieId', columns='userId', values='rating')
    .reindex(index=movies['movieId'])
    .fillna(0)
)
mat_movies = csr_matrix(movies_users.values)

# Train the KNN model
model = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20)
model.fit(mat_movies)

# Define the recommender function
def recommender(movie_name, data, n=10):
    idx = process.extractOne(movie_name, movies['title'])[2]
    movie_id = movies.loc[idx, 'movieId']
    st.write('Movie Selected:', movies.loc[idx, 'title'])
    st.write('Genre:', movies.loc[idx, 'genres'])
    st.write('Rating:', ratings.loc[ratings['movieId'] == movie_id, 'rating'].mean())
    st.write('Searching for recommendations...')
    # The selected movie is normally returned as its own nearest neighbour, so
    # request one extra neighbour and drop it to end up with n recommendations.
    n_query = min(n + 1, data.shape[0])
    distances, indices = model.kneighbors(data[idx], n_neighbors=n_query)
    neighbour_rows = [i for i in indices.flatten() if i != idx][:n]
    recommended_movies = [(movies.loc[i, 'title'], ratings.loc[ratings['movieId'] == movies.loc[i, 'movieId'], 'rating'].mean()) for i in neighbour_rows]
    return recommended_movies

# Streamlit UI
st.title('Movie Recommender System')

# Dropdown menu for selecting the movie (alphabetically sorted)
movie_list = sorted(movies['title'].tolist())
user_input = st.selectbox('Select a movie:', movie_list)
num_recommendations = st.slider('Number of recommendations:', 1, 20, 10)

if user_input:
    recommendations = recommender(user_input, mat_movies, num_recommendations)
    st.write('Recommendations:')
    for movie, rating in recommendations:
        st.write(f"{movie} - Rating: {rating:.2f}")

# Run the Streamlit app
# Command: streamlit run m1app.py

"""

# Specify the file path where the app file will be saved
file_path = 'm1app.py'

# Write the content to the file; the explicit encoding keeps the result
# independent of the platform's default text encoding
with open(file_path, 'w', encoding='utf-8') as app_file:
    app_file.write(streamlit_code)

print(f"File '{file_path}' has been saved.")

File 'm1app.py' has been saved.
