In [None]:
import pandas as pd
import numpy as np

In [None]:
# Input tables, read from the working directory: movie metadata (the movieId
# and title columns are used below) and per-user ratings (userId, movieId,
# rating).
MOVIES_CSV = "movies.csv"
RATINGS_CSV = "ratings.csv"

movies = pd.read_csv(MOVIES_CSV)
ratings = pd.read_csv(RATINGS_CSV)

In [None]:
movies.head()

In [None]:
ratings.head()

In [None]:
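# Target layout of the user-item matrix built in the next cell: one row per
# movie_id, one column per user_id, each cell holding that user's rating
# (NaN where the user has not rated the movie).
#
#             user_id
#             1    2    3    4    5
# movie_id
#        1    4
#        2    NaN
#        3    4
#        4    NaN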

In [None]:
# Reshape the long ratings table into a wide matrix: one row per movieId,
# one column per userId, each cell holding the rating (NaN where none exists).
final_dataset = ratings.pivot(
    index="movieId",
    columns="userId",
    values="rating",
)

In [None]:
final_dataset.head()

In [None]:
# Missing (movie, user) pairs become 0 so the matrix is fully numeric.
final_dataset = final_dataset.fillna(0)
final_dataset.head()

In [None]:
# Removing noise from dataset

In [None]:
# Rating counts per movie and per user; the filters below use them to drop
# sparsely rated movies and low-activity users.
ratings_by_movie = ratings.groupby("movieId")["rating"]
ratings_by_user = ratings.groupby("userId")["rating"]

no_user_voted = ratings_by_movie.count()
no_movie_voted = ratings_by_user.count()

In [None]:
no_user_voted

In [None]:
no_movie_voted

In [None]:
import matplotlib.pyplot as plt

# Number of ratings each movie received, with a reference line at 10.
plt.style.use("ggplot")
fig, axes = plt.subplots(1, 1, figsize=(16, 4))
axes.scatter(no_user_voted.index, no_user_voted, color="hotpink")
axes.axhline(y=10, color="green")
axes.set_xlabel("MovieID")
axes.set_ylabel("No of users voted")
plt.show()

In [None]:
# Keep only the movies that were rated by more than 10 users.
movies_with_enough_votes = no_user_voted[no_user_voted > 10].index
final_dataset = final_dataset.loc[movies_with_enough_votes, :]

In [None]:
final_dataset

In [None]:
import matplotlib.pyplot as plt

# Number of movies each user rated. The x axis is the userId and the y axis
# the per-user rating count, so the labels differ from the per-movie plot.
plt.style.use("ggplot")
fig, axes = plt.subplots(1, 1, figsize=(16, 4))
axes.scatter(no_movie_voted.index, no_movie_voted, color="hotpink")
# Reference line at the cut-off the user filter applies (more than 50 ratings).
axes.axhline(y=50, color="green")
axes.set_xlabel("UserID")
axes.set_ylabel("No of movies voted")
plt.show()

In [None]:
# Keep only the users who rated more than 50 movies.
users_with_enough_votes = no_movie_voted[no_movie_voted > 50].index
final_dataset = final_dataset.loc[:, users_with_enough_votes]

In [None]:
final_dataset

In [None]:
final_dataset.shape

In [None]:
# Toy 3x5 matrix used to illustrate sparsity: the share of entries that are 0.
sample = np.array(
    [
        [1, 0, 0, 0, 0],
        [0, 2, 0, 0, 1],
        [0, 0, 4, 0, 0],
    ]
)
filled_fraction = np.count_nonzero(sample) / sample.size
sparsity = 1 - filled_fraction

In [None]:
print(sparsity)

In [None]:
from scipy.sparse import csr_matrix
# Convert the small dense example into SciPy's compressed sparse row (CSR)
# format and print it to show what the sparse representation looks like.
csr_sample = csr_matrix(sample)
print(csr_sample)

In [None]:
csr_matrix

In [None]:
# Build the sparse movie x user matrix from the rating columns only.
# After this cell has run once, final_dataset carries movieId as a regular
# column (see the reset below). On a re-run, move it back to the index first
# so the ids are never fed to the model as if they were ratings.
if "movieId" in final_dataset.columns:
    final_dataset = final_dataset.set_index("movieId")
csr_data = csr_matrix(final_dataset.to_numpy())

# Expose movieId as a column so that row positions in csr_data line up with
# final_dataset's 0..n-1 index, which get_recommendation relies on.
final_dataset = final_dataset.reset_index()

In [None]:
print(csr_data)

In [None]:
from sklearn.neighbors import NearestNeighbors

# Brute-force nearest-neighbour search using cosine distance between the
# movie rows of csr_data; 20 neighbours unless a query asks for another count.
knn = NearestNeighbors(
    n_neighbors=20,
    metric="cosine",
    algorithm="brute",
    n_jobs=-1,
)
knn.fit(csr_data)

In [None]:
def get_recommendation(movie_name, n_recommendations=10):
    """Recommend movies whose rating patterns are closest to a given title.

    The name is matched as a plain, case-insensitive substring of
    ``movies['title']``. The first matching movie that is still present in
    ``final_dataset`` becomes the query; its nearest neighbours in ``csr_data``
    (according to the fitted ``knn`` model) are returned.

    Args:
        movie_name: Full or partial movie title. Leading/trailing whitespace
            is ignored; a blank or non-string value counts as "not found".
        n_recommendations: Maximum number of recommendations to return.
            Fewer rows are returned when the dataset has fewer other movies.

    Returns:
        A DataFrame with columns ``Title`` and ``Distance``, indexed from 1
        and ordered from most to least similar (ascending distance), or
        ``None`` when no usable movie matches ``movie_name``.
    """
    query = movie_name.strip() if isinstance(movie_name, str) else ""
    if not query:
        # An empty pattern would otherwise match every title.
        print("Movie not Found")
        return None

    # regex=False: titles contain characters such as "(" and ")" that must be
    # matched literally. na=False: rows with a missing title never match.
    title_matches = movies[
        movies['title'].str.contains(query, case=False, regex=False, na=False)
    ]
    if title_matches.empty:
        print("Movie not Found")
        return None

    # A matching title may have been removed from final_dataset by the noise
    # filters; only movies that still have a row there can be queried.
    usable = title_matches[title_matches['movieId'].isin(final_dataset['movieId'])]
    if usable.empty:
        print("Movie found, but it does not have enough ratings to recommend from")
        return None

    query_movie_id = usable['movieId'].iloc[0]
    # Positional row of the query movie; rows of csr_data are in the same
    # order as the rows of final_dataset. argmax returns the first True.
    query_row = int((final_dataset['movieId'] == query_movie_id).to_numpy().argmax())

    n_recommendations = max(int(n_recommendations), 0)
    # One extra neighbour is requested because the query movie is normally its
    # own nearest neighbour; never ask for more rows than exist.
    n_neighbors = min(n_recommendations + 1, csr_data.shape[0])
    distances, indices = knn.kneighbors(csr_data[query_row], n_neighbors=n_neighbors)

    # Drop the query movie by position (not by assuming it comes first) and
    # order the rest closest-first so that rank 1 is the best recommendation.
    neighbours = sorted(
        (
            (row, dist)
            for row, dist in zip(indices.ravel().tolist(), distances.ravel().tolist())
            if row != query_row
        ),
        key=lambda pair: pair[1],
    )[:n_recommendations]

    recommended_movies = []
    for row, dist in neighbours:
        movie_id = final_dataset['movieId'].iloc[row]
        # Label-based lookup, so it does not depend on how movies is indexed.
        titles = movies.loc[movies['movieId'] == movie_id, 'title']
        title = titles.iloc[0] if not titles.empty else f"movieId {movie_id}"
        recommended_movies.append({'Title': title, 'Distance': dist})

    return pd.DataFrame(
        recommended_movies,
        columns=['Title', 'Distance'],
        index=range(1, len(recommended_movies) + 1),
    )

In [None]:
get_recommendation("Jumanji")

In [31]:
import gradio as gr
def recommend_movies(movie_name):
    """Return the recommendations for ``movie_name`` as display text.

    Calls ``get_recommendation`` and renders the resulting DataFrame as a
    plain-text table (without the index column). When no DataFrame comes back,
    a short message is returned instead so the text output is never ``None``.

    Args:
        movie_name: Full or partial movie title typed by the user.

    Returns:
        A string: the formatted recommendation table or a fallback message.
    """
    df = get_recommendation(movie_name)
    # The builtin is spelled isinstance; the previous spelling raised NameError.
    if isinstance(df, pd.DataFrame):
        return df.to_string(index=False)
    return "No recommendations found for this movie name"
# Text-in / text-out gradio interface around recommend_movies.
app = gr.Interface(
    fn=recommend_movies,
    inputs="text",
    outputs="text",
    description="Enter a movie name to get a list of recommended movies",
)
app.launch()

ModuleNotFoundError: No module named 'gradio'