# Recommendation System

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw anime table; the path is relative, so 'anime.csv' must be in the working directory.
df = pd.read_csv('anime.csv')

In [None]:
# Preview the first rows to confirm the columns parsed as expected.
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [None]:
# (rows, columns) of the raw frame, as a baseline before any cleaning.
df.shape

(12294, 7)

### Data Preprocessing

In [None]:
# Dtypes and non-null counts per column. Note that `episodes` is reported as
# object (text) rather than a numeric dtype, so it needs converting before use as a feature.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [None]:
# Number of missing values in each column.
df.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [None]:
# Keep only rows with no missing value in any column; `df` is left untouched and
# the cleaned copy is bound to `df1`.
df1 = df.dropna()

In [None]:
# Sanity check: every column should now report zero missing values.
df1.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [None]:
# Count rows that are exact duplicates of an earlier row (all columns equal).
df1.duplicated().sum()

np.int64(0)

In [None]:
# (rows, columns) after dropping incomplete rows.
df1.shape

(12017, 7)

In [None]:
# Column labels of the cleaned frame.
df1.columns

Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')

In [None]:
# Re-check dtypes on the cleaned frame. dropna() keeps the original index labels,
# so the index is no longer a contiguous 0..n-1 range: labels and row positions differ.
df1.info()

<class 'pandas.core.frame.DataFrame'>
Index: 12017 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12017 non-null  int64  
 1   name      12017 non-null  object 
 2   genre     12017 non-null  object 
 3   type      12017 non-null  object 
 4   episodes  12017 non-null  object 
 5   rating    12017 non-null  float64
 6   members   12017 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 751.1+ KB


In [None]:
# Summary statistics for the numeric columns; `episodes` is object dtype and is therefore not summarised.
df1.describe()

Unnamed: 0,anime_id,rating,members
count,12017.0,12017.0,12017.0
mean,13638.001165,6.478264,18348.88
std,11231.076675,1.023857,55372.5
min,1.0,1.67,12.0
25%,3391.0,5.89,225.0
50%,9959.0,6.57,1552.0
75%,23729.0,7.18,9588.0
max,34519.0,10.0,1013917.0


In [None]:
# Distinct values per column. `name` has fewer distinct values than there are rows,
# so a title is not a unique key; `anime_id` is.
df1.nunique()

anime_id    12017
name        12015
genre        3229
type            6
episodes      187
rating        598
members      6596
dtype: int64

In [None]:
# Frequency of each full genre string; comma-separated combinations are counted
# as-is here, not split into individual genres.
df1['genre'].value_counts()

genre
Hentai                                 816
Comedy                                 521
Music                                  297
Kids                                   197
Comedy, Slice of Life                  174
                                      ... 
Action, Hentai, Mecha, Sci-Fi, Yaoi      1
Adventure, Fantasy, Hentai               1
Hentai, Horror, Yaoi                     1
Hentai, Space                            1
Drama, Hentai, Mystery, Romance          1
Name: count, Length: 3229, dtype: int64

### Feature Extraction

In [None]:
# One indicator (0/1) column per individual genre, split on the ", " separator.
genres = df1['genre'].str.get_dummies(sep=', ')
# Swap the raw genre string for its indicator columns; both frames share df1's index,
# so an index join lines the rows up one-to-one.
df_numerical = df1.drop(columns='genre').join(genres)

In [None]:
# Standardise the continuous columns so they are on a comparable scale to the 0/1 genre
# indicators. Only `rating` and `members` are scaled here; `episodes` is not in the list
# and is left as-is.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
numerical_cols = ['rating', 'members']
# Overwrites the two columns of df_numerical in place with their standardised values.
df_numerical[numerical_cols] = scaler.fit_transform(df_numerical[numerical_cols])
df_numerical.head()

Unnamed: 0,anime_id,name,type,episodes,rating,members,Action,Adventure,Cars,Comedy,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
0,32281,Kimi no Na wa.,Movie,1,2.824474,3.292044,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,5114,Fullmetal Alchemist: Brotherhood,TV,64,2.717032,14.00241,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,28977,Gintama°,TV,51,2.707265,1.732216,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,9253,Steins;Gate,TV,24,2.629126,11.833499,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,9969,Gintama&#039;,TV,51,2.619358,2.400518,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0


### Recommendation System

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
def recommend_similar_anime(target_anime, threshold=0.5):
    """Return the titles whose feature vector is cosine-similar to ``target_anime``.

    The module-level ``df_numerical`` frame is read but never modified, so the
    function gives the same answer no matter how many times, or in which order,
    it is called.

    Parameters
    ----------
    target_anime : str
        Exact value of the ``name`` column to use as the query title.
    threshold : float, default 0.5
        Only titles whose cosine similarity to the query is strictly greater
        than this value are returned.

    Returns
    -------
    list of str
        Matching titles in catalogue row order (not ranked by similarity).
        Rows carrying the query title itself are never included.

    Raises
    ------
    ValueError
        If ``target_anime`` is not present among the rows that have a numeric
        episode count (unknown title, or its ``episodes`` value is not a number).
    """
    # Work on a private copy: coercing/dropping on the shared frame would silently
    # change it for every later cell and every later call.
    catalog = df_numerical.copy()
    # Non-numeric episode counts (e.g. 'Unknown') become NaN and those rows are left out.
    catalog['episodes'] = pd.to_numeric(catalog['episodes'], errors='coerce')
    # Reset the index so that row labels and row positions coincide from here on;
    # earlier dropna() calls leave gaps in the labels.
    catalog = catalog.dropna(subset=['episodes']).reset_index(drop=True)

    # Boolean mask of the row(s) holding the query title (titles are not unique).
    is_target = (catalog['name'] == target_anime).to_numpy()
    if not is_target.any():
        raise ValueError(
            "Anime '{}' was not found among titles with a numeric episode count".format(target_anime)
        )

    # `name` and `type` are text; `anime_id` is an arbitrary identifier whose large
    # magnitude would otherwise dominate the cosine and make every title look alike.
    # NOTE(review): `episodes` is still on its raw scale here - consider standardising
    # it alongside `rating` and `members`.
    features = catalog.drop(columns=['anime_id', 'name', 'type'])

    # Use the first matching row as the query so the result is a single similarity vector.
    target_vector = features.loc[is_target].iloc[[0]]
    similarities = cosine_similarity(target_vector, features)[0]

    # Mask-based exclusion removes the query title by position, not by index label.
    is_similar = (similarities > threshold) & ~is_target
    return catalog.loc[is_similar, 'name'].tolist()

In [None]:
# Sweep several similarity cut-offs for a single title and print what comes back.
threshold_values = [0.2, 0.5, 0.9]
target_anime = 'Nana'

for threshold in threshold_values:
    # Titles returned for the current cut-off, plus a one-column frame for display.
    recommended_anime = recommend_similar_anime(target_anime, threshold=threshold)
    num_recommendations = len(recommended_anime)
    df_recommendations = pd.DataFrame(recommended_anime, columns=['Recommended Anime'])

    # How often each title occurs in the result, and the five most frequent.
    value_counts = df_recommendations['Recommended Anime'].value_counts()
    top_5_recommendations = value_counts.iloc[:5]

    # Each tuple is the argument list of one print() call, in display order.
    report = [
        (f"\nRecommendations for threshold {threshold}:",),
        ("Recommended anime similar to '{}':".format(target_anime),),
        (df_recommendations,),
        ("\nNumber of recommendations:", num_recommendations),
        ("\nValue counts of recommended anime:",),
        (value_counts,),
        ("\nTop 5 recommendations:",),
        (top_5_recommendations,),
    ]
    for parts in report:
        print(*parts)


Recommendations for threshold 0.2:
Recommended anime similar to 'Nana':
                                       Recommended Anime
0                                         Kimi no Na wa.
1                       Fullmetal Alchemist: Brotherhood
2                                               Gintama°
3                                            Steins;Gate
4                                          Gintama&#039;
...                                                  ...
11820       Toushindai My Lover: Minami tai Mecha-Minami
11821                                        Under World
11822                     Violence Gekiga David no Hoshi
11823  Violence Gekiga Shin David no Hoshi: Inma Dens...
11824                   Yasuji no Pornorama: Yacchimae!!

[11825 rows x 1 columns]

Number of recommendations: 11825

Value counts of recommended anime:
Recommended Anime
Saru Kani Gassen                       2
Shi Wan Ge Leng Xiaohua                2
Nudl Nude                              1
Sen to

In [None]:
# Same cut-off sweep as for the previous title, run for a second query.
threshold_values = [0.2, 0.5, 0.9]
target_anime = 'Kimi no Na wa.'

for threshold in threshold_values:
    # Titles returned for the current cut-off, plus a one-column frame for display.
    recommended_anime = recommend_similar_anime(target_anime, threshold=threshold)
    num_recommendations = len(recommended_anime)
    df_recommendations = pd.DataFrame(recommended_anime, columns=['Recommended Anime'])

    # How often each title occurs in the result, and the five most frequent.
    value_counts = df_recommendations['Recommended Anime'].value_counts()
    top_5_recommendations = value_counts.iloc[:5]

    # Each tuple is the argument list of one print() call, in display order.
    report = [
        (f"\nRecommendations for threshold {threshold}:",),
        ("Recommended anime similar to '{}':".format(target_anime),),
        (df_recommendations,),
        ("\nNumber of recommendations:", num_recommendations),
        ("\nValue counts of recommended anime:",),
        (value_counts,),
        ("\nTop 5 recommendations:",),
        (top_5_recommendations,),
    ]
    for parts in report:
        print(*parts)


Recommendations for threshold 0.2:
Recommended anime similar to 'Kimi no Na wa.':
                                       Recommended Anime
0                       Fullmetal Alchemist: Brotherhood
1                                               Gintama°
2                                            Steins;Gate
3                                          Gintama&#039;
4      Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...
...                                                  ...
11819       Toushindai My Lover: Minami tai Mecha-Minami
11820                                        Under World
11821                     Violence Gekiga David no Hoshi
11822  Violence Gekiga Shin David no Hoshi: Inma Dens...
11823                   Yasuji no Pornorama: Yacchimae!!

[11824 rows x 1 columns]

Number of recommendations: 11824

Value counts of recommended anime:
Recommended Anime
Saru Kani Gassen                       2
Shi Wan Ge Leng Xiaohua                2
Nudl Nude 2                          

#### Interview Questions

In [None]:
#1. Can you explain the difference between user-based and item-based collaborative filtering?

#User-Based Collaborative Filtering: This approach recommends items to a user based on the
#preferences of similar users. It calculates the similarity between users, often using
#metrics like cosine similarity, Pearson correlation, or Jaccard similarity.

#Item-Based Collaborative Filtering: This approach recommends items based on the similarity
#between items rather than users. It calculates the similarity between items, typically
#based on user interactions or ratings. Common metrics include cosine similarity and
#adjusted cosine similarity.

In [None]:
#2. What is collaborative filtering, and how does it work?

#Collaborative filtering is a recommendation technique that predicts a user's preferences
#based on the preferences and behaviors of other users. It operates on the idea that users
#who have agreed in the past will likely agree in the future.