In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (15, 8)
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

In [2]:
# Read the anime catalogue into a DataFrame and display it.
# NOTE(review): '/content/anime.csv' is an absolute, environment-specific path
# (presumably a Colab upload location) — adjust it when running elsewhere.
data = pd.read_csv('/content/anime.csv')
data

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [3]:
# Dimensions of the raw frame as (rows, columns).
data.shape

(12294, 7)

In [4]:
# Number of rows that are exact duplicates of an earlier row (all columns equal).
data.duplicated().sum()

np.int64(0)

In [5]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
data.describe()

Unnamed: 0,anime_id,rating,members
count,12294.0,12064.0,12294.0
mean,14058.221653,6.473902,18071.34
std,11455.294701,1.026746,54820.68
min,1.0,1.67,5.0
25%,3484.25,5.88,225.0
50%,10260.5,6.57,1550.0
75%,24794.5,7.18,9437.0
max,34527.0,10.0,1013917.0


In [6]:
# Column dtypes and non-null counts; shows which columns contain missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [7]:
# Remove the identifier and episode-count columns; neither is kept as a feature.
data = data.drop(columns=['anime_id', 'episodes'])

In [8]:
# Count the missing values in each remaining column.
data.isna().sum()

Unnamed: 0,0
name,0
genre,62
type,25
rating,230
members,0


In [9]:
# Fraction of missing values per column, expressed as a percentage.
null_pct = data.isna().mean() * 100

# Report one line per column.
for col, pct in zip(null_pct.index, null_pct):
    print(f'Null value percentage of {col} is :{pct}')


Null value percentage of name is :0.0
Null value percentage of genre is :0.504311046038718
Null value percentage of type is :0.20335122824141857
Null value percentage of rating is :1.8708312998210508
Null value percentage of members is :0.0


In [10]:
# Discard every row that has at least one missing value ...
data = data.dropna()
# ... then renumber the surviving rows 0..n-1 so later positional joins line up.
data = data.reset_index(drop=True)
data

Unnamed: 0,name,genre,type,rating,members
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,9.37,200630
1,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,9.26,793665
2,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,9.25,114262
3,Steins;Gate,"Sci-Fi, Thriller",TV,9.17,673572
4,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,9.16,151266
...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,4.15,211
12013,Under World,Hentai,OVA,4.28,183
12014,Violence Gekiga David no Hoshi,Hentai,OVA,4.88,219
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,4.98,175


In [11]:
# One-hot encode the categorical `type` column as 0/1 integer indicator columns
# (named `type_<value>`) and append them to the frame.
type_ohe = pd.get_dummies(data['type'], prefix='type', dtype=int)
data = pd.concat([data, type_ohe], axis='columns')
data

Unnamed: 0,name,genre,type,rating,members,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,9.37,200630,1,0,0,0,0,0
1,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,9.26,793665,0,0,0,0,0,1
2,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,9.25,114262,0,0,0,0,0,1
3,Steins;Gate,"Sci-Fi, Thriller",TV,9.17,673572,0,0,0,0,0,1
4,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,9.16,151266,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,4.15,211,0,0,0,1,0,0
12013,Under World,Hentai,OVA,4.28,183,0,0,0,1,0,0
12014,Violence Gekiga David no Hoshi,Hentai,OVA,4.88,219,0,0,0,1,0,0
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,4.98,175,0,0,0,1,0,0


In [12]:
# The original `type` column is now redundant with its one-hot indicators.
data = data.drop(columns='type')

In [13]:
# Distinct raw genre strings; each one is a comma-separated list of genre names.
data['genre'].unique()

array(['Drama, Romance, School, Supernatural',
       'Action, Adventure, Drama, Fantasy, Magic, Military, Shounen',
       'Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen',
       ..., 'Action, Comedy, Hentai, Romance, Supernatural',
       'Hentai, Sports', 'Hentai, Slice of Life'], dtype=object)

In [14]:
# Turn each ', '-separated genre string into a list of genre names.
# Missing values are replaced by '' first, which splits to [''].
data['genre_split'] = data['genre'].fillna('').str.split(', ')
data

Unnamed: 0,name,genre,rating,members,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV,genre_split
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",9.37,200630,1,0,0,0,0,0,"[Drama, Romance, School, Supernatural]"
1,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",9.26,793665,0,0,0,0,0,1,"[Action, Adventure, Drama, Fantasy, Magic, Mil..."
2,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",9.25,114262,0,0,0,0,0,1,"[Action, Comedy, Historical, Parody, Samurai, ..."
3,Steins;Gate,"Sci-Fi, Thriller",9.17,673572,0,0,0,0,0,1,"[Sci-Fi, Thriller]"
4,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",9.16,151266,0,0,0,0,0,1,"[Action, Comedy, Historical, Parody, Samurai, ..."
...,...,...,...,...,...,...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,4.15,211,0,0,0,1,0,0,[Hentai]
12013,Under World,Hentai,4.28,183,0,0,0,1,0,0,[Hentai]
12014,Violence Gekiga David no Hoshi,Hentai,4.88,219,0,0,0,1,0,0,[Hentai]
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,4.98,175,0,0,0,1,0,0,[Hentai]


In [15]:
# Multi-hot encode the genre lists: one 0/1 column per distinct genre.
# Fit a single binarizer and reuse it for both the matrix and the column
# labels, so the two are guaranteed to come from the same fit (the previous
# version built and fitted two separate MultiLabelBinarizer instances).
genre_mlb = MultiLabelBinarizer()
genre_matrix = genre_mlb.fit_transform(data['genre_split'])
genre_df = pd.DataFrame(
    genre_matrix,
    columns=genre_mlb.classes_,
    index=data.index  # keep row alignment with `data` for the later concat
)


In [16]:
# Inspect the multi-hot genre indicator frame.
genre_df

Unnamed: 0,Action,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,1,1,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12012,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12013,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12014,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12015,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# Swap the textual genre columns for their multi-hot indicator columns.
data_without_genre_text = data.drop(columns=['genre', 'genre_split'])
data = pd.concat([data_without_genre_text, genre_df], axis='columns')
data

Unnamed: 0,name,rating,members,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV,Action,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
0,Kimi no Na wa.,9.37,200630,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,Fullmetal Alchemist: Brotherhood,9.26,793665,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,Gintama°,9.25,114262,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
3,Steins;Gate,9.17,673572,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
4,Gintama&#039;,9.16,151266,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,4.15,211,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12013,Under World,4.28,183,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12014,Violence Gekiga David no Hoshi,4.88,219,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,4.98,175,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
# Standardise the two continuous features (zero mean, unit variance) so they
# are on a comparable scale to each other.
# NOTE(review): the displayed output shows scaled `members` values above 10,
# so this column may dominate the cosine similarity — confirm this is intended.
numeric_cols = ['rating', 'members']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(data[numeric_cols])
data[numeric_cols] = scaled_values


In [19]:
# Inspect the feature frame after scaling `rating` and `members`.
data

Unnamed: 0,name,rating,members,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV,Action,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
0,Kimi no Na wa.,2.824474,3.292044,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,Fullmetal Alchemist: Brotherhood,2.717032,14.002410,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
2,Gintama°,2.707265,1.732216,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
3,Steins;Gate,2.629126,11.833499,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
4,Gintama&#039;,2.619358,2.400518,0,0,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,-2.274108,-0.327575,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12013,Under World,-2.147132,-0.328080,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12014,Violence Gekiga David no Hoshi,-1.561088,-0.327430,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,-1.463414,-0.328225,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# Pairwise cosine similarity between every pair of titles, computed on all
# feature columns (everything except `name`) and labelled by name on both axes.
# NOTE(review): this is a dense n x n matrix, and the labels are only as unique
# as the `name` column — verify both are acceptable for the dataset size.
features_by_name = data.set_index('name')
cosine_df = pd.DataFrame(
    cosine_similarity(features_by_name),
    index=features_by_name.index,
    columns=features_by_name.index
)


In [21]:
# Inspect the name-by-name similarity matrix (1.0 on the diagonal).
cosine_df

name,Kimi no Na wa.,Fullmetal Alchemist: Brotherhood,Gintama°,Steins;Gate,Gintama&#039;,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou,Hunter x Hunter (2011),Ginga Eiyuu Densetsu,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare,Gintama&#039;: Enchousen,...,Silent Chaser Kagami,Super Erotic Anime,Teleclub no Himitsu,Tenshi no Habataki Jun,The Satisfaction,Toushindai My Lover: Minami tai Mecha-Minami,Under World,Violence Gekiga David no Hoshi,Violence Gekiga Shin David no Hoshi: Inma Densetsu,Yasuji no Pornorama: Yacchimae!!
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kimi no Na wa.,1.000000,0.771823,0.638920,0.776177,0.690413,0.740548,0.796405,0.683376,0.595890,0.564999,...,-0.521560,-0.557240,-0.544084,-0.562622,-0.560951,-0.569760,-0.564853,-0.527463,-0.518295,-0.336687
Fullmetal Alchemist: Brotherhood,0.771823,1.000000,0.555943,0.976301,0.662207,0.522170,0.966699,0.474289,0.395627,0.446020,...,-0.286232,-0.279784,-0.282781,-0.277353,-0.278215,-0.274413,-0.276677,-0.284740,-0.285972,-0.285508
Gintama°,0.638920,0.555943,1.000000,0.564912,0.990054,0.757520,0.655011,0.645033,0.926702,0.991496,...,-0.517253,-0.564252,-0.546646,-0.571990,-0.569540,-0.582129,-0.575051,-0.525219,-0.513332,-0.432948
Steins;Gate,0.776177,0.976301,0.564912,1.000000,0.670630,0.509814,0.954879,0.479755,0.400408,0.454180,...,-0.306745,-0.302714,-0.304796,-0.300705,-0.301431,-0.298313,-0.300190,-0.305669,-0.306297,-0.301895
Gintama&#039;,0.690413,0.662207,0.990054,0.670630,1.000000,0.753044,0.744779,0.640408,0.897629,0.964096,...,-0.497042,-0.536064,-0.521556,-0.542231,-0.540295,-0.550355,-0.544723,-0.503580,-0.493635,-0.424719
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Toushindai My Lover: Minami tai Mecha-Minami,-0.569760,-0.274413,-0.582129,-0.298313,-0.550355,-0.617768,-0.379088,-0.538644,-0.578151,-0.578031,...,0.979374,0.997929,0.992819,0.999312,0.998944,1.000000,0.999652,0.983674,0.977342,0.709643
Under World,-0.564853,-0.276677,-0.575051,-0.300190,-0.544723,-0.609710,-0.379144,-0.525651,-0.570009,-0.570151,...,0.984366,0.999279,0.995631,0.999943,0.999809,0.999652,1.000000,0.988082,0.982589,0.711034
Violence Gekiga David no Hoshi,-0.527463,-0.284740,-0.525219,-0.305669,-0.503580,-0.553746,-0.372979,-0.442076,-0.514250,-0.515872,...,0.999746,0.993212,0.998138,0.989672,0.990904,0.983674,0.988082,1.000000,0.999477,0.709101
Violence Gekiga Shin David no Hoshi: Inma Densetsu,-0.518295,-0.285972,-0.513332,-0.306297,-0.493635,-0.540464,-0.370933,-0.423335,-0.501087,-0.503029,...,0.999952,0.988932,0.995644,0.984520,0.986036,0.977342,0.982589,0.999477,1.000000,0.706588


In [22]:
def recommend_anime(anime_name, cosine_df, top_n=5, min_similarity=0.5):
    """Return the titles most similar to ``anime_name``.

    Parameters
    ----------
    anime_name : str
        Label of the title to look up in ``cosine_df.index``.
    cosine_df : pandas.DataFrame
        Similarity matrix whose index and columns are title names.
    top_n : int, default 5
        Maximum number of recommendations to return.
    min_similarity : float, default 0.5
        Titles scoring below this threshold are discarded.

    Returns
    -------
    pandas.Series or list
        Similarity scores sorted from highest to lowest and indexed by title
        name (at most ``top_n`` entries). If ``anime_name`` is not in the
        index, a message is printed and an empty list is returned.
    """
    if anime_name not in cosine_df.index:
        print(f"'{anime_name}' not found in the dataset.")
        return []

    sim_scores = cosine_df.loc[anime_name]

    # When the same name labels several rows, `.loc` yields a DataFrame rather
    # than a Series; use the first matching row so the steps below still
    # operate on a single row of scores.
    if sim_scores.ndim > 1:
        sim_scores = sim_scores.iloc[0]

    # Never recommend the query title itself (this also removes same-named
    # duplicates). `errors='ignore'` tolerates a matrix whose columns do not
    # contain the queried label.
    sim_scores = sim_scores.drop(anime_name, errors='ignore')

    sim_scores = sim_scores[sim_scores >= min_similarity]

    return sim_scores.sort_values(ascending=False).head(top_n)


In [23]:
# Request up to 10 titles whose similarity to the query title is at least 0.6.
recommend_anime('Fullmetal Alchemist: Brotherhood', cosine_df, top_n=10, min_similarity=0.6)

Unnamed: 0_level_0,Fullmetal Alchemist: Brotherhood
name,Unnamed: 1_level_1
Fullmetal Alchemist,0.993701
Shingeki no Kyojin,0.988777
Fairy Tail,0.98866
Akame ga Kill!,0.986022
Noragami,0.981955
Soul Eater,0.981589
Sword Art Online,0.980868
One Piece,0.978957
Ao no Exorcist,0.978868
Angel Beats!,0.978233


**Interview Questions**

Q.1) Can you explain the difference between user-based and item-based collaborative filtering?

Ans->  
1)  User-Based Collaborative Filtering:

    1) Focus: Finds similar users based on their preferences or ratings.

    2) Recommendation Logic: If User A and User B have similar tastes (for example, both like item X) and User A also likes item Y, then recommend item Y to User B.

    3) Similarity Calculation: Based on user-to-user similarity (e.g., cosine similarity, Pearson correlation).

    4) Dynamic: Can be less stable as user preferences change over time.

    5) Cold Start Issue: Struggles when there are new users with no history.

    6) Example: "People similar to you also liked..."


2) Item-Based Collaborative Filtering:

    1) Focus: Finds similar items based on users’ ratings or interactions.

    2) Recommendation Logic: If items X and Y are rated similarly by many users, recommend item Y to users who liked item X.

    3) Similarity Calculation: Based on item-to-item similarity.

    4) Stable: More stable since item relationships change less frequently.

    5) Cold Start Issue: Struggles when there are new items with no ratings.

    6) Example: "People who liked this item also liked..."




Q.2) What is collaborative filtering, and how does it work?

Ans->
1) Collaborative Filtering:

    1) Definition: Collaborative filtering is a recommendation technique that suggests items to users based on the preferences and behavior of other users.

    2) Main Idea: It assumes that if two users have similar interests, they will like similar items.

    3) Types:

      > User-Based: Recommends items by finding similar users.

      > Item-Based: Recommends items that are similar to items the user has
        liked before.

    4) Working Mechanism:

      > Collect user-item interaction data (ratings, clicks, purchases).

      > Calculate similarity between users or items.

      > Predict missing ratings or preferences using these similarities.

    5) Strength: Doesn't need content information about the items — just user behavior data.

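    6) Limitation: Suffers from the cold-start problem (new users or items have no interaction history) and from sparsity (most users have rated only a small fraction of the items, so the user–item matrix is mostly empty).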

