In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from scipy.spatial import distance

In [None]:
# Load the anime dataset
data = pd.read_csv('anime.csv')

In [None]:
data.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [None]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [None]:
# Data Preprocessing
# Fill missing ratings with a suitable value, for example, the mean rating
data['rating'].fillna(data['rating'].mean(), inplace=True)

In [None]:
# Normalize the rating values for clustering
scaler = StandardScaler()
data['rating_normalized'] = scaler.fit_transform(data['rating'].values.reshape(-1, 1))

In [None]:
# Create genre features using one-hot encoding
genres = data['genre'].str.get_dummies(sep=', ')
print(genres)

       Action  Adventure  Cars  Comedy  Dementia  Demons  Drama  Ecchi  \
0           0          0     0       0         0       0      1      0   
1           1          1     0       0         0       0      1      0   
2           1          0     0       1         0       0      0      0   
3           0          0     0       0         0       0      0      0   
4           1          0     0       1         0       0      0      0   
...       ...        ...   ...     ...       ...     ...    ...    ...   
12289       0          0     0       0         0       0      0      0   
12290       0          0     0       0         0       0      0      0   
12291       0          0     0       0         0       0      0      0   
12292       0          0     0       0         0       0      0      0   
12293       0          0     0       0         0       0      0      0   

       Fantasy  Game  ...  Shounen Ai  Slice of Life  Space  Sports  \
0            0     0  ...           0   

In [None]:
data = pd.concat([data, genres], axis=1)
print(data)

       anime_id                                               name  \
0         32281                                     Kimi no Na wa.   
1          5114                   Fullmetal Alchemist: Brotherhood   
2         28977                                           Gintama°   
3          9253                                        Steins;Gate   
4          9969                                      Gintama&#039;   
...         ...                                                ...   
12289      9316       Toushindai My Lover: Minami tai Mecha-Minami   
12290      5543                                        Under World   
12291      5621                     Violence Gekiga David no Hoshi   
12292      6133  Violence Gekiga Shin David no Hoshi: Inma Dens...   
12293     26081                   Yasuji no Pornorama: Yacchimae!!   

                                                   genre   type episodes  \
0                   Drama, Romance, School, Supernatural  Movie        1   
1      

In [None]:
# Perform KMeans clustering
kmeans = KMeans(n_clusters=10, random_state=42)
data['cluster'] = kmeans.fit_predict(data.iloc[:, 9:])  # Starting from the 'rating_normalized' column



In [None]:
# User Input
user_anime = input('Enter the anime you like and we will find more like those for you: ').lower()
# Get the cluster of the user's selected anime
user_cluster = int(data[data['name'].str.lower() == user_anime]['cluster'])


Enter the anime you like and we will find more like those for you: Gintama


In [None]:
# Get all anime in the same cluster as the user's anime
cluster_anime = data[data['cluster'] == user_cluster]

In [None]:
# Calculate recommendations based on genre similarity and ratings
recommendations = []

for i, anime in cluster_anime.iterrows():
    if anime['name'].lower() != user_anime:
        euclidean_distance = distance.euclidean(anime.iloc[9:-3], cluster_anime[cluster_anime['name'].str.lower() == user_anime].iloc[0, 9:-3])
        recommendations.append((anime['name'], anime['rating'], euclidean_distance))

# Sort recommendations by a combination of rating and genre similarity
recommendations.sort(key=lambda x: (x[1], x[2]), reverse=True)

In [None]:
# Prompt the user for the number of recommendations
num_recommendations = int(input('How many recommendations do you want? '))
# Print the recommended anime titles and their ratings
for i, (name, rating, distance) in enumerate(recommendations[:num_recommendations]):
    print(f'{i + 1}. {name} (Rating: {rating:.2f}, Genre Similarity: {distance:.2f})')


How many recommendations do you want? 5
1. Gintama° (Rating: 9.25, Genre Similarity: 0.00)
2. Gintama&#039; (Rating: 9.16, Genre Similarity: 0.00)
3. Hunter x Hunter (2011) (Rating: 9.13, Genre Similarity: 2.65)
4. Gintama&#039;: Enchousen (Rating: 9.11, Genre Similarity: 0.00)
5. Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare (Rating: 9.10, Genre Similarity: 0.00)
