# Recommendation system

# Data Preprocessing

 Load the Dataset

In [1]:
import pandas as pd
df = pd.read_csv(r"C:\Users\G.S.AZARUDDIN\Downloads\anime.csv") 
print(df.head())


   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  


Handle Missing Values

In [3]:
print(df.isnull().sum())
df = df.dropna(subset=['type', 'genre', 'rating'])


anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64


Explore the Dataset

In [15]:
print(df.info())
print(df.describe())
print(df['rating'].unique())
print(df['type'].unique())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 47 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   type           6 non-null      int8   
 1   anime_id       6 non-null      int64  
 2   rating         6 non-null      float64
 3   members        6 non-null      int64  
 4   Action         6 non-null      float64
 5   Adventure      6 non-null      float64
 6   Cars           6 non-null      float64
 7   Comedy         6 non-null      float64
 8   Dementia       6 non-null      float64
 9   Demons         6 non-null      float64
 10  Drama          6 non-null      float64
 11  Ecchi          6 non-null      float64
 12  Fantasy        6 non-null      float64
 13  Game           6 non-null      float64
 14  Harem          6 non-null      float64
 15  Hentai         6 non-null      float64
 16  Historical     6 non-null      float64
 17  Horror         6 non-null      float64
 18  Josei         

# 2. Feature Extraction


Convert Categorical Features into Numerical Representations
Genres: Convert the genre column into a one-hot encoded format or a binary matrix.

In [81]:
df['genre'] = df['genre'].str.split(',').apply(lambda x: [genre.strip() for genre in x])
df = df.explode('genre')
genre_dummies = pd.get_dummies(df['genre'])
df = df.join(genre_dummies).groupby(['name'], as_index=False).sum()

KeyError: 'genre'

In [50]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df['rating'] = scaler.fit_transform(df[['rating']])

# 3. Recommendation System

In [53]:
print(df.columns)

Index(['rating', 'anime_id', 'members', 'Action', 'Adventure', 'Cars',
       'Comedy', 'Dementia', 'Demons', 'Drama', 'Ecchi', 'Fantasy', 'Game',
       'Harem', 'Hentai', 'Historical', 'Horror', 'Josei', 'Kids', 'Magic',
       'Martial Arts', 'Mecha', 'Military', 'Music', 'Mystery', 'Parody',
       'Police', 'Psychological', 'Romance', 'Samurai', 'School', 'Sci-Fi',
       'Seinen', 'Shoujo', 'Shoujo Ai', 'Shounen', 'Shounen Ai',
       'Slice of Life', 'Space', 'Sports', 'Super Power', 'Supernatural',
       'Thriller', 'Vampire', 'Yaoi', 'Yuri', '0', '1', '2', '3', '4', '5'],
      dtype='object')


In [76]:
from sklearn.metrics.pairwise import cosine_similarity
features = df.drop(columns=['Action']) 
similarity_matrix = cosine_similarity(features)
similarity_df = pd.DataFrame(similarity_matrix, index=df['Action'], columns=df['Action'])

In [77]:
def recommend_anime(target_anime, similarity_df, top_n=5):
    if target_anime not in similarity_df.index:
        return f"Anime '{target_anime}' not found in the dataset."
    sim_scores = similarity_df[target_anime]
    similar_animes = sim_scores.sort_values(ascending=False).head(top_n + 1)
    similar_animes = similar_animes[similar_animes.index != target_anime]
    return similar_animes
print(recommend_anime('One Punch Man', similarity_df, top_n=5))


Anime 'OnePunchMan' not found in the dataset.


# 4. Evaluation

In [78]:
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_features = train_df.drop(columns=['Action'])
train_similarity_matrix = cosine_similarity(train_features)
train_similarity_df = pd.DataFrame(train_similarity_matrix, index=train_df['Action'], columns=train_df['Action'])


In [80]:
def evaluate_recommendations(test_df, similarity_df, top_n=5):
    results = []
    for target_anime in test_df['Action']:
        recommendations = recommend_anime(target_anime, similarity_df, top_n)
        results.append(recommendations)
        return results
print(evaluate_recommendations(test_df, train_similarity_df, top_n=5))


["Anime '59.0' not found in the dataset."]


# 5. Analyze 

Analyze Performance:
    Evaluate the quality of recommendations by manually inspecting the results or using feedback from actual users if available