In [54]:
import pandas as pd
import dill
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.pipeline import Pipeline

In [55]:
# ------------------------------------------
# Step 1: Load and clean the dataset
# ------------------------------------------

# Load the dataset
df = pd.read_csv("./dataset/anime.csv", low_memory=False, usecols=['anime_id','image_url',"describe",'english_name','score', 'genres','episodes','producers','studios', 'themes', 'rank', 'popularity']) 

In [56]:
# ------------------------------------------
# Step 2: Preprocess the data
# ------------------------------------------

# Drop duplicates and rows with missing values
df.drop_duplicates(inplace=True)
df.dropna(inplace=True)

# Combine multiple features into one string for content
def combine_features(row):
        return f"{row['describe']} {row['genres']} {row['producers']} {row['studios']} {row['themes']}"

df['combined_content'] = df.apply(combine_features, axis=1)

# Float to int
df['episodes'] = df['episodes'].astype(int)
df['rank'] = df['rank'].astype(int)

# Combine used columns
df = df[['anime_id', 'image_url', 'english_name', 'score', 'genres','episodes' ,'rank','studios','describe' ,'popularity','combined_content']].reset_index(drop=True)

In [None]:
# ------------------------------------------
# Step 2: Define the AnimeRecommender class
# ------------------------------------------
class AnimeRecommender(BaseEstimator, TransformerMixin):
    """Simple content-based anime recommender using TF-IDF and cosine similarity."""
    
    # Fit method
    def fit(self, X, y=None):
        self.vectorizer = TfidfVectorizer(stop_words='english')
        self.tfidf_matrix = self.vectorizer.fit_transform(X['combined_content'])
        self.similarity_matrix = cosine_similarity(self.tfidf_matrix)
        self.anime_df = X.reset_index(drop=True)
        self.anime_indices = pd.Series(self.anime_df.index, index=self.anime_df['english_name'].str.lower())
        return self

    # Transform method
    def transform(self, X):
        return self
    
    # Get anime index
    def get_anime_index(self, anime_name):
        # return self.anime_indices.get(anime_name.lower(), -1)

        matches = self.anime_df[self.anime_df['english_name'].str.lower() == anime_name.lower()]

        if not matches.empty:
            return matches.index[0]

        # Try partial matching if exact match not found
        matches = self.anime_df[self.anime_df['english_name'].str.lower().str.contains(anime_name.lower())]

        if not matches.empty:
            return matches.index[0]

        return -1

    # Recommend method
    def get_recommendations(self, anime_name, n=10):
        """""
        Args:
            anime_name (str): Name of the anime to base recommendations on
            n (int): Number of recommendations to return
        
        Returns:
            pandas.DataFrame: Dataframe with recommended anime
        """
        anime_idx = self.get_anime_index(anime_name)
        if anime_idx == -1:
            print(f"Anime '{anime_name}' not found.")
            print("Try searching for similar Anime :")

        similarity_scores = list(enumerate(self.similarity_matrix[anime_idx]))
        similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
        similar_indices = [i for i, _ in similarity_scores[1:n+1]]
        return self.anime_df.iloc[similar_indices].reset_index(drop=True)
    
    # Recommend by genre
    def get_recommendations_by_genre(self, genre, n=10):
        """"
        Args:
            genre (str): Genre to filter by
            n (int): Number of recommendations to return
        
        Returns:
            pandas.DataFrame: Dataframe with recommended anime
        """
        # Filter anime by genre
        # genre_matches = self.data[self.data['genres'].str.lower().str.contains(genre.lower())]
        genre_matches = self.anime_df[self.anime_df['genres'].str.lower().str.contains(genre.lower())]

        if genre_matches.empty:
            print(f"No anime found for genre '{genre}'.")
            # return pd.DataFrame()
        

        # Sort by score (descending)
        return genre_matches.sort_values('score', ascending=False).head(n).reset_index(drop=True)
    
    # Get top rated
    def get_top_rated_anime(self, n=10):
        """
        Get the top-rated anime.
        Args:
            n (int): Number of anime to return
        Returns:
            pandas.DataFrame: Dataframe with top-rated anime
        """
        # return self.data.sort_values('score', ascending=False).head(n).reset_index(drop=True)
        return self.anime_df.sort_values('score', ascending=False).head(n).reset_index(drop=True)
    
    # Get popular
    def get_popular_anime(self, n=10):
        """
        Get the most popular anime.

        Args:
            n (int): Number of anime to return

        Returns:
            pandas.DataFrame: Dataframe with popular anime
        """
        # return self.anime_df.sort_values('popularity').head(n).reset_index(drop=True)
        return self.anime_df.sort_values('popularity', ascending=False).head(n).reset_index(drop=True)
    
    # Search Anime
    def search_anime(self, query, n=10):
        """
        Search for anime by name.

        Args:
            query (str): Search query
            n (int): Maximum number of results to return

        Returns:
            pandas.DataFrame: Dataframe with search results
        """
        matches = self.anime_df[self.anime_df['english_name'].str.lower().str.contains(query.lower())]
        return matches.head(n).reset_index(drop=True)


In [58]:
# --------------------------------------------------
# Step 3: Build Pipeline
# --------------------------------------------------
anime_pipeline = Pipeline([
    ('AnimeRecommender', AnimeRecommender())
])

# Fit the pipeline
anime_pipeline.fit(df)

In [59]:
# --------------------------------------------------
# Step 4: Test the model
# --------------------------------------------------
# Get the recommender model
recommender_model = anime_pipeline.named_steps['AnimeRecommender']

#  Get recommendations  By Anime Name
get_recommendations = recommender_model.get_recommendations("fboiefbioearjhoier")
get_recommendations

Anime 'fboiefbioearjhoier' not found.
Try searching for similar titles:


Unnamed: 0,anime_id,image_url,english_name,score,genres,episodes,rank,studios,describe,popularity,combined_content
0,2692,https://cdn.myanimelist.net/images/anime/1318/...,CB Character: Nagai Gou World,6.78,"Action, Comedy",3,5427,Triangle Staff,Various characters from Go Nagai works includi...,9466,Various characters from Go Nagai works includi...
1,1628,https://cdn.myanimelist.net/images/anime/5/848...,Demon Lord Dante,5.96,"Action, Horror, Supernatural",13,10096,Magic Bus,"While sleeping one night, Ryo Utsugi had a nig...",6279,"While sleeping one night, Ryo Utsugi had a nig..."
2,3898,https://cdn.myanimelist.net/images/anime/1446/...,Mazinger Z vs. Devilman,6.1,"Action, Adventure, Sci-Fi",1,9378,Toei Animation,The heroes of the series Mazinger Z and Devilm...,8726,The heroes of the series Mazinger Z and Devilm...
3,2745,https://cdn.myanimelist.net/images/anime/1091/...,Hell Teacher Nube,7.13,"Action, Comedy, Horror, Supernatural",3,3758,Toei Animation,Continuation of the TV series.,9612,"Continuation of the TV series. Action, Comedy,..."
4,37590,https://cdn.myanimelist.net/images/anime/1046/...,Devilman: Crybaby - Digest Video,6.22,"Action, Horror, Supernatural",1,8756,Science SARU,A short web recap of the Devilman: Crybaby ser...,6338,A short web recap of the Devilman: Crybaby ser...
5,4617,https://cdn.myanimelist.net/images/anime/3/245...,Seven Cities Story: Arctic Front,6.15,"Drama, Sci-Fi",2,9141,Studio Junio,"In 2099, Earth has shifted off its axis and be...",10132,"In 2099, Earth has shifted off its axis and be..."
6,1268,https://cdn.myanimelist.net/images/anime/3/615...,Aquarian Age: The Movie,5.75,"Action, Fantasy",1,11155,Madhouse,During the chaotic period known in Western ast...,9587,During the chaotic period known in Western ast...
7,6884,https://cdn.myanimelist.net/images/anime/4/171...,Spice and Wolf II Specials,7.05,Supernatural,2,4167,"Brain's Base, Marvy Jack",Extra short anime featuring Holo from BD volum...,2932,Extra short anime featuring Holo from BD volum...
8,1358,https://cdn.myanimelist.net/images/anime/2/250...,Fist of the North Star: The Movie,7.21,"Action, Drama, Sci-Fi",1,3341,Toei Animation,"Following a cataclysmic nuclear war, the world...",4870,"Following a cataclysmic nuclear war, the world..."
9,1983,https://cdn.myanimelist.net/images/anime/4/403...,Black Jack: The Two Doctors of Darkness,7.15,Drama,1,3644,Tezuka Productions,The mega-corporate Daedalus Group is under att...,8501,The mega-corporate Daedalus Group is under att...


In [60]:
# Get recommendations  By Genre
get_recommendations_by_genre = recommender_model.get_recommendations_by_genre("action")
get_recommendations_by_genre.head(2)

Unnamed: 0,anime_id,image_url,english_name,score,genres,episodes,rank,studios,describe,popularity,combined_content
0,5114,https://cdn.myanimelist.net/images/anime/1208/...,Fullmetal Alchemist: Brotherhood,9.1,"Action, Adventure, Drama, Fantasy",64,2,Bones,After a horrific alchemy experiment goes wrong...,3,After a horrific alchemy experiment goes wrong...
1,38524,https://cdn.myanimelist.net/images/anime/1517/...,Attack on Titan Season 3 Part 2,9.05,"Action, Drama, Suspense",10,5,Wit Studio,Seeking to restore humanity's diminishing hope...,21,Seeking to restore humanity's diminishing hope...


In [61]:
# Get top rated anime
get_top_rated_anime = recommender_model.get_top_rated_anime()
get_top_rated_anime.head(2)

Unnamed: 0,anime_id,image_url,english_name,score,genres,episodes,rank,studios,describe,popularity,combined_content
0,5114,https://cdn.myanimelist.net/images/anime/1208/...,Fullmetal Alchemist: Brotherhood,9.1,"Action, Adventure, Drama, Fantasy",64,2,Bones,After a horrific alchemy experiment goes wrong...,3,After a horrific alchemy experiment goes wrong...
1,9253,https://cdn.myanimelist.net/images/anime/1935/...,Steins;Gate,9.07,"Drama, Sci-Fi, Suspense",24,3,White Fox,Eccentric scientist Rintarou Okabe has a never...,14,Eccentric scientist Rintarou Okabe has a never...


In [62]:
# Get popular anime
get_popular_anime = recommender_model.get_popular_anime()
get_popular_anime.head(2)

Unnamed: 0,anime_id,image_url,english_name,score,genres,episodes,rank,studios,describe,popularity,combined_content
0,58844,https://cdn.myanimelist.net/images/anime/1852/...,Luchika the Hedgehog,6.14,"Fantasy, Slice of Life",1,9166,Fanworks,An adaptation of the picture book Harinezumi n...,19163,An adaptation of the picture book Harinezumi n...
1,39230,https://cdn.myanimelist.net/images/anime/1996/...,Beanie's Daily,6.41,Slice of Life,16,7643,Making Animation,A daily of a cute red bean struggles to get ea...,18039,A daily of a cute red bean struggles to get ea...


In [63]:
# Search anime
search_anime = recommender_model.search_anime("naruto")
search_anime.head(2)

Unnamed: 0,anime_id,image_url,english_name,score,genres,episodes,rank,studios,describe,popularity,combined_content
0,1735,https://cdn.myanimelist.net/images/anime/1565/...,Naruto Shippuden,8.28,"Action, Adventure, Fantasy",500,306,Pierrot,It has been two and a half years since Naruto ...,16,It has been two and a half years since Naruto ...
1,20,https://cdn.myanimelist.net/images/anime/1141/...,Naruto,8.01,"Action, Adventure, Fantasy",220,659,Pierrot,"Moments before Naruto Uzumaki's birth, a huge ...",9,"Moments before Naruto Uzumaki's birth, a huge ..."


In [65]:
# --------------------------------------------------
# Step 5: Save the model
# --------------------------------------------------
dill.dump(anime_pipeline, open("model/anime_recommender.pkl", "wb"))