In [76]:
import pandas as pd
import dill
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.pipeline import Pipeline

In [59]:
# ------------------------------------------
# Step 1: Load and clean the dataset
# ------------------------------------------

# Read the raw anime catalogue, keeping only the columns used later in the notebook
df = pd.read_csv(
    "./dataset/anime.csv",
    usecols=[
        'anime_id', 'image_url', 'describe', 'english_name', 'score', 'genres',
        'episodes', 'producers', 'studios', 'themes', 'rank', 'popularity',
    ],
    low_memory=False,
)

In [60]:
# ------------------------------------------
# Step 2: Preprocess the data
# ------------------------------------------

# Remove exact duplicate rows first, then every row that still has a missing value
df = df.drop_duplicates().dropna()

# Combine multiple features into one string for content
def combine_features(row):
    """Join the row's describe, genres, producers, studios and themes values with single spaces."""
    content_fields = ('describe', 'genres', 'producers', 'studios', 'themes')
    # format(value) with no spec is exactly what an f-string placeholder applies
    return " ".join(format(row[field]) for field in content_fields)

# Build the modelling frame in one chain:
#   1. attach the combined text that TF-IDF will be fitted on,
#   2. cast the float-typed episodes / rank columns to int,
#   3. keep only the columns used downstream, with a fresh 0..n-1 index.
df = (
    df.assign(combined_content=df.apply(combine_features, axis=1))
    .astype({'episodes': int, 'rank': int})
    .loc[:, [
        'anime_id', 'image_url', 'english_name', 'score', 'genres', 'episodes',
        'rank', 'studios', 'describe', 'popularity', 'combined_content',
    ]]
    .reset_index(drop=True)
)

In [None]:
# ------------------------------------------
# Step 2: Define the AnimeRecommender class
# ------------------------------------------
class AnimeRecommender(BaseEstimator, TransformerMixin):
    """Simple content-based anime recommender using TF-IDF and cosine similarity.

    Attributes set by ``fit``:
        vectorizer: the fitted ``TfidfVectorizer``.
        tfidf_matrix: TF-IDF matrix of the ``combined_content`` column.
        similarity_matrix: pairwise cosine similarity between all rows.
        anime_df: the fitted frame with a fresh 0..n-1 index.
        anime_indices: Series mapping lower-cased ``english_name`` to row position.
    """

    def fit(self, X, y=None):
        """Vectorize ``X['combined_content']`` and precompute all pairwise similarities.

        Args:
            X (pandas.DataFrame): Catalogue with at least the ``combined_content``
                and ``english_name`` columns.
            y: Ignored; accepted for scikit-learn API compatibility.

        Returns:
            AnimeRecommender: the fitted instance.
        """
        self.vectorizer = TfidfVectorizer(stop_words='english')
        self.tfidf_matrix = self.vectorizer.fit_transform(X['combined_content'])
        # NOTE(review): this is a dense n x n matrix, so memory grows quadratically
        # with the number of rows in X.
        self.similarity_matrix = cosine_similarity(self.tfidf_matrix)
        # Positional index so that row labels line up with similarity_matrix rows.
        self.anime_df = X.reset_index(drop=True)
        self.anime_indices = pd.Series(
            self.anime_df.index,
            index=self.anime_df['english_name'].str.lower(),
        )
        return self

    def transform(self, X):
        """Return the recommender itself; ``X`` is not used and nothing is transformed."""
        return self

    def get_anime_index(self, anime_name):
        """Look up the row position of an anime by title, ignoring case.

        Args:
            anime_name (str): Title to look up in ``english_name``.

        Returns:
            int: Row position in ``anime_df``, or -1 when the title is unknown
            or ``anime_name`` is not a string. If several rows share the title,
            the first one is returned.
        """
        if not isinstance(anime_name, str):
            return -1
        match = self.anime_indices.get(anime_name.lower(), -1)
        if isinstance(match, pd.Series):
            # A duplicated title makes .get() return every matching position;
            # comparing that Series with -1 would raise, so pick the first row.
            match = match.iloc[0]
        return int(match)

    def get_recommendations(self, anime_name, n=10):
        """Return the ``n`` anime most similar to ``anime_name``.

        Args:
            anime_name (str): Name of the anime to base recommendations on.
            n (int): Number of recommendations to return; values below 1
                yield an empty result.

        Returns:
            pandas.DataFrame: Rows of ``anime_df`` ordered from most to least
            similar, never including the queried anime itself. An empty frame
            (after printing a message) when the anime is not found.
        """
        anime_idx = self.get_anime_index(anime_name)
        if anime_idx == -1:
            print(f"Anime '{anime_name}' not found.")
            return pd.DataFrame()

        scores = pd.Series(self.similarity_matrix[anime_idx])
        # Drop the query row explicitly rather than assuming it sorts first:
        # another row with identical content also scores 1.0 and may precede it.
        scores = scores.drop(index=anime_idx)
        # nlargest keeps the earliest row on ties, matching a stable descending sort.
        similar_indices = scores.nlargest(max(int(n), 0)).index.tolist()
        return self.anime_df.iloc[similar_indices].reset_index(drop=True)


In [None]:
# --------------------------------------------------
# Step 3: Build Pipeline
# --------------------------------------------------
# Single-step pipeline wrapping the recommender
anime_pipeline = Pipeline(steps=[("recommender", AnimeRecommender())])

# Fit on the preprocessed catalogue (the fitted pipeline is this cell's output)
anime_pipeline.fit(df)

In [68]:
# --------------------------------------------------
# Step 4: Make a recommendation
# --------------------------------------------------
# Pull the fitted recommender step out of the pipeline by its step name
recommender_model = anime_pipeline['recommender']

# Ask for the default number of titles most similar to the query and show them
result = recommender_model.get_recommendations("solo leveling")
print(result)

   anime_id                                          image_url  \
0     58567  https://cdn.myanimelist.net/images/anime/1448/...   
1      2025  https://cdn.myanimelist.net/images/anime/5/195...   
2     50275  https://cdn.myanimelist.net/images/anime/1656/...   
3     15651  https://cdn.myanimelist.net/images/anime/12/54...   
4     28537  https://cdn.myanimelist.net/images/anime/2/760...   
5      2200  https://cdn.myanimelist.net/images/anime/1946/...   
6      3958  https://cdn.myanimelist.net/images/anime/3/197...   
7     32494  https://cdn.myanimelist.net/images/anime/11/88...   
8      2171  https://cdn.myanimelist.net/images/anime/1765/...   
9     40483  https://cdn.myanimelist.net/images/anime/1558/...   

                                        english_name  score  \
0      Solo Leveling Season 2: Arise from the Shadow   8.87   
1                                  Darker than Black   8.05   
2  Sword Art Online the Movie: Progressive - Sche...   7.69   
3                    

In [79]:
# --------------------------------------------------
# Step 5: Save the model
# --------------------------------------------------
# Write through a context manager so the file handle is flushed and closed
# even if serialization fails; the bare open() call previously leaked it.
with open("model/anime_recommender.pkl", "wb") as model_file:
    dill.dump(anime_pipeline, model_file)