In [None]:
import pandas as pd
# Show every column when a frame is displayed. The documented option key is
# "display.max_columns"; the "display.max.columns" spelling only worked because
# pandas treats option names as regular-expression patterns.
pd.set_option("display.max_columns", None)

# NOTE(review): machine-specific absolute path -- the notebook only runs on the
# author's PC. Point DATASET_PATH at a project-relative copy before sharing.
DATASET_PATH = r"C:\Users\PC\Desktop\top_anime_dataset_v2.csv"
anime_master_df = pd.read_csv(DATASET_PATH)
anime_master_df.head()

In [None]:
# Count of missing `themes` values, followed by the frame's dtype / non-null summary.
missing_themes = anime_master_df['themes'].isna().sum()
print(missing_themes)
anime_master_df.info()

In [None]:
# Preview the first five rows.
anime_master_df.head(n=5)

In [None]:
# List the column labels of the loaded frame.
anime_master_df.columns

In [None]:
# Read one full synopsis (row label 1) to see what the raw text looks like.
anime_master_df.loc[1, 'synopsis']

In [None]:
# Coerce `episodes` to numbers; anything that cannot be parsed becomes NaN.
episodes_numeric = pd.to_numeric(anime_master_df['episodes'], errors='coerce')
anime_master_df['episodes'] = episodes_numeric

In [None]:
# Text columns that feed the content-based features. Their missing values are
# replaced with empty strings so later string concatenation never meets NaN.
selected_features = ['genres', 'themes', 'demographics', 'synopsis', 'type', 'producers', 'source']

anime_master_df[selected_features] = anime_master_df[selected_features].fillna('')

In [None]:
# Preview the first five rows after filling the text columns.
anime_master_df.head(n=5)

In [None]:
# Build one space-separated text blob per title by appending the descriptive
# columns to `genres` in this fixed order.
combined_text = anime_master_df['genres']
for feature_name in ('themes', 'demographics', 'synopsis', 'type', 'producers', 'source'):
    combined_text = combined_text + ' ' + anime_master_df[feature_name]

anime_master_df['combined_features'] = combined_text

In [None]:
# Inspect the combined text of the first title (row label 0).
anime_master_df.loc[0, 'combined_features']

In [None]:
import re
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def preprocess(text, stemmer=None):
    """Normalise one combined-features string before vectorisation.

    Steps: lowercase, drop square-bracketed notes, collapse every run of
    whitespace to a single space, and stem each whitespace-separated token.

    Args:
        text: Raw text. Any non-string value (for example NaN) yields "".
        stemmer: Optional object exposing ``stem(word)``. Pass one shared
            instance when processing many rows; when omitted, a new
            ``PorterStemmer`` is created for the call.

    Returns:
        The cleaned, stemmed text with tokens joined by single spaces.
    """
    if not isinstance(text, str):
        return ""
    if stemmer is None:
        stemmer = PorterStemmer()

    text = text.lower()
    # DOTALL lets a bracketed note be removed even when it wraps across lines;
    # without it `.` stops at the first newline and the note survives.
    text = re.sub(r'\[.*?\]', '', text, flags=re.DOTALL)
    # split() breaks on any whitespace run (spaces, tabs, newlines) and drops
    # empty pieces, so re-joining with " " is the whole whitespace clean-up.
    return " ".join(stemmer.stem(word) for word in text.split())

# Clean and stem every combined-features string, overwriting the raw text.
anime_master_df['combined_features'] = anime_master_df['combined_features'].map(preprocess)

In [None]:
# Preview the first five rows with the cleaned `combined_features` column.
anime_master_df.head(n=5)

In [104]:
# Fit TF-IDF (English stop words removed) on the cleaned text and keep the
# transformed features for the similarity step.
tf_vec = TfidfVectorizer(stop_words='english')
feature_corpus = anime_master_df['combined_features']
vectorized_features = tf_vec.fit_transform(feature_corpus)

In [105]:
# Cosine similarity of the TF-IDF features against themselves; `recommend`
# reads row i of this matrix as the similarity of every title to title i.
similarity = cosine_similarity(vectorized_features)

In [106]:
# Ask for the title to look up; it is compared for exact equality against the
# `name` column below. NOTE(review): input() blocks an unattended "Run All".
movie = input('Movie: ')

Movie:  Naruto


In [107]:
# Index label of the first row whose `name` equals the requested title.
title_rows = anime_master_df.loc[anime_master_df['name'] == movie]
input_idx = title_rows.index[0].item()
input_idx

672

In [109]:
def recommend(movie, top_n=5, pool_size=10):
    """Recommend titles similar to ``movie``, best-rated first.

    The ``pool_size`` titles most similar to ``movie`` (according to the
    module-level ``similarity`` matrix) are shortlisted, then the ``top_n`` of
    those with the highest ``score`` are returned.

    Args:
        movie: Exact value of the ``name`` column to look up.
        top_n: Number of recommendations to return.
        pool_size: Number of most-similar titles considered before ranking
            by score.

    Returns:
        A ``(names, poster_urls)`` tuple of two equally long lists.

    Raises:
        IndexError: If no row of ``anime_master_df`` has ``name == movie``.
    """
    # Work with the row *position*: `similarity` and `.iloc` are positional, so
    # an index label is only correct while the frame has a default 0..n-1 index.
    match_positions = (anime_master_df['name'] == movie).to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        raise IndexError(f"no anime named {movie!r} in anime_master_df")
    input_idx = int(match_positions[0])

    ranked = sorted(enumerate(similarity[input_idx]), key=lambda pair: pair[1], reverse=True)
    # Drop the query title explicitly instead of assuming it sorts first: an
    # exact duplicate entry can tie with it and take the top slot.
    shortlist = [idx for idx, _ in ranked if idx != input_idx][:pool_size]

    candidates = []
    for idx in shortlist:
        row = anime_master_df.iloc[idx]
        candidates.append({
            'name': row['name'],
            'poster': row['image_url'],
            'score': row['score'],
        })

    def _score_key(candidate):
        # Unrated titles (NaN/None) rank last. A raw NaN key would make the sort
        # order unpredictable because every comparison against NaN is False.
        score = candidate['score']
        return float('-inf') if pd.isna(score) else score

    best = sorted(candidates, key=_score_key, reverse=True)[:top_n]

    recommended_anime = [c['name'] for c in best]
    recommended_anime_poster = [c['poster'] for c in best]
    return recommended_anime, recommended_anime_poster

In [110]:
# Show the recommended titles and their poster URLs for the entered title.
recommend(movie)

(['Naruto: Shippuuden',
  'Naruto: Shippuuden Movie 6 - Road to Ninja',
  'Naruto: Shippuuden Movie 5 - Blood Prison',
  'Naruto: Shippuuden Movie 4 - The Lost Tower',
  'Boruto: Naruto the Movie'],
 ['https://cdn.myanimelist.net/images/anime/1565/111305.jpg',
  'https://cdn.myanimelist.net/images/anime/1620/94336.jpg',
  'https://cdn.myanimelist.net/images/anime/1500/134496.jpg',
  'https://cdn.myanimelist.net/images/anime/1479/116734.jpg',
  'https://cdn.myanimelist.net/images/anime/4/78280.jpg'])

In [None]:
# Scratch check: look up the title at the row position stored in pred_list[0].
pred_list = [20, 0.975]

anime_master_df['name'].iloc[pred_list[0]]

In [None]:
# Scratch check: first element of pred_list (used above as a row position).
pred_list[0]

In [None]:
# Rank every title by its similarity to the one at `input_idx`, most similar first.
# Reads `similarity`, the cosine-similarity matrix this notebook actually builds;
# the name `similarity_matrix` is never assigned, so it raised NameError on a
# fresh kernel.
similarity_matrix_score = list(enumerate(similarity[input_idx]))

sorted_similarity_matrix_score = sorted(similarity_matrix_score, key=lambda pair: pair[1], reverse=True)

    

In [111]:
# Replace missing episode counts with an empty string.
# NOTE(review): `episodes` was coerced with pd.to_numeric earlier in the notebook,
# so after this step the column mixes numbers with '' -- confirm that whatever
# reads the exported frame expects that.
anime_master_df['episodes'] = anime_master_df['episodes'].fillna('')

In [112]:
# Columns kept in the exported frame.
anime_cols = [
    'anime_id', 'anime_url', 'image_url', 'name', 'score', 'themes',
    'demographics', 'synopsis', 'type', 'episodes', 'producers', 'source',
    'combined_features',
]
# Take an explicit copy so that later column assignments on `animedf` neither
# raise SettingWithCopyWarning nor risk touching `anime_master_df`.
animedf = anime_master_df[anime_cols].copy()

In [121]:
# def to_lower(text):
#     text = text.lower()
#     return text
    
# animedf['name'] = animedf['name'].apply(lambda x: to_lower(x))

In [None]:
# Preview the first five rows of the export frame.
animedf.head(n=5)

In [115]:
import pickle

# Persist the cosine-similarity matrix. The object is `similarity`; the name
# `similarity_matrix` is never assigned in this notebook, so dumping it raised
# NameError on a fresh kernel.
with open('artifacts/similarity_matrix.pkl', 'wb') as f:
    pickle.dump(similarity, f)

In [116]:
# Persist the export frame. NOTE: despite the .csv extension this file is a binary
# pickle (opened 'wb', written with pickle.dump), so it must be read back with
# pickle, not as CSV text.
with open('artifacts/anime_data.csv', 'wb') as f:
    pickle.dump(animedf, f)

In [144]:
import requests  # used below but not imported anywhere else in the notebook

# Jikan v4 `seasons/now` endpoint, limited to 25 entries.
url = 'https://api.jikan.moe/v4/seasons/now?limit=25'

# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors (e.g. rate limiting) here instead of
# as a confusing KeyError on data['data'] further down.
response = requests.get(url, timeout=10)
response.raise_for_status()
data = response.json()

# Flatten each API item into the same column layout as the export frame.
# `item.get(key) or default` also covers keys that are present but null, which
# `.get(key, default)` would pass through as None.
new_anime_list = [
    {
        'anime_id': item['mal_id'],
        'anime_url': item['url'],
        'image_url': item['images']['jpg']['image_url'],
        'name': item['title'],
        'score': item.get('score'),
        'themes': [t["name"] for t in item.get("themes") or []],
        'demographics': [d["name"] for d in item.get("demographics") or []],
        'synopsis': item.get('synopsis') or '',
        'type': item.get('type') or '',
        'episodes': item.get("episodes"),
        'producers': [p["name"] for p in item.get("producers") or []],
        'source': item.get("source") or "",
        'combined_features': None,
    }
    for item in data['data']
]

new_df = pd.DataFrame(new_anime_list)

In [148]:
# Keep only the five highest-scored entries.
score_ranked = new_df.sort_values(by='score', ascending=False)
new_df = score_ranked.head(n=5)

In [149]:
from datetime import datetime

# Today's date formatted as DD-MM-YYYY.
# NOTE(review): `date_str` is not used anywhere in the visible notebook -- possibly
# meant to be part of the output filename; confirm or remove.
date_str = datetime.now().strftime("%d-%m-%Y")

# Persist the trending frame. NOTE: despite the .csv extension this file is a binary
# pickle (opened 'wb', written with pickle.dump), so it must be read back with
# pickle, not as CSV text.
with open('artifacts/trending_df.csv', 'wb') as f:
    pickle.dump(new_df, f)

In [None]:
# Five highest-scored titles in the export frame.
score_sorted = animedf.sort_values(by='score', ascending=False)
score_sorted.head(n=5)