In [None]:
# Parameters
# JSON-encoded list of the anime titles that recommendations are generated for.
user_anime_titles = '["Boku no Hero Academia"]'


In [None]:
# scikit-learn is imported by the preprocessing and TF-IDF cells further down,
# so it has to be installed together with requests and pandas.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install requests pandas scikit-learn


In [None]:
import requests
import pandas as pd

# Function to fetch anime data with pagination
def fetch_anime_data_paged(limit=4500, session=None, request_delay=1.0,
                           max_retries=3, timeout=10):
    """Download up to ``limit`` anime records from the Jikan ``/v4/anime`` listing.

    Pages are requested one after another starting at page 1. Fetching stops
    when ``limit`` records were collected, when a page comes back empty, when
    the response reports that there is no next page, or when a request fails.
    On failure the records collected so far are returned (best effort).

    Args:
        limit: Maximum number of records to return.
        session: Optional object exposing ``get(url, params=..., timeout=...)``
            such as a ``requests.Session``. When omitted the module-level
            ``requests`` API is used.
        request_delay: Seconds to wait between two page requests. It is also
            the base of the exponential back-off applied after an HTTP 429.
        max_retries: Number of consecutive HTTP 429 answers that are retried
            before the fetch gives up.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the notebook forever.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``genres``,
        ``popularity``, ``rating`` and ``description``.
    """
    import time

    http = requests if session is None else session
    anime_list = []
    current_page = 1
    throttled_attempts = 0

    while len(anime_list) < limit:
        try:
            response = http.get(
                "https://api.jikan.moe/v4/anime",
                params={"page": current_page},
                timeout=timeout,
            )

            # Being throttled is expected when walking many pages: back off
            # and retry the same page instead of returning a partial catalogue.
            if response.status_code == 429 and throttled_attempts < max_retries:
                throttled_attempts += 1
                time.sleep(request_delay * 2 ** throttled_attempts)
                continue

            if response.status_code != 200:
                print(f"Error fetching page {current_page}: HTTP {response.status_code}")
                break

            throttled_attempts = 0
            payload = response.json()
            data = payload.get('data') or []
            if not data:
                print("No more anime data available.")
                break

            # Only take as many entries as are still missing to reach `limit`.
            for anime in data[:limit - len(anime_list)]:
                anime_list.append({
                    'id': anime.get('mal_id'),
                    'title': anime.get('title'),
                    # `genres` may be present but null, hence `or []`.
                    'genres': [genre['name'] for genre in anime.get('genres') or []],
                    'popularity': anime.get('popularity'),
                    'rating': anime.get('score'),
                    'description': anime.get('synopsis'),
                })

            # Stop early when the API says this was the last page; if the
            # field is missing, fall back to the empty-page check above.
            if not (payload.get('pagination') or {}).get('has_next_page', True):
                break

            current_page += 1
            if len(anime_list) < limit and request_delay > 0:
                time.sleep(request_delay)
        except Exception as e:
            print(f"Error fetching page {current_page}: {e}")
            break

    return anime_list

# Fetch the catalogue and store it on disk for the preprocessing cell.
anime_data = fetch_anime_data_paged(limit=4500)

anime_df = pd.DataFrame(anime_data)

if anime_df.empty:
    # A failed fetch returns an empty list. Writing it out would replace any
    # CSV from an earlier successful run with an unusable file and still
    # report success, so the file is left untouched instead.
    print("No anime data was fetched; 'raw_anime_data_paged.csv' was not written.")
else:
    anime_df.to_csv('raw_anime_data_paged.csv', index=False)
    print("Anime data saved successfully!")



In [None]:
# Sanity check: print the first five rows of the fetched data.
print(anime_df.head(n=5))


In [None]:
import ast

from sklearn.preprocessing import MinMaxScaler

# Reload the raw data from disk and keep one row per title.
anime_df = pd.read_csv('raw_anime_data_paged.csv')
anime_df = anime_df.drop_duplicates(subset='title', keep='first')

# Fill gaps: missing ratings get the mean rating, missing descriptions a placeholder.
anime_df['rating'] = anime_df['rating'].fillna(anime_df['rating'].mean())
anime_df['description'] = anime_df['description'].fillna("No description available.")

# The CSV stores each genre list as the text of a Python list literal.
# ast.literal_eval only accepts literals, so unlike eval() it cannot execute
# code that might be embedded in the file.
anime_df['genres'] = anime_df['genres'].apply(
    lambda x: [g.lower() for g in ast.literal_eval(x)] if pd.notna(x) else []
)

# Larger popularity values are treated as "less popular" (see the filter
# below), so a missing value is filled with the largest value present.
anime_df['popularity'] = anime_df['popularity'].fillna(anime_df['popularity'].max())
threshold = anime_df["popularity"].quantile(0.5)

# Filter less popular anime: keep rows whose popularity value is above the median
less_popular_anime = anime_df[anime_df["popularity"] > threshold]

# Save the filtered dataset if needed
less_popular_anime.to_csv('less_popular_anime.csv', index=False)
print("Filtered less popular anime saved successfully!")


In [None]:
# Feature engineering
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Turn every description into a TF-IDF vector (English stop words removed).
# Row i of tfidf_matrix belongs to the i-th row (by position) of anime_df.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(anime_df['description'].tolist())

# Pairwise cosine similarity between all descriptions.
similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [None]:
# Fetch anime details function
def fetch_anime_details(title, session=None, timeout=10):
    """Look up a single anime by title through the Jikan search endpoint.

    Args:
        title: Title to search for. It is sent as a query parameter, so
            characters such as spaces, ``&`` or ``#`` are URL-encoded rather
            than corrupting the request.
        session: Optional object exposing ``get(url, params=..., timeout=...)``
            such as a ``requests.Session``. When omitted the module-level
            ``requests`` API is used.
        timeout: Per-request timeout in seconds.

    Returns:
        A dict with the keys ``id``, ``title``, ``genres``, ``description``
        and ``rating`` for the first search hit, or ``None`` when nothing was
        found or the request failed. ``description`` is always a string and
        ``rating`` always a number, even when the API reports them as null.
    """
    http = requests if session is None else session
    try:
        response = http.get(
            "https://api.jikan.moe/v4/anime",
            params={"q": title, "limit": 1},
            timeout=timeout,
        )
        if response.status_code != 200:
            print(f"Error fetching details for '{title}': HTTP {response.status_code}")
            return None

        data = response.json().get('data') or []
        if data:
            anime = data[0]
            return {
                'id': anime.get('mal_id'),
                'title': anime.get('title'),
                # dict.get(key, default) still yields None when the key is
                # present with a null value, hence the `or` fallbacks.
                'genres': [genre['name'] for genre in anime.get('genres') or []],
                'description': anime.get('synopsis') or "",
                'rating': anime.get('score') or 0,
            }
    except Exception as e:
        print(f"Error fetching details for '{title}': {e}")
    return None


In [None]:
# Recommendation function ranking the less popular anime by description similarity
def recommend_less_popular(fetched_anime, num_recommendations=5):
    """Recommend less popular anime whose descriptions resemble the given ones.

    The descriptions of ``fetched_anime`` are projected into the fitted TF-IDF
    space, averaged into one profile vector and compared with every row of
    ``tfidf_matrix`` using cosine similarity.

    ``tfidf_matrix`` is aligned by position with ``anime_df``, not with
    ``less_popular_anime``. Candidates are therefore selected as the positions
    of ``anime_df`` whose index label also occurs in ``less_popular_anime``,
    and titles are read from ``anime_df`` at that same position.

    Args:
        fetched_anime: Iterable of dicts as returned by ``fetch_anime_details``;
            ``None`` entries and entries without a description are ignored.
        num_recommendations: Maximum number of titles to return.

    Returns:
        A list of titles ordered from most to least similar. The titles passed
        in are never recommended back. The list is empty when no usable
        description was supplied.

    Raises:
        ValueError: If ``tfidf_matrix`` and ``anime_df`` have a different
            number of rows, i.e. the feature cell was not re-run after
            ``anime_df`` changed.
    """
    fetched_descriptions = [
        anime['description']
        for anime in fetched_anime
        if anime and (anime.get('description') or '').strip()
    ]

    if not fetched_descriptions:
        print("No valid descriptions available for recommendations.")
        return []

    fetched_features = tfidf.transform(fetched_descriptions).mean(axis=0).A1
    similarity_scores = cosine_similarity(fetched_features.reshape(1, -1), tfidf_matrix)[0]

    if len(similarity_scores) != len(anime_df):
        raise ValueError(
            "tfidf_matrix and anime_df are out of sync; re-run the feature engineering cell."
        )

    # Positional mask over anime_df marking the rows kept in less_popular_anime.
    is_less_popular = anime_df.index.isin(less_popular_anime.index)
    titles = anime_df['title'].tolist()
    known_titles = {anime.get('title') for anime in fetched_anime if anime}

    # Rank less popular anime by similarity, skipping the input titles themselves.
    scores = [
        (position, similarity_scores[position])
        for position in range(len(similarity_scores))
        if is_less_popular[position] and titles[position] not in known_titles
    ]
    scores = sorted(scores, key=lambda x: x[1], reverse=True)

    # Check if there are enough recommendations available
    if len(scores) < num_recommendations:
        print(f"Only {len(scores)} less popular anime found for recommendations.")
        num_recommendations = len(scores)

    # Return top recommendations
    return [titles[position] for position, _ in scores[:num_recommendations]]

In [None]:
if __name__ == "__main__":
    import sys
    import json

    # Titles come from the `user_anime_titles` parameter defined in the first
    # cell. Inside a Jupyter kernel sys.argv[1] is a kernel launcher option
    # rather than user input, so the command line is only honoured when its
    # first argument really is a JSON list (plain script usage).
    titles = user_anime_titles
    if len(sys.argv) > 1:
        try:
            cli_titles = json.loads(sys.argv[1])
        except ValueError:
            cli_titles = None
        if isinstance(cli_titles, list):
            titles = cli_titles

    # The parameter is a JSON string by default, but an already decoded list
    # is accepted too.
    if isinstance(titles, str):
        titles = json.loads(titles)

    fetched_anime = []
    for title in titles:
        title = title.strip()
        anime_details = fetch_anime_details(title)
        if anime_details:
            fetched_anime.append(anime_details)

    # Generate recommendations
    recommendations = []
    if fetched_anime:
        recommendations = recommend_less_popular(fetched_anime, num_recommendations=5)

    # Save recommendations to a CSV file
    recommendations_df = pd.DataFrame(recommendations, columns=["Recommended Titles"])
    recommendations_df.to_csv('anime_recommendations.csv', index=False)

    print("Recommendations saved successfully!")