# Assignment 20: Recommendation System

## Dataset: Anime Ratings

**Topics Covered:**
- Collaborative Filtering
- Content-Based Filtering
- Similarity Measures

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Read the anime catalogue from the CSV file in the working directory
df = pd.read_csv('anime.csv')
print("Dataset loaded! Shape:", df.shape)
# Trailing expression: the notebook renders the first five rows as a preview
df.head(5)

In [None]:
# Inspect the schema: list the column names, then count the missing
# entries in each column
print(df.columns.to_list())
print("\nMissing values:")
print(df.isna().sum())

In [None]:
# Clean data: a row without a title cannot be looked up by name, so drop it
df = df.dropna(axis=0, subset=['name'])

# Replace missing ratings with the mean of the ratings that are present
# (skipped entirely when the dataset has no rating column)
if 'rating' in df:
    df['rating'] = df['rating'].fillna(value=df['rating'].mean())

print("After cleaning:", df.shape)

In [None]:
# Content-Based Filtering using Genre
print("=== Content-Based Filtering ===")

# Everything below needs the genre text. When the column is absent this
# block is skipped and tfidf / tfidf_matrix / cosine_sim stay undefined.
if 'genre' in df:
    # An empty string stands in for a missing genre so the vectorizer
    # always receives text
    df['genre'] = df['genre'].fillna(value='')

    # Turn every genre string into a TF-IDF weighted term vector.
    # NOTE(review): the vectorizer's default token pattern splits on
    # non-word characters, so hyphenated or multi-word genre labels would be
    # broken into separate terms -- confirm that is intended for this data.
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(df['genre'])
    print("TF-IDF matrix shape:", tfidf_matrix.shape)

    # Pairwise cosine similarity of each row against every row; passing a
    # single matrix compares it with itself
    cosine_sim = cosine_similarity(tfidf_matrix)
    print("Similarity matrix shape:", cosine_sim.shape)

In [None]:
# Create index for lookup: anime title -> row position.
# Positions (0 .. len(df) - 1) are stored rather than df.index labels because
# the similarity matrix rows and the later .iloc lookups are positional, and
# labels stop matching positions once rows have been dropped during cleaning.
indices = pd.Series(np.arange(len(df)), index=df['name'])
# Keep only the first row of a repeated title so a lookup yields one position
# instead of a Series. (Series.drop_duplicates() compares the values, which
# are already unique, so it never removed repeated titles.)
indices = indices[~indices.index.duplicated(keep='first')]

# Function to get recommendations
def get_recommendations(title, cosine_sim=cosine_sim, n=10):
    """Return the names of up to ``n`` anime most similar to ``title``.

    Args:
        title: Anime name to look up in the module-level ``indices`` Series.
        cosine_sim: Similarity matrix; row ``i`` is read as the scores of row
            ``i`` of ``df`` against every row of ``df``. Defaults to the
            matrix that exists at the time this function is defined.
        n: Maximum number of recommendations to return. Values below zero
            are treated as zero.

    Returns:
        A ``pd.Series`` of anime names ordered from most to least similar,
        or the string "Anime not found!" when ``title`` is not in ``indices``.
    """
    if title not in indices:
        return "Anime not found!"

    idx = indices[title]
    # A title that occurs on several rows makes the lookup return a Series;
    # fall back to its first occurrence so a single matrix row is selected.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Remove the queried anime by position rather than assuming it sorts
    # first: an earlier row that ties for the top score would otherwise take
    # slot 0, leaving the query itself among its own recommendations.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # The sort is stable, so equally similar titles keep their dataset order
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp n so a negative value cannot slice from the end of the list
    anime_indices = [position for position, _ in sim_scores[:max(n, 0)]]

    return df['name'].iloc[anime_indices]

print("Recommendation function created!")

In [None]:
# Test recommendations
print("=== Sample Recommendations ===")

# Use the first title in the dataset as the query
sample_anime = df['name'].iloc[0]
print("\nRecommendations for:", sample_anime)
print("-" * 40)

recommendations = get_recommendations(sample_anime)
# A failed lookup comes back as a plain string; only a Series holds results
if isinstance(recommendations, pd.Series):
    for rank, anime in enumerate(recommendations, start=1):
        print(str(rank) + ".", anime)

In [None]:
# Popularity-based recommendations (as baseline)
print("=== Popularity-Based Recommendations ===")

# Ranking always needs ratings; when member counts are available they are
# used as the secondary sort key, otherwise rating alone decides the order
if 'rating' in df:
    if 'members' in df:
        popular = df.sort_values(by=['rating', 'members'], ascending=False)
        print("\nTop 10 Most Popular Anime:")
        print(popular[['name', 'rating', 'members']].head(n=10))
    else:
        popular = df.sort_values(by='rating', ascending=False)
        print("\nTop 10 Highest Rated:")
        print(popular[['name', 'rating']].head(n=10))

## Summary

**Types of Recommendation Systems:**

1. **Content-Based Filtering**
   - Uses item features (genre, description)
   - Recommends items similar to ones the user already likes

2. **Collaborative Filtering**
   - Uses user-item interactions
   - Recommends items that users with similar tastes rated highly

3. **Hybrid Methods**
   - Combines both approaches