In [1]:
import pandas as pd

# Read the raw anime catalogue from CSV into a DataFrame.
ANIME_CSV_PATH = '/content/anime.csv'
anime_df = pd.read_csv(ANIME_CSV_PATH)


In [3]:
# Count the null entries in every column (summing down the rows).
print(anime_df.isnull().sum(axis=0))




anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64


In [4]:
# Get a summary of the dataset.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
anime_df.info()
print(anime_df.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB
None
   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Advent

In [5]:
# One-hot encode genres: split the comma-separated 'genre' string and build one
# 0/1 indicator column per distinct genre label.
genre_dummies = anime_df['genre'].str.get_dummies(sep=', ')

# Remove indicator columns left over from an earlier run of this cell before
# concatenating, so re-running the cell does not append duplicate genre columns.
anime_df = pd.concat(
    [anime_df.drop(columns=genre_dummies.columns, errors='ignore'), genre_dummies],
    axis=1,
)


In [7]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Requires `anime_df` as loaded by the earlier cells.

# The two numeric inputs to the similarity features.
numeric_columns = ['rating', 'episodes']

# Coerce both columns to numbers; any entry that cannot be parsed
# (e.g. an 'Unknown' placeholder) becomes NaN.
anime_df[numeric_columns] = anime_df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Rescale each column with min-max scaling and write the result back in place.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(anime_df[numeric_columns])
anime_df[numeric_columns] = scaled_values

In [9]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.impute import SimpleImputer

# Requires `anime_df` (with scaled 'rating'/'episodes') and `genre_dummies` from the earlier cells.

# Feature set: the two scaled numeric columns followed by every genre indicator.
feature_columns = ['rating', 'episodes', *genre_dummies.columns]

# Fill missing values with the per-column mean (other SimpleImputer strategies
# such as 'median' or 'most_frequent' would also work). Rows are imputed rather
# than dropped, so row i of the matrix still corresponds to row i of anime_df.
imputer = SimpleImputer(strategy='mean')
anime_features = imputer.fit_transform(anime_df[feature_columns])

# Pairwise cosine similarity between every pair of rows.
cosine_sim = cosine_similarity(anime_features)

In [10]:
def recommend_anime(title, cosine_sim, anime_df, top_n=5):
    """Return the ``top_n`` anime most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up. It is matched against the ``'title'`` column
        when the frame has one, otherwise against ``'name'``.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` must describe the row at
        *position* ``i`` of ``anime_df``.
    anime_df : pandas.DataFrame
        Frame with a ``'name'`` (or ``'title'``) column and a ``'rating'`` column.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        The title column and ``'rating'`` of the recommended rows, most
        similar first. The queried anime itself is never included.

    Raises
    ------
    KeyError
        If no row matches ``title``.
    """
    # Use 'title' if the caller's frame has it, else fall back to 'name'.
    name_col = 'title' if 'title' in anime_df.columns else 'name'

    # Work with row positions (not index labels) so the lookup stays aligned
    # with both cosine_sim and .iloc even if the frame has a non-default index.
    matches = (anime_df[name_col] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise KeyError(f"Anime title not found: {title!r}")
    idx = int(matches[0])

    # Exclude the query row explicitly instead of assuming it sorts first:
    # another item with an identical feature vector ties with it at the top.
    scored = [(pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx]
    scored.sort(key=lambda pair: pair[1], reverse=True)

    anime_indices = [pos for pos, _ in scored[:max(top_n, 0)]]
    return anime_df.iloc[anime_indices][[name_col, 'rating']]


In [14]:
def recommend_anime_with_threshold(title, cosine_sim, anime_df, threshold=0.5, top_n=5):
    """Return up to ``top_n`` anime whose similarity to ``title`` exceeds ``threshold``.

    Parameters
    ----------
    title : str
        Exact title to look up. It is matched against the ``'title'`` column
        when the frame has one, otherwise against ``'name'``.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` must describe the row at
        *position* ``i`` of ``anime_df``.
    anime_df : pandas.DataFrame
        Frame with a ``'name'`` (or ``'title'``) column and a ``'rating'`` column.
    threshold : float, default 0.5
        Only items with similarity strictly greater than this are returned.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        The title column and ``'rating'`` of the qualifying rows, most similar
        first. May be empty. The queried anime itself is never included.

    Raises
    ------
    KeyError
        If no row matches ``title``.
    """
    # Use 'title' if the caller's frame has it, else fall back to 'name'.
    name_col = 'title' if 'title' in anime_df.columns else 'name'

    # Row positions keep the lookup aligned with cosine_sim and .iloc.
    matches = (anime_df[name_col] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise KeyError(f"Anime title not found: {title!r}")
    idx = int(matches[0])

    # Drop the query row by position rather than slicing off the first sorted
    # entry: the query may not pass the threshold (or may tie with another
    # item), in which case slicing would discard the best genuine match.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx and score > threshold
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    anime_indices = [pos for pos, _ in candidates[:max(top_n, 0)]]
    return anime_df.iloc[anime_indices][[name_col, 'rating']]


In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
train_df, test_df = train_test_split(
    anime_df,
    test_size=0.2,
    random_state=42,
)


In [13]:
# Set-based precision / recall / F1 for a single recommendation list.
def evaluate_recommendations(recommended, actual):
    """Score a list of recommendations against the relevant items.

    Both inputs are treated as sets, so duplicates and ordering are ignored.

    Parameters
    ----------
    recommended : iterable of hashable
        Items the recommender returned.
    actual : iterable of hashable
        Items that are considered relevant (the ground truth).

    Returns
    -------
    dict
        ``{'precision': float, 'recall': float, 'f1': float}``. A metric whose
        denominator would be zero (empty input, or no overlap for F1) is
        reported as ``0.0``.
    """
    recommended_set = set(recommended)
    actual_set = set(actual)
    hits = len(recommended_set & actual_set)

    # Guard each ratio so empty inputs yield 0.0 instead of ZeroDivisionError.
    precision = hits / len(recommended_set) if recommended_set else 0.0
    recall = hits / len(actual_set) if actual_set else 0.0
    # With at least one hit both precision and recall are > 0, so the sum is safe.
    f1 = 2 * precision * recall / (precision + recall) if hits else 0.0

    return {'precision': precision, 'recall': recall, 'f1': f1}
