In [1]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests

# Smoke-test the search endpoint with a single query and show the first hit.
url = "https://api.jikan.moe/v4/anime?q=naruto&limit=3"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=10)
# Fail here on a 4xx/5xx response instead of with a confusing KeyError below.
response.raise_for_status()
data = response.json()

first_hit = data["data"][0]
print(first_hit["title"])
print(first_hit["genres"])
print(first_hit["synopsis"])

Naruto
[{'mal_id': 1, 'type': 'anime', 'name': 'Action', 'url': 'https://myanimelist.net/anime/genre/1/Action'}, {'mal_id': 2, 'type': 'anime', 'name': 'Adventure', 'url': 'https://myanimelist.net/anime/genre/2/Adventure'}, {'mal_id': 10, 'type': 'anime', 'name': 'Fantasy', 'url': 'https://myanimelist.net/anime/genre/10/Fantasy'}]
Twelve years ago, a colossal demon fox terrorized the world. During the monster's attack on the Hidden Leaf Village, the Hokage—the village's leader and most powerful ninja—sacrifices himself to seal the beast inside a newborn, relieving civilization from destruction while dooming the baby to a lonely life.

Now, after years of being shunned and bullied, Naruto Uzumaki pesters the village with elaborate pranks and vandalism. Despite these antics, he works hard to achieve his dream: to become the Hokage and earn the acknowledgement of those who have mistreated him for his entire life. Naruto joins Team 7, a ninja squad made up of two of his peers—prodigy Sasuk

In [3]:
import pandas as pd
import time
# Download two pages of 25 anime each, keep title / genre names / synopsis,
# and persist the result to anime.csv.
anime_list = []
for page in [1, 2]:
    url = f"https://api.jikan.moe/v4/anime?page={page}&limit=25"
    # A timeout keeps the loop from hanging indefinitely on a stalled connection.
    response = requests.get(url, timeout=10)
    # Surface HTTP errors (for example a rate-limit response) right here rather
    # than as a KeyError on data["data"] a few lines further down.
    response.raise_for_status()
    data = response.json()

    for anime in data["data"]:
        anime_list.append({
            "title" : anime["title"],
            # Each genre is a dict; only its display name is kept.
            "genres": [g["name"] for g in anime["genres"]],
            "synopsis" : anime["synopsis"]
        })
    # Pause between page requests to space out calls to the API.
    time.sleep(1)
df = pd.DataFrame(anime_list)
df.to_csv("anime.csv", index = False)
print("Saved anime.csv")


    

Saved anime.csv


In [4]:
# Preview the first five fetched rows.
df.head(n=5)

Unnamed: 0,title,genres,synopsis
0,Cowboy Bebop,"[Action, Award Winning, Sci-Fi]","Crime is timeless. By the year 2071, humanity ..."
1,Cowboy Bebop: Tengoku no Tobira,"[Action, Sci-Fi]","Another day, another bounty—such is the life o..."
2,Trigun,"[Action, Adventure, Sci-Fi]","Vash the Stampede is the man with a $$60,000,0..."
3,Witch Hunter Robin,"[Action, Drama, Mystery, Supernatural]","Though hidden away from the general public, Wi..."
4,Bouken Ou Beet,"[Action, Adventure, Fantasy]",It is the dark century and the people are suff...


In [6]:
# Summarize column dtypes and non-null counts of the fetched frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   title     50 non-null     object
 1   genres    50 non-null     object
 2   synopsis  50 non-null     object
dtypes: object(3)
memory usage: 1.3+ KB


In [7]:
# Fixed seed so the same five rows are shown on every Restart & Run All.
df.sample(5, random_state=42)

Unnamed: 0,title,genres,synopsis
46,Rozen Maiden: Träumend,"[Action, Drama, Supernatural]","As the story of Rozen Maiden closes, a new cha..."
5,Eyeshield 21,[Sports],"Shy, reserved, and small-statured, Deimon High..."
39,Blue Gender,"[Adventure, Drama, Horror, Sci-Fi]",Blue Gender takes place in the not too distant...
16,Texhnolyze,"[Avant Garde, Drama, Mystery, Sci-Fi, Suspense]","In the dark underground city of Lux, people li..."
1,Cowboy Bebop: Tengoku no Tobira,"[Action, Sci-Fi]","Another day, another bounty—such is the life o..."


In [8]:
# Count missing values per column.
df.isna().sum()

title       0
genres      0
synopsis    0
dtype: int64

In [9]:
# Inspect the genre list stored for the first row.
df["genres"].iat[0]

['Action', 'Award Winning', 'Sci-Fi']

In [6]:
# Flatten each row's list of genre names into one space-separated string.
df["genres_str"] = df["genres"].map(" ".join)

In [7]:
# Build the text that gets vectorized: genre names followed by the synopsis.
# A missing synopsis (None/NaN) would turn the whole concatenation into NaN,
# so it is treated as empty text and the genres are still kept for that row.
df["combined_text"] = df["genres_str"] + " " + df["synopsis"].fillna("")

In [8]:
# Preview the frame again, now including genres_str and combined_text.
df.head(n=5)

Unnamed: 0,title,genres,synopsis,genres_str,combined_text
0,Cowboy Bebop,"[Action, Award Winning, Sci-Fi]","Crime is timeless. By the year 2071, humanity ...",Action Award Winning Sci-Fi,Action Award Winning Sci-Fi Crime is timeless....
1,Cowboy Bebop: Tengoku no Tobira,"[Action, Sci-Fi]","Another day, another bounty—such is the life o...",Action Sci-Fi,"Action Sci-Fi Another day, another bounty—such..."
2,Trigun,"[Action, Adventure, Sci-Fi]","Vash the Stampede is the man with a $$60,000,0...",Action Adventure Sci-Fi,Action Adventure Sci-Fi Vash the Stampede is t...
3,Witch Hunter Robin,"[Action, Drama, Mystery, Supernatural]","Though hidden away from the general public, Wi...",Action Drama Mystery Supernatural,Action Drama Mystery Supernatural Though hidde...
4,Bouken Ou Beet,"[Action, Adventure, Fantasy]",It is the dark century and the people are suff...,Action Adventure Fantasy,Action Adventure Fantasy It is the dark centur...


In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn each row's combined_text into a TF-IDF vector, ignoring English stop words.
tfidf = TfidfVectorizer(stop_words = 'english')
tfidf_matrix = tfidf.fit_transform(df["combined_text"])

# Shape is (number of rows in df, number of terms in the learned vocabulary).
tfidf_matrix.shape

(50, 2219)

In [14]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity of every TF-IDF row against every other row;
# with no second argument the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)

# Square matrix: one row and one column per entry of df.
cosine_sim.shape

(50, 50)

In [15]:
# Display the similarity matrix (the repr is truncated for large arrays).
cosine_sim

array([[1.        , 0.21330515, 0.02978963, ..., 0.01400457, 0.00732249,
        0.04609236],
       [0.21330515, 1.        , 0.04777717, ..., 0.01586357, 0.00554852,
        0.05233109],
       [0.02978963, 0.04777717, 1.        , ..., 0.02280957, 0.01437755,
        0.05288023],
       ...,
       [0.01400457, 0.01586357, 0.02280957, ..., 1.        , 0.01083376,
        0.01646488],
       [0.00732249, 0.00554852, 0.01437755, ..., 0.01083376, 1.        ,
        0.01103461],
       [0.04609236, 0.05233109, 0.05288023, ..., 0.01646488, 0.01103461,
        1.        ]])

In [21]:
def recommend_anime(title, df, cosine_sim, top_n=5):
    """Return the titles most similar to ``title`` according to ``cosine_sim``.

    Parameters
    ----------
    title : str
        Title to look up; matching against ``df['title']`` is case-insensitive.
    df : pandas.DataFrame
        Frame with a ``title`` column whose row order matches the rows and
        columns of ``cosine_sim``.
    cosine_sim : 2-D array-like
        Pairwise similarity scores; ``cosine_sim[i][j]`` compares the ``i``-th
        and ``j``-th rows of ``df`` (by position, not by index label).
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list of str or str
        Up to ``top_n`` titles ordered from most to least similar, never
        including the queried row itself. When ``title`` is not present in
        ``df`` a "not found" message string is returned instead.
    """
    wanted = title.lower()
    known_titles = df['title'].str.lower().tolist()
    if wanted not in known_titles:
        return f"'{title}' not found in dataset."

    # Work with row *positions* rather than index labels so the lookup stays
    # aligned with cosine_sim even when df has a non-default index. If the
    # title occurs more than once, the first occurrence is used.
    idx = known_titles.index(wanted)

    scores = cosine_sim[idx]
    # Exclude the queried row explicitly instead of assuming it sorts first:
    # a tie at the top (e.g. duplicate text) would otherwise let the queried
    # title slip into its own recommendations.
    candidates = [pos for pos in range(len(known_titles)) if pos != idx]
    candidates.sort(key=lambda pos: scores[pos], reverse=True)

    return df['title'].iloc[candidates[:max(top_n, 0)]].tolist()

    

In [22]:
recommend_anime('Trigun', df, cosine_sim)

['Black Cat',
 'Cowboy Bebop: Tengoku no Tobira',
 'Koukaku Kidoutai',
 'Akira',
 'Naruto']