In [11]:
import requests
import pandas as pd
import time

In [12]:
class RateLimiter:
    def __init__(self, per_second=3, per_minute=60):
        self.per_second = per_second
        self.per_minute = per_minute
        self.requests_made_second = 0
        self.requests_made_minute = 0
        self.start_time = time.time()

    def wait_for_second(self):
        current_time = time.time()
        elapsed_time = current_time - self.start_time
        if elapsed_time < 1:
            self.requests_made_second += 1
            if self.requests_made_second >= self.per_second:
                time.sleep(1 - elapsed_time)
                self.requests_made_second = 0
                self.start_time = time.time()

    def wait_for_minute(self):
        current_time = time.time()
        elapsed_time = current_time - self.start_time
        if elapsed_time < 60:
            self.requests_made_minute += 1
            if self.requests_made_minute >= self.per_minute:
                time.sleep(60 - elapsed_time)
                self.requests_made_minute = 0
                self.start_time = time.time()

In [13]:
def fetch_data_with_retry(endpoint_url, rate_limiter, max_retries=3):
    retries = 0
    while retries < max_retries:
        rate_limiter.wait_for_second()
        rate_limiter.wait_for_minute()
        response = requests.get(endpoint_url)
        if response.status_code == 200:
            return response.json()
        elif response.status_code == 429:  # Rate limit exceeded
            print("Rate limit exceeded. Waiting before retrying...")
            time.sleep(10)
            retries += 1
        else:
            print(f"Failed to fetch data. Status code: {response.status_code}")
            return None
    print("Max retries reached. Unable to fetch data.")
    return None

In [14]:
base_url = "https://api.jikan.moe/v4/seasons"

limiter = RateLimiter()

years = range(1970, 2025)

seasons = ["winter", "spring", "summer", "fall"]

data_list = []

In [15]:
for year in years:
    for season in seasons:
        endpoint_url = f"{base_url}/{year}/{season}"  # Construct the endpoint URL
        data = fetch_data_with_retry(endpoint_url, limiter)  # Fetch data for the current year and season
        if data:
            for entry in data['data']:
                new_entry = {}
                for key in ["mal_id", "titles", "type", "source", "episodes", 
                            "rating", "score", "scored_by", "rank", "popularity", "members", "favorites",
                            "synopsis", "studios", "genres", "explicit_genres", "themes"]:
                    if key in entry:
                        new_entry[key] = entry[key]
                    else:
                        new_entry[key] = None
                data_list.append(new_entry)

In [16]:
df = pd.DataFrame(data_list)

In [17]:
df.head()

Unnamed: 0,mal_id,titles,type,source,episodes,rating,score,scored_by,rank,popularity,members,favorites,synopsis,studios,genres,explicit_genres,themes
0,1550,"[{'type': 'Default', 'title': 'Attack No.1'}, ...",TV,Manga,104.0,PG-13 - Teens 13 or older,7.07,4117.0,4124.0,6406,9979,72,Kozue is a middleschool girl and enthusiastic ...,"[{'mal_id': 65, 'type': 'anime', 'name': 'Toky...","[{'mal_id': 8, 'type': 'anime', 'name': 'Drama...",[],"[{'mal_id': 77, 'type': 'anime', 'name': 'Team..."
1,2406,"[{'type': 'Default', 'title': 'Sazae-san'}, {'...",TV,4-koma manga,,G - All Ages,6.14,1869.0,9126.0,6890,8403,38,The main character is a mother named Sazae-san...,"[{'mal_id': 191, 'type': 'anime', 'name': 'Eik...","[{'mal_id': 4, 'type': 'anime', 'name': 'Comed...",[],[]
2,9895,"[{'type': 'Default', 'title': 'The Bathroom'},...",Movie,Original,1.0,R - 17+ (violence & profanity),4.22,1003.0,13223.0,11636,1779,0,A surrealistic short from minimalist cartoonis...,"[{'mal_id': 377, 'type': 'anime', 'name': 'Kur...","[{'mal_id': 5, 'type': 'anime', 'name': 'Avant...",[],[]
3,9163,"[{'type': 'Default', 'title': 'Attack No.1 (19...",Movie,Unknown,1.0,PG-13 - Teens 13 or older,6.39,444.0,7795.0,12299,1410,3,"The first recap film of Attack No.1 , it cover...",[],"[{'mal_id': 8, 'type': 'anime', 'name': 'Drama...",[],"[{'mal_id': 77, 'type': 'anime', 'name': 'Team..."
4,7259,"[{'type': 'Default', 'title': 'Kenju Giga'}, {...",Movie,Original,1.0,PG-13 - Teens 13 or older,4.92,595.0,12800.0,12805,1206,0,"A dog race is interrupted by a ringmaster, who...",[],[],[],"[{'mal_id': 20, 'type': 'anime', 'name': 'Paro..."


In [18]:
df.drop(columns=['explicit_genres'], inplace=True)

In [19]:
for index, row in df.iterrows():
    
    # fixing titles
    default_title = ""
    english_title = ""
    for title_info in row['titles']:
        if title_info['type'] == 'Default':
            default_title = title_info['title']
        elif title_info['type'] == 'English':
            english_title = title_info['title']
    df.at[index, 'titles'] = [default_title, english_title]

    # fixing studios
    name = ""
    for studio_info in row['studios']:
        name = studio_info['name']
    df.at[index, 'studios'] = name

    # fixing generes
    genre = []
    for genre_info in row['genres']:
        genre.append(genre_info['name'])
    df.at[index, 'genres'] = genre

    # fixing theme
    theme = []
    for theme_info in row['themes']:
        theme.append(theme_info['name'])
    df.at[index, 'themes'] = theme

In [20]:
df.head()

Unnamed: 0,mal_id,titles,type,source,episodes,rating,score,scored_by,rank,popularity,members,favorites,synopsis,studios,genres,themes
0,1550,"[Attack No.1, ]",TV,Manga,104.0,PG-13 - Teens 13 or older,7.07,4117.0,4124.0,6406,9979,72,Kozue is a middleschool girl and enthusiastic ...,Tokyo Movie Shinsha,"[Drama, Sports]",[Team Sports]
1,2406,"[Sazae-san, Mrs. Sazae]",TV,4-koma manga,,G - All Ages,6.14,1869.0,9126.0,6890,8403,38,The main character is a mother named Sazae-san...,Eiken,"[Comedy, Slice of Life]",[]
2,9895,"[The Bathroom, ]",Movie,Original,1.0,R - 17+ (violence & profanity),4.22,1003.0,13223.0,11636,1779,0,A surrealistic short from minimalist cartoonis...,Kuri Jikken Manga Koubou,"[Avant Garde, Comedy]",[]
3,9163,"[Attack No.1 (1970), ]",Movie,Unknown,1.0,PG-13 - Teens 13 or older,6.39,444.0,7795.0,12299,1410,3,"The first recap film of Attack No.1 , it cover...",,"[Drama, Sports]",[Team Sports]
4,7259,"[Kenju Giga, Anthropo-Cynical Farce]",Movie,Original,1.0,PG-13 - Teens 13 or older,4.92,595.0,12800.0,12805,1206,0,"A dog race is interrupted by a ringmaster, who...",,[],"[Parody, Psychological]"


In [21]:
df.info

<bound method DataFrame.info of       mal_id                                             titles   type  \
0       1550                                    [Attack No.1, ]     TV   
1       2406                            [Sazae-san, Mrs. Sazae]     TV   
2       9895                                   [The Bathroom, ]  Movie   
3       9163                             [Attack No.1 (1970), ]  Movie   
4       7259               [Kenju Giga, Anthropo-Cynical Farce]  Movie   
...      ...                                                ...    ...   
5051   56230  [Jiisan Baasan Wakagaeru, Grandpa and Grandma ...     TV   
5052   56923  [Lv2 kara Cheat datta Motoyuusha Kouho no Matt...     TV   
5053   53835                   [Unnamed Memory, Unnamed Memory]     TV   
5054   52995  [Arifureta Shokugyou de Sekai Saikyou Season 3...     TV   
5055   37885                       [Super Dragon Ball Heroes, ]    ONA   

            source  episodes                          rating  score  \
0       

In [22]:
# Exporting DataFrame to JSON
df.to_json("data/data.json", orient="records")