In [1]:
#Importing the necessary packages 
import requests
import json
import pandas as pd
from pandas.io.json import json_normalize

In [2]:
# GraphQL query for the user's completed anime list: for every list entry it
# asks for the entry id, the media id and the score stored on that entry.
query_1 = '''
{
  MediaListCollection (userName: "tetsu1290", type: ANIME, status: COMPLETED) {
    lists {
      entries {
        id
        mediaId
        score
  }
}
  }
}
'''

# AniList GraphQL endpoint. The query takes no variables: the user name and
# the filters are written directly into the query text above.
url = 'https://graphql.anilist.co'

# Make the HTTP API request.
# - timeout: without it requests waits indefinitely on a stalled connection,
#   which freezes the kernel.
# - raise_for_status(): stop right here on a 4xx/5xx reply instead of letting
#   an error payload reach the parsing cells further down.
response_1 = requests.post(url, json={'query': query_1}, timeout=30)
response_1.raise_for_status()

In [3]:
# GraphQL query for the same completed anime list, this time asking for the
# details of the media attached to each entry (titles, episode count, source,
# genres, mean score, description, duration and season year).
query_2 = '''
 {
  MediaListCollection (userName: "tetsu1290", type: ANIME, status: COMPLETED) {
    lists {
      entries {
        media {
          id
          title {
            romaji
            english
          }
          episodes
          source
          genres
          meanScore
          description
          duration
          seasonYear
        }
      }
    }
  }
}
'''

# AniList GraphQL endpoint. The query takes no variables: the user name and
# the filters are written directly into the query text above.
url = 'https://graphql.anilist.co'

# Make the HTTP API request.
# - timeout: without it requests waits indefinitely on a stalled connection,
#   which freezes the kernel.
# - raise_for_status(): stop right here on a 4xx/5xx reply instead of letting
#   an error payload reach the parsing cells further down.
response_2 = requests.post(url, json={'query': query_2}, timeout=30)
response_2.raise_for_status()

In [4]:
#Checking to make sure the request is good.
response_1

<Response [200]>

In [5]:
#Checking to make sure the request is good.
response_2

<Response [200]>

In [6]:
#Saving json data as a variable
data_1 = response_1.json()

In [7]:
#Saving json data as a variable
data_2 = response_2.json()

In [8]:
#Parsing through the json to get the entries of the first list in the collection (only lists[0] is read)
entries = data_1['data']['MediaListCollection']['lists'][0]['entries']

In [9]:
#Checking the output of the parse
entries

[{'id': 40857579, 'mediaId': 16498, 'score': 10},
 {'id': 40857589, 'mediaId': 99147, 'score': 10},
 {'id': 40857595, 'mediaId': 97940, 'score': 7},
 {'id': 40857613, 'mediaId': 97888, 'score': 9},
 {'id': 40862040, 'mediaId': 101474, 'score': 9},
 {'id': 40915330, 'mediaId': 9624, 'score': 9},
 {'id': 40915331, 'mediaId': 100526, 'score': 9},
 {'id': 40915332, 'mediaId': 50, 'score': 8},
 {'id': 40915333, 'mediaId': 880, 'score': 9},
 {'id': 40915334, 'mediaId': 2198, 'score': 8},
 {'id': 40915335, 'mediaId': 20785, 'score': 8},
 {'id': 40915336, 'mediaId': 11759, 'score': 9},
 {'id': 40915337, 'mediaId': 12291, 'score': 9},
 {'id': 40915338, 'mediaId': 98251, 'score': 8},
 {'id': 40915339, 'mediaId': 53, 'score': 9},
 {'id': 40915340, 'mediaId': 394, 'score': 9},
 {'id': 40915341, 'mediaId': 101, 'score': 10},
 {'id': 40915342, 'mediaId': 230, 'score': 10},
 {'id': 40915343, 'mediaId': 102, 'score': 8},
 {'id': 40915344, 'mediaId': 21058, 'score': 8},
 {'id': 40915345, 'mediaId': 212

In [10]:
#Creating a dataframe from the data
data_1_df = pd.DataFrame.from_records(entries)

In [11]:
#Checking the outcome of the dataframe
data_1_df

Unnamed: 0,id,mediaId,score
0,40857579,16498,10
1,40857589,99147,10
2,40857595,97940,7
3,40857613,97888,9
4,40862040,101474,9
...,...,...,...
838,372376970,155418,8
839,379141502,156039,6
840,387519220,21311,6
841,392902686,141911,9


In [12]:
#Renaming the mediaId column to media_id, the column name used as the join key later on.
#The renamed frame is assigned back to the same name instead of mutating it in place.
data_1_df = data_1_df.rename(columns={'mediaId': 'media_id'})

In [13]:
#Parsing through the json to get the entries of the first list in the collection (only lists[0] is read)
entries_2 = data_2['data']['MediaListCollection']['lists'][0]['entries']

In [14]:
#Checking the output of the parse
entries_2

[{'media': {'id': 16498,
   'title': {'romaji': 'Shingeki no Kyojin', 'english': 'Attack on Titan'},
   'episodes': 25,
   'source': 'MANGA',
   'genres': ['Action', 'Drama', 'Fantasy', 'Mystery'],
   'meanScore': 84,
   'description': 'Several hundred years ago, humans were nearly exterminated by titans. Titans are typically several stories tall, seem to have no intelligence, devour human beings and, worst of all, seem to do it for the pleasure rather than as a food source. A small percentage of humanity survived by walling themselves in a city protected by extremely high walls, even taller than the biggest of titans.<br><br>\r\nFlash forward to the present and the city has not seen a titan in over 100 years. Teenage boy Eren and his foster sister Mikasa witness something horrific as the city walls are destroyed by a colossal titan that appears out of thin air. As the smaller titans flood the city, the two kids watch in horror as their mother is eaten alive. Eren vows that he will mur

In [15]:
#https://ankushkunwar7777.medium.com/get-data-from-large-nested-json-file-cf1146aa8c9e
def extract_values(obj, key):
    """
    Collect every value stored under ``key`` anywhere in a nested JSON structure.

    Parameters
    ----------
    obj : dict, list or any other decoded JSON value
        Structure to search. Dicts and lists are walked recursively; any
        other top-level object produces an empty result.
    key : hashable
        Dictionary key to look for at every nesting level.

    Returns
    -------
    list
        The matching values in depth-first order, following dict insertion
        order and list order. Scalars (including ``None``) are returned
        as-is. A matching value that is itself a dict or list is returned
        whole and is also searched, so deeper occurrences of ``key`` inside
        it are reported after it.
    """
    arr = []

    def extract(node):
        """Recursively search for values of key in JSON tree."""
        if isinstance(node, dict):
            for k, v in node.items():
                # Record the match first. Previously a matching key whose
                # value was a dict or list was never recorded, because the
                # container check ran before the key comparison.
                if k == key:
                    arr.append(v)
                # Containers are still descended into, matching or not, so
                # every value found before this fix is still found.
                if isinstance(v, (dict, list)):
                    extract(v)
        elif isinstance(node, list):
            for item in node:
                extract(item)

    extract(obj)
    return arr

In [16]:
#https://ankushkunwar7777.medium.com/get-data-from-large-nested-json-file-cf1146aa8c9e
def item_generator(json_input, lookup_key):
    """
    Lazily yield the values stored under ``lookup_key`` in a nested JSON structure.

    Lists are searched element by element. In a dict, a value whose key
    equals ``lookup_key`` is yielded as-is (it is not searched any further);
    every other value is searched recursively. Anything that is neither a
    dict nor a list yields nothing.
    """
    if isinstance(json_input, list):
        for element in json_input:
            yield from item_generator(element, lookup_key)
        return

    if not isinstance(json_input, dict):
        return

    for name, value in json_input.items():
        if name != lookup_key:
            yield from item_generator(value, lookup_key)
            continue
        yield value

In [17]:
#Collecting the "genres" value of every entry. item_generator yields each matching
#value as-is, so every item collected here is one entry's whole list of genres.
output = list(item_generator(entries_2, "genres"))

In [18]:
#Checking results of the iteration
output

[['Action', 'Drama', 'Fantasy', 'Mystery'],
 ['Action', 'Drama', 'Fantasy', 'Mystery'],
 ['Action', 'Adventure', 'Comedy', 'Fantasy'],
 ['Action', 'Sports'],
 ['Action', 'Adventure', 'Fantasy'],
 ['Comedy', 'Ecchi', 'Romance', 'Supernatural'],
 ['Comedy', 'Drama', 'Romance', 'Slice of Life'],
 ['Comedy', 'Romance', 'Supernatural'],
 ['Comedy', 'Romance', 'Supernatural'],
 ['Comedy', 'Romance', 'Supernatural'],
 ['Action', 'Ecchi', 'Romance', 'Supernatural'],
 ['Action', 'Romance', 'Sci-Fi'],
 ['Comedy', 'Romance', 'Slice of Life'],
 ['Comedy', 'Romance'],
 ['Comedy', 'Drama', 'Romance', 'Slice of Life'],
 ['Comedy', 'Drama', 'Romance', 'Slice of Life'],
 ['Drama', 'Romance', 'Slice of Life', 'Supernatural'],
 ['Action', 'Adventure', 'Comedy'],
 ['Comedy', 'Drama', 'Romance'],
 ['Drama', 'Fantasy', 'Romance'],
 ['Drama', 'Fantasy', 'Romance'],
 ['Action',
  'Adventure',
  'Drama',
  'Fantasy',
  'Horror',
  'Psychological',
  'Thriller'],
 ['Comedy', 'Romance'],
 ['Action', 'Adventure',

In [19]:
#Getting year information from json object
extract_values(entries_2, 'seasonYear')

[2013,
 2018,
 2017,
 2018,
 2018,
 2011,
 2018,
 2005,
 2006,
 2007,
 2015,
 2012,
 2012,
 2017,
 2002,
 2003,
 2005,
 2003,
 2004,
 2015,
 2016,
 2014,
 2008,
 2014,
 2009,
 1988,
 2017,
 2016,
 2010,
 2012,
 2014,
 2013,
 2010,
 2017,
 2011,
 2012,
 2015,
 2016,
 2014,
 2011,
 2017,
 2016,
 2015,
 2012,
 2011,
 2009,
 2002,
 2010,
 2014,
 2015,
 2010,
 2011,
 2009,
 2017,
 2016,
 2014,
 2014,
 2004,
 2017,
 2013,
 2016,
 2017,
 2011,
 2013,
 2006,
 2014,
 2014,
 2000,
 2014,
 2012,
 2006,
 2012,
 2005,
 2017,
 2015,
 2017,
 2012,
 2002,
 2006,
 2012,
 2012,
 2014,
 2018,
 2007,
 2008,
 2009,
 2008,
 2015,
 2007,
 2012,
 2015,
 2018,
 2006,
 2014,
 2014,
 2003,
 2016,
 2014,
 2012,
 2015,
 2013,
 2014,
 2015,
 2013,
 2014,
 2004,
 2018,
 2006,
 2015,
 2014,
 2015,
 2009,
 2016,
 2017,
 2016,
 2018,
 2015,
 2017,
 2010,
 2015,
 2015,
 2012,
 2008,
 2007,
 2004,
 2017,
 2009,
 2014,
 2006,
 2011,
 2012,
 2004,
 2010,
 2013,
 2014,
 2011,
 2013,
 2017,
 2009,
 1995,
 2004,
 2017,
 2018,

In [20]:
#Creating an empty dataFrame that only declares the columns needed from the json
data_2_columns = [
    'media_id',
    'title_in_romaji',
    'title_in_english',
    'episodes',
    'source',
    'genres',
    'mean_score',
    'description',
    'duration',
    'year_released',
]
data_2_df = pd.DataFrame(columns=data_2_columns)

In [21]:
#Checking json categories to make sure all were included.
entries_2

[{'media': {'id': 16498,
   'title': {'romaji': 'Shingeki no Kyojin', 'english': 'Attack on Titan'},
   'episodes': 25,
   'source': 'MANGA',
   'genres': ['Action', 'Drama', 'Fantasy', 'Mystery'],
   'meanScore': 84,
   'description': 'Several hundred years ago, humans were nearly exterminated by titans. Titans are typically several stories tall, seem to have no intelligence, devour human beings and, worst of all, seem to do it for the pleasure rather than as a food source. A small percentage of humanity survived by walling themselves in a city protected by extremely high walls, even taller than the biggest of titans.<br><br>\r\nFlash forward to the present and the city has not seen a titan in over 100 years. Teenage boy Eren and his foster sister Mikasa witness something horrific as the city walls are destroyed by a colossal titan that appears out of thin air. As the smaller titans flood the city, the two kids watch in horror as their mother is eaten alive. Eren vows that he will mur

In [22]:
#Checking that all columns were properly added
data_2_df

Unnamed: 0,media_id,title_in_romaji,title_in_english,episodes,source,genres,mean_score,description,duration,year_released


In [23]:
#Building the dataframe from one media object per entry, so that every value in a
#row is read from the same entry. Scanning the whole json separately for each key
#ties the columns together by list position only: any additional nested "id" field,
#or an entry whose title object is null, would shift or break the columns.
media_records = [entry['media'] for entry in entries_2]

data_2_df = pd.DataFrame({
    'media_id': [media['id'] for media in media_records],
    # "title" is a nested object; fall back to an empty dict if it is null so the
    # row keeps its position and the title cells simply become missing values.
    'title_in_romaji': [(media['title'] or {}).get('romaji') for media in media_records],
    'title_in_english': [(media['title'] or {}).get('english') for media in media_records],
    'episodes': [media['episodes'] for media in media_records],
    'source': [media['source'] for media in media_records],
    # Each cell of this column holds the entry's whole list of genres.
    'genres': [media['genres'] for media in media_records],
    'mean_score': [media['meanScore'] for media in media_records],
    'description': [media['description'] for media in media_records],
    'duration': [media['duration'] for media in media_records],
    'year_released': [media['seasonYear'] for media in media_records],
})
    

In [24]:
#Checking info of the first dataframe created
data_1_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 843 entries, 0 to 842
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   id        843 non-null    int64
 1   media_id  843 non-null    int64
 2   score     843 non-null    int64
dtypes: int64(3)
memory usage: 19.9 KB


In [25]:
#Checking info of the second dataframe created
data_2_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 843 entries, 0 to 842
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   media_id          843 non-null    int64  
 1   title_in_romaji   843 non-null    object 
 2   title_in_english  766 non-null    object 
 3   episodes          843 non-null    int64  
 4   source            842 non-null    object 
 5   genres            843 non-null    object 
 6   mean_score        843 non-null    int64  
 7   description       843 non-null    object 
 8   duration          843 non-null    int64  
 9   year_released     842 non-null    float64
dtypes: float64(1), int64(4), object(5)
memory usage: 66.0+ KB


In [26]:
#Joining the two dataframes on their shared media_id column.
#how='inner' is the pandas default; it is only spelled out here.
final_anime_df_merged = pd.merge(data_1_df, data_2_df, how='inner', on='media_id')

In [27]:
#Checking the final merged data
final_anime_df_merged

Unnamed: 0,id,media_id,score,title_in_romaji,title_in_english,episodes,source,genres,mean_score,description,duration,year_released
0,40857579,16498,10,Shingeki no Kyojin,Attack on Titan,25,MANGA,"[Action, Drama, Fantasy, Mystery]",84,"Several hundred years ago, humans were nearly ...",24,2013.0
1,40857589,99147,10,Shingeki no Kyojin 3,Attack on Titan Season 3,12,MANGA,"[Action, Drama, Fantasy, Mystery]",85,Eren and his companions in the 104th are assig...,24,2018.0
2,40857595,97940,7,Black Clover,Black Clover,170,MANGA,"[Action, Adventure, Comedy, Fantasy]",78,"In a world where magic is everything, Asta and...",24,2017.0
3,40857613,97888,9,Baki,BAKI,26,MANGA,"[Action, Sports]",72,"The protagonist, Baki Hanma, trains with an in...",24,2018.0
4,40862040,101474,9,Overlord III,Overlord III,13,LIGHT_NOVEL,"[Action, Adventure, Fantasy]",77,The third season of <i>Overlord</i>.<br>\n<br>...,24,2018.0
...,...,...,...,...,...,...,...,...,...,...,...,...
838,372376970,155418,8,"Seija Musou: Salaryman, Isekai de Ikinokoru Ta...",The Great Cleric,12,LIGHT_NOVEL,"[Adventure, Comedy, Fantasy]",70,Can a former salaryman become peerless in anot...,24,2023.0
839,379141502,156039,6,Boushoku no Berserk,Berserk of Gluttony,12,LIGHT_NOVEL,"[Action, Adventure, Drama, Fantasy, Romance]",67,Fate Graphite was born into a world where magi...,24,2023.0
840,387519220,21311,6,Bungou Stray Dogs,Bungo Stray Dogs,12,MANGA,"[Action, Comedy, Mystery, Supernatural]",77,"Stalked by a beastly white tiger, Atsushi Naka...",24,2016.0
841,392902686,141911,9,Skip to Loafer,Skip and Loafer,12,MANGA,"[Comedy, Romance, Slice of Life]",81,This country girl is ready for the big city! W...,24,2023.0


In [28]:
#Checking data of the final dataframe
final_anime_df_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 843 entries, 0 to 842
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   id                843 non-null    int64  
 1   media_id          843 non-null    int64  
 2   score             843 non-null    int64  
 3   title_in_romaji   843 non-null    object 
 4   title_in_english  766 non-null    object 
 5   episodes          843 non-null    int64  
 6   source            842 non-null    object 
 7   genres            843 non-null    object 
 8   mean_score        843 non-null    int64  
 9   description       843 non-null    object 
 10  duration          843 non-null    int64  
 11  year_released     842 non-null    float64
dtypes: float64(1), int64(6), object(5)
memory usage: 85.6+ KB


In [29]:
#Checking for null values in the source column
source_is_missing = final_anime_df_merged['source'].isna()
null_rows = final_anime_df_merged[source_is_missing]
null_rows

Unnamed: 0,id,media_id,score,title_in_romaji,title_in_english,episodes,source,genres,mean_score,description,duration,year_released
292,40915624,3392,10,Kiniro no Corda: primo passo - Hitonatsu no En...,La Corda D'Oro: Primo Passo - A Summer Encore,1,,"[Music, Romance]",68,Hino Kahoko goes to a summer training camp to ...,29,2007.0


In [30]:
#Checking the sum of episodes
final_anime_df_merged['episodes'].sum()

14541

In [31]:
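#Checking the sum of the duration column. Note: duration is the length of one episode in minutes, so this is not the total watch time (that would be the sum of episodes * duration).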
final_anime_df_merged['duration'].sum()

20279

In [32]:
#Separated genres per anime
final_anime_df_merged_exploded = final_anime_df_merged.explode('genres')

In [33]:
#Checked on outcome of separation
final_anime_df_merged_exploded

Unnamed: 0,id,media_id,score,title_in_romaji,title_in_english,episodes,source,genres,mean_score,description,duration,year_released
0,40857579,16498,10,Shingeki no Kyojin,Attack on Titan,25,MANGA,Action,84,"Several hundred years ago, humans were nearly ...",24,2013.0
0,40857579,16498,10,Shingeki no Kyojin,Attack on Titan,25,MANGA,Drama,84,"Several hundred years ago, humans were nearly ...",24,2013.0
0,40857579,16498,10,Shingeki no Kyojin,Attack on Titan,25,MANGA,Fantasy,84,"Several hundred years ago, humans were nearly ...",24,2013.0
0,40857579,16498,10,Shingeki no Kyojin,Attack on Titan,25,MANGA,Mystery,84,"Several hundred years ago, humans were nearly ...",24,2013.0
1,40857589,99147,10,Shingeki no Kyojin 3,Attack on Titan Season 3,12,MANGA,Action,85,Eren and his companions in the 104th are assig...,24,2018.0
...,...,...,...,...,...,...,...,...,...,...,...,...
841,392902686,141911,9,Skip to Loafer,Skip and Loafer,12,MANGA,Romance,81,This country girl is ready for the big city! W...,24,2023.0
841,392902686,141911,9,Skip to Loafer,Skip and Loafer,12,MANGA,Slice of Life,81,This country girl is ready for the big city! W...,24,2023.0
842,393149541,130003,7,Bocchi the Rock!,BOCCHI THE ROCK!,12,MANGA,Comedy,87,"Hitori Gotou, “Bocchi-chan,” is a girl who’s s...",24,2022.0
842,393149541,130003,7,Bocchi the Rock!,BOCCHI THE ROCK!,12,MANGA,Music,87,"Hitori Gotou, “Bocchi-chan,” is a girl who’s s...",24,2022.0


In [34]:
#Exporting the exploded dataframe (one row per genre of each anime) for analysis
final_anime_df_merged_exploded.to_csv('jonathan_anime_db.csv')

In [35]:
#Exporting the non-exploded dataframe (genres kept as a list per row) for analysis
final_anime_df_merged.to_csv('jonathan_anime_db_non_exploded.csv')

In [36]:
#Checking the average score I have given
final_anime_df_merged['score'].mean()

7.933570581257414

In [37]:
#Converting score of the community to a 1-10 scale
final_anime_df_merged['mean_score'].mean()/10

6.95373665480427

In [38]:
#Checking the median score I have given
final_anime_df_merged['score'].median()

8.0

In [39]:
#Checking the median score that the community has given
final_anime_df_merged['mean_score'].median()

70.0