# Section 1 - Pulling data from API Endpoints and Cleaning
Libraries

In [142]:
import os
import pandas as pd
import requests
from sqlalchemy import create_engine
from sqlalchemy import exc
from dotenv import load_dotenv

In [143]:
load_dotenv()

True

Set up credentials

In [144]:

auth_url = 'https://id.twitch.tv/oauth2/token'
cred = {'client_id': os.getenv('CLIENT_ID'),
           'client_secret': os.getenv('CLIENT_SECRET'),
           'grant_type':'client_credentials'}
r = requests.post(auth_url, data=cred)
data = r.json()
print('Bearer '+data['access_token'])

base_url = 'https://api.igdb.com/v4'
header = {'Client-ID': os.getenv('CLIENT_ID'),
           'Authorization': 'Bearer '+data['access_token']}

Bearer mg9xibn9yag6hk9wll3935z6x78aa7


## Games Endpoint
### games table

In [145]:
r = requests.post(base_url+'/games', headers=header,
data='''fields name, category, first_release_date, status,
aggregated_rating, aggregated_rating_count, rating, rating_count,
total_rating, total_rating_count; where category = 0; limit 100;''')
games_table = r.json()
games_table_df = pd.DataFrame(games_table)

games_table_df



Unnamed: 0,id,category,first_release_date,name,status,rating,rating_count,total_rating,total_rating_count,aggregated_rating,aggregated_rating_count
0,35004,0,1.437696e+09,Demon Horde Master,,,,,,,
1,176043,0,,Zero Racers,6.0,,,,,,
2,89616,0,,Bubble Whirl Shooter,8.0,,,,,,
3,118008,0,1.589501e+09,Tabletop Playground,4.0,,,,,,
4,95080,0,,Dotra,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
95,77236,0,1.131667e+09,Fengse Huanxiang 4: Shengzhan de Zhongyan,,,,,,,
96,70621,0,,Fruit Attack,,,,,,,
97,137234,0,,Slip Slop,,,,,,,
98,137242,0,,Premier Collection III,,,,,,,


### genres table

In [146]:
r = requests.post(base_url+'/games', headers=header,
data='fields genres; limit 100;')
genres_table = r.json()
genres_table_df =pd.DataFrame(genres_table).explode('genres')
genres_table_df

Unnamed: 0,id,genres
0,35004,15
0,35004,32
1,176043,10
2,89616,9
2,89616,33
...,...,...
96,123238,32
97,196510,15
97,196510,32
98,88217,


### keywords table

In [147]:
r = requests.post(base_url+'/games', headers=header,
data='fields keywords; limit 100;')
keywords_table = r.json()
keywords_table_df =pd.DataFrame(keywords_table).explode('keywords')
keywords_table_df

Unnamed: 0,id,keywords
0,35004,
1,176043,5340
1,176043,23861
2,89616,
3,118008,
...,...,...
95,51648,1026
96,123238,3257
97,196510,
98,88217,


### platforms table

In [148]:
r = requests.post(base_url+'/games', headers=header,
data='fields platforms; limit 100;')
platforms_table = r.json()
platforms_table_df =pd.DataFrame(platforms_table).explode('platforms')
platforms_table_df

Unnamed: 0,id,platforms
0,35004,6
1,176043,87
2,89616,34
2,89616,39
3,118008,6
...,...,...
95,51648,6
96,123238,6
97,196510,
98,88217,


## Genres Endpoint
### genres_info table

In [149]:
r = requests.post(base_url+'/genres', headers=header, data='fields name; limit 500;')
genre_info_table = r.json()
genre_info_table_df = pd.DataFrame(genre_info_table)
genre_info_table_df

Unnamed: 0,id,name
0,4,Fighting
1,5,Shooter
2,7,Music
3,8,Platform
4,9,Puzzle
5,10,Racing
6,11,Real Time Strategy (RTS)
7,12,Role-playing (RPG)
8,13,Simulator
9,14,Sport


## Keyword Endpoint
### keyword_info_table

In [150]:
r = requests.post(base_url+'/keywords', headers=header, data='fields slug; limit 100;')
keyword_info_table = r.json()
keyword_info_table_df = pd.DataFrame(keyword_info_table)
keyword_info_table_df.head()

Unnamed: 0,id,slug
0,3576,oi
1,4010,argentina
2,13017,fremen
3,1027,adventure
4,19226,fight-trivia


## Platform Endpoint
### Platform Table

In [151]:
r = requests.post(base_url+'/platforms', headers=header, data='fields name; limit 100;')
platform_info_table = r.json()
platform_info_table_df = pd.DataFrame(platform_info_table)
platform_info_table_df.head()

Unnamed: 0,id,name
0,158,Commodore CDTV
1,339,Sega Pico
2,8,PlayStation 2
3,39,iOS
4,94,Commodore Plus/4


# Section 2 - Clean and Format Data

In [152]:
# Renaming df tables to sync with DB Schema
# Games Table:
games_table_df = games_table_df.rename(columns={"id":"game_id"})

# Genre Tables:
genres_table_df = genres_table_df.rename(columns={"id":"game_id", "genres":"genre_id"})
genre_info_table_df = genre_info_table_df.rename(columns={"id":"genre_id", "name":"genre_name"})

# Keyword Tables:
keywords_table_df = keywords_table_df.rename(columns={"id":"game_id", "keywords":"keyword_id"})
keyword_info_table_df = keyword_info_table_df.rename(columns={"id":"keyword_id", "slug":"keyword_name"})

# Platform Tables:
platforms_table_df = platforms_table_df.rename(columns={"id":"game_id", "platforms":"platform_id"})
platform_info_table_df = platform_info_table_df.rename(columns={"id":"platform_id", "name":"platform_name"})

In [153]:
# Converting unix time to datetime format
games_table_df['first_release_date'] = pd.to_datetime(games_table_df['first_release_date'], unit='s', origin='unix')

# Section 3 - Upload to AWS RDS MySQL Server

In [154]:
engine = create_engine(f"mysql+pymysql://{os.getenv('USER')}:{os.getenv('PASS')}@{os.getenv('RDS_ENDPOINT')}/igdb"
                       .format(host= os.getenv("RDS_ENDPOINT"),
                               port= os.getenv("RDS_PORT"),
                               user=os.getenv('USER'),
                               pw=os.getenv('PASS'),
                               ))

In [155]:
for i in range(len(games_table_df)):
    try:
        games_table_df.iloc[i:i+1].to_sql(name="games",if_exists='append',con = engine, index=False)
    except exc.IntegrityError as e:
        pass

In [156]:
for i in range(len(genres_table_df)):
    try:
        genres_table_df.iloc[i:i+1].to_sql(name="genres",if_exists='append',con = engine, index=False)
    except exc.IntegrityError as e:
        pass

In [157]:
for i in range(len(keywords_table_df)):
    try:
        keywords_table_df.iloc[i:i+1].to_sql(name="keywords",if_exists='append',con = engine, index=False)
    except exc.IntegrityError as e:
        pass

In [158]:
for i in range(len(platforms_table_df)):
    try:
        platforms_table_df.iloc[i:i+1].to_sql(name="platforms",if_exists='append',con = engine, index=False)
    except exc.IntegrityError as e:
        pass

In [159]:
genre_info_table_df.to_sql(name="genres_info",if_exists='replace', con = engine, index=False)
keyword_info_table_df.to_sql(name='keyword_info',if_exists='replace', con= engine, index=False)
platform_info_table_df.to_sql(name="platform_info", if_exists='replace', con= engine, index=False)