In [8]:
# Importing Dependencies
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, inspect

In [9]:
# Locations of the two input CSVs: the player counts saved from the API call
# and the vgsales_clean.csv resource
api_data = "./Outputs/active_count.csv"
vg_data = "./Resources/vgsales_clean.csv"

# Load each CSV into its own pandas dataframe (same order as the paths above)
player_count_df, vg_data_df = (
    pd.read_csv(csv_path) for csv_path in (api_data, vg_data)
)

In [10]:
# Columns of interest from the imported player-count data
count_cols = ["AppID", "Current Players"]

# Build the table in a single chain:
#   1. keep only the columns listed in count_cols
#   2. rename them to lowercase snake_case
#   3. keep the first row seen for each appid, so duplicates from repeated
#      get requests do not reach the database
#   4. use appid as the index, which simplifies lookups and the SQL import
count_transformed = (
    player_count_df[count_cols]
    .rename(columns={"AppID": "appid", "Current Players": "current_players"})
    .drop_duplicates("appid")
    .set_index("appid")
)

# Preview the first rows to confirm the columns were transformed correctly
count_transformed.head()

Unnamed: 0_level_0,current_players
appid,Unnamed: 1_level_1
10,14349


In [11]:
# Replace every missing value with 0 (this applies to all columns, text columns
# included), then store "Year" as int64 instead of float
vg_data_df = vg_data_df.fillna(0).astype({"Year": "int64"})

# Columns of interest from the imported sales data
vg_cols = ["Rank", "Name", "Platform", "Year", "Genre", "Publisher", "Global_Sales"]

# Keep only the columns listed in vg_cols
vg_transformed = vg_data_df[vg_cols].copy()

# Lowercase every column label, e.g. "Global_Sales" -> "global_sales"
vg_rename = vg_transformed.rename(columns=str.lower)

# Preview the first rows to confirm the columns were transformed correctly
vg_rename.head()

Unnamed: 0,rank,name,platform,year,genre,publisher,global_sales
0,1,Wii Sports,Wii,2006,Sports,Nintendo,82.74
1,2,Super Mario Bros.,NES,1985,Platform,Nintendo,40.24
2,3,Mario Kart Wii,Wii,2008,Racing,Nintendo,35.82
3,4,Wii Sports Resort,Wii,2009,Sports,Nintendo,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996,Role-Playing,Nintendo,31.37


In [12]:
# Creating SQL engine.
# Each connection setting defaults to the local development database and can be
# overridden through an environment variable, so real credentials never have to
# be written into the notebook.
db_user = os.environ.get("PLAYERCOUNT_DB_USER", "postgres")
db_password = os.environ.get("PLAYERCOUNT_DB_PASSWORD", "postgres")
db_host = os.environ.get("PLAYERCOUNT_DB_HOST", "localhost")
db_port = os.environ.get("PLAYERCOUNT_DB_PORT", "5432")
db_name = os.environ.get("PLAYERCOUNT_DB_NAME", "playercount_db")

# quote_plus escapes characters such as "@", ":" or "/" that would otherwise
# be misread as URL delimiters when they appear in the user name or password
connection_string = (
    f"{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(f'postgresql://{connection_string}')

In [6]:
# Verifying tables in database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names and works on both.
inspect(engine).get_table_names()

['current_players', 'vg_data']

In [7]:
# Appending both dataframes to their existing SQL tables.
# The settings shared by the two writes are collected once.
append_options = {"con": engine, "if_exists": "append"}

# appid is the dataframe index here, so the index is written as a column
count_transformed.to_sql("current_players", index=True, **append_options)

# vg_rename only has a default positional index, so it is left out of the table
vg_rename.to_sql("vg_data", index=False, **append_options)

In [None]:
# Reading the table definitions so they can be reviewed in the notebook.
# The context manager closes the file handle once the text has been read.
with open('SQL/schemas.sql') as schema_file:
    table_text = schema_file.read()

print(table_text)

# NOTE(review): no query is run in this cell. The previous version passed
# query_text to pd.read_sql_query, but query_text is only assigned in a later
# cell (NameError on a fresh kernel). Presumably schemas.sql holds the CREATE
# TABLE statements and is applied to the database outside this notebook; confirm.

In [None]:
# Reading the query from its .sql file; the context manager closes the file
# handle as soon as the text has been read
with open('SQL/player_query.sql') as query_file:
    query_text = query_file.read()

# Echo the SQL so the result below can be matched to the statement that produced it
print(query_text)
player_query_df = pd.read_sql_query(query_text, con=engine)

player_query_df

In [None]:
# Reading the query from its .sql file; the context manager closes the file
# handle as soon as the text has been read
with open('SQL/players_query.sql') as query_file:
    query_text = query_file.read()

# Echo the SQL so the result below can be matched to the statement that produced it
print(query_text)
player_query_df = pd.read_sql_query(query_text, con=engine)

player_query_df