# Import the Required Dependencies

In [35]:
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine, inspect
from config import pwd_postgresql

# Store the CSV files as DataFrames

In [36]:
# Data source 1: video game sales, downloaded from
# https://www.kaggle.com/datasets/sandhyakrishnan02/video-game-sales

csv_file_1 = "vgsales.csv"
games_sales_df = pd.read_csv(filepath_or_buffer=csv_file_1)

# Preview the first five rows of the loaded file.
games_sales_df.head(n=5)


Unnamed: 0,Rank,Name,Platform,Year,Publisher,Developer,Critic_Score,User_Score,NA_Sales,PAL_Sales,JP_Sales,Other_Sales,Global_Sales
0,11,The Sims,Series,2000.0,Electronic Arts,Maxis,,,,,,,
1,12,Final Fantasy,Series,1987.0,Square,Square,,,,,,,
2,13,Grand Theft Auto V,All,2013.0,Rockstar Games,Rockstar North,,,,,,,
3,14,Mario Kart,Series,1992.0,Nintendo,Nintendo,,,,,,,
4,15,Need for Speed,Series,1994.0,Electronic Arts,EA Black Box,,,,,,,


In [37]:
# Data source 2: video game reviews, downloaded from
# https://www.kaggle.com/datasets/muhammadadiltalay/imdb-video-games

csv_file_2 = "imdb-videogames.csv"
games_review_df = pd.read_csv(filepath_or_buffer=csv_file_2)

# Preview the first five rows of the loaded file.
games_review_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,name,url,year,certificate,rating,votes,plot,Action,Adventure,Comedy,Crime,Family,Fantasy,Mystery,Sci-Fi,Thriller
0,0,Spider-Man,https://www.imdb.com/title/tt5807780/?ref_=adv...,2018.0,T,9.2,20759,"When a new villain threatens New York City, Pe...",True,True,False,False,False,True,False,False,False
1,1,Red Dead Redemption II,https://www.imdb.com/title/tt6161168/?ref_=adv...,2018.0,M,9.7,35703,Amidst the decline of the Wild West at the tur...,True,True,False,True,False,False,False,False,False
2,2,Grand Theft Auto V,https://www.imdb.com/title/tt2103188/?ref_=adv...,2013.0,M,9.5,59986,Three very different criminals team up for a s...,True,False,False,True,False,False,False,False,False
3,3,God of War,https://www.imdb.com/title/tt5838588/?ref_=adv...,2018.0,M,9.6,26118,"After wiping out the gods of Mount Olympus, Kr...",True,True,False,False,False,False,False,False,False
4,4,Uncharted 4: A Thief's End,https://www.imdb.com/title/tt3334704/?ref_=adv...,2016.0,T,9.5,28722,Thrown back into the dangerous underworld he'd...,True,True,False,False,False,False,False,False,False


# Format the CSV files for upload

In [38]:
# Select the relevant columns from data set 1 (sales).
# .copy() gives an independent frame rather than a view of games_sales_df.

sales_columns = ['Name', 'Platform', 'Year', 'Publisher', 'Developer', 'Global_Sales']
new_games_sales_df = games_sales_df.loc[:, sales_columns].copy()
new_games_sales_df.head(n=5)

Unnamed: 0,Name,Platform,Year,Publisher,Developer,Global_Sales
0,The Sims,Series,2000.0,Electronic Arts,Maxis,
1,Final Fantasy,Series,1987.0,Square,Square,
2,Grand Theft Auto V,All,2013.0,Rockstar Games,Rockstar North,
3,Mario Kart,Series,1992.0,Nintendo,Nintendo,
4,Need for Speed,Series,1994.0,Electronic Arts,EA Black Box,


In [39]:
# Rename the headings to match the database.
# Note that 'Publisher' maps to the plural 'publishers'.

sales_column_names = {
    'Name': 'name',
    'Platform': 'platform',
    'Year': 'year',
    'Publisher': 'publishers',
    'Developer': 'developer',
    'Global_Sales': 'global_sales',
}
new_games_sales_df_2 = new_games_sales_df.rename(columns=sales_column_names)
new_games_sales_df_2.head(n=5)

Unnamed: 0,name,platform,year,publishers,developer,global_sales
0,The Sims,Series,2000.0,Electronic Arts,Maxis,
1,Final Fantasy,Series,1987.0,Square,Square,
2,Grand Theft Auto V,All,2013.0,Rockstar Games,Rockstar North,
3,Mario Kart,Series,1992.0,Nintendo,Nintendo,
4,Need for Speed,Series,1994.0,Electronic Arts,EA Black Box,


In [40]:
# Select the relevant columns from data set 2 (reviews).
# .copy() gives an independent frame rather than a view of games_review_df.

review_columns = ['name', 'year', 'certificate', 'rating', 'votes']
new_games_review_df = games_review_df.loc[:, review_columns].copy()
new_games_review_df.head(n=5)


Unnamed: 0,name,year,certificate,rating,votes
0,Spider-Man,2018.0,T,9.2,20759
1,Red Dead Redemption II,2018.0,M,9.7,35703
2,Grand Theft Auto V,2013.0,M,9.5,59986
3,God of War,2018.0,M,9.6,26118
4,Uncharted 4: A Thief's End,2016.0,T,9.5,28722


In [41]:
# Remove the comma in the votes values.
#
# assign() returns a new DataFrame in which the existing 'votes' column is
# replaced by the cleaned one, in its original position. This gives the same
# columns, order and values as stripping the commas into a separate frame,
# merging it back on the index, dropping 'votes_x' and renaming 'votes_y',
# without the in-place drop or the temporary suffix columns.
# regex=False treats the comma as a literal character instead of relying on
# the pandas-version-dependent default.
# NOTE: the cleaned values are still strings; no numeric conversion happens here.

new_games_review_df_3 = new_games_review_df.assign(
    votes=new_games_review_df['votes'].str.replace(",", "", regex=False)
)
new_games_review_df_3.head()

Unnamed: 0,name,year,certificate,rating,votes
0,Spider-Man,2018.0,T,9.2,20759
1,Red Dead Redemption II,2018.0,M,9.7,35703
2,Grand Theft Auto V,2013.0,M,9.5,59986
3,God of War,2018.0,M,9.6,26118
4,Uncharted 4: A Thief's End,2016.0,T,9.5,28722


# Connect to the local database

In [42]:
# Connect to the database.
#
# The password is percent-encoded before it is placed in the URL so that
# characters such as '@', ':', '/' or '%' are not misread as URL delimiters.
# safe="" makes quote() escape '/' as well, and spaces become %20.
# NOTE(review): this assumes config.pwd_postgresql holds the raw password,
# not a value that has already been percent-encoded -- confirm.

protocol = 'postgresql'
username = 'postgres'
password = pwd_postgresql
host = 'localhost'
port = 5432
database_name = 'video_games_project'

escaped_password = quote(str(password), safe="")
rds_connection_string = f'{protocol}://{username}:{escaped_password}@{host}:{port}/{database_name}'
engine = create_engine(rds_connection_string)

In [43]:
# List the tables present in the connected database.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the Inspector returned by inspect(engine) is the supported replacement.
inspect(engine).get_table_names()

  engine.table_names()


['video_game_sales', 'imdb_video_games']

# Load the DataFrames into the Database using Pandas

In [44]:
# Rows are appended to the named tables (if_exists='append'), and
# index=False keeps the DataFrame index out of the written columns.

# DataFrame 1 -> video_game_sales
new_games_sales_df_2.to_sql(
    name='video_game_sales',
    con=engine,
    if_exists='append',
    index=False,
)

# DataFrame 2 -> imdb_video_games
new_games_review_df_3.to_sql(
    name='imdb_video_games',
    con=engine,
    if_exists='append',
    index=False,
)

803