## Overview

In [18]:
import pandas as pd
from sqlalchemy import create_engine, inspect

## Extract

In [2]:
# Relative paths to the two raw CSV inputs (world population, video-game sales).
file1, file2 = (
    "Resources/world_population.csv",
    "Resources/vgsales.csv",
)

In [3]:
# Load the world-population CSV and preview its first five rows.
world_pop_df = pd.read_csv(filepath_or_buffer=file1)
world_pop_df.head(n=5)

Unnamed: 0,Rank,CCA3,Country,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771,38972230,33753499,28189672,19542982,10694796,12486631,10752971,652230,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321,2866849,2882481,2913399,3182021,3295066,2941651,2324731,28748,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225,43451666,39543154,35856344,30774621,25518074,18739378,13795915,2381741,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273,46189,51368,54849,58230,47818,32886,27075,199,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824,77700,71746,71519,66097,53569,35611,19860,468,170.5641,1.01,0.0


In [4]:
# Load the video-game sales CSV and preview its first five rows.
vg_sales_df = pd.read_csv(filepath_or_buffer=file2)
vg_sales_df.head(n=5)

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


## Transform

In [5]:
# Group population data by continent and total every numeric column.
# numeric_only=True is spelled out because pandas >= 2.0 changed the default to
# False: without it the text columns (CCA3, Country, Capital) are no longer
# dropped from the aggregate, so the result would differ between pandas versions.
# NOTE(review): the summed Rank, Density and Growth Rate columns are carried
# along but are not meaningful as totals; only the population columns are used
# by the cells below.
continents = world_pop_df.groupby('Continent').sum(numeric_only=True)
continents

Unnamed: 0_level_0,Rank,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Africa,5253,1426730932,1360671810,1201102442,1055228072,818946032,638150629,481536377,365444348,30317963,7127.7158,58.2109,17.87
Asia,3878,4721383274,4663086535,4458250182,4220041327,3735089604,3210563577,2635334228,2144906290,32138141,51251.2068,50.4692,59.19
Europe,6225,743147538,745792196,741535608,735613934,726093423,720320797,692527159,655923991,23010411,33166.2371,50.1128,9.33
North America,6437,600296136,594236593,570383850,542720651,486069584,421266425,368293361,315434606,24244178,10910.4703,40.167,7.51
Oceania,4336,45038554,43933426,40403283,37102764,31222778,26743822,22920240,19480270,8515081,3048.4905,23.1698,0.55
South America,1366,436816608,431530043,413134396,393078250,349634282,297146415,241789006,192947156,17833382,293.6077,14.1114,5.48


In [6]:
# 2020 population figures for each sales region.

# Continent-level totals come from the grouped frame (scalar label lookups).
na_pop = continents.at['North America', '2020 Population']
eu_pop = continents.at['Europe', '2020 Population']

# World total is the sum over all continents.
world_pop = continents['2020 Population'].sum()

# Japan is a single country, so it is read from the un-grouped frame;
# .values[0] takes the first matching row.
japan_pop = world_pop_df.loc[world_pop_df['Country'].eq('Japan'), '2020 Population'].values[0]

In [7]:
# Collect the regional populations into a two-column lookup table,
# one row per region code.
population = pd.DataFrame(
    {
        "Region": ["na", "eu", "jp", "wo"],
        "Population": [na_pop, eu_pop, japan_pop, world_pop],
    }
)
population

Unnamed: 0,Region,Population
0,na,594236593
1,eu,745792196
2,jp,125244761
3,wo,7839250603


In [9]:
# Build the video_games table: the sales frame's "Rank" column is renamed to
# "Game_Id", then only the identifying columns are kept.
vg_sales_df = vg_sales_df.rename(columns={"Rank": "Game_Id"})
video_games = vg_sales_df.loc[:, ["Game_Id", "Name", "Platform"]]
video_games.head(n=5)

Unnamed: 0,Game_Id,Name,Platform
0,1,Wii Sports,Wii
1,2,Super Mario Bros.,NES
2,3,Mario Kart Wii,Wii
3,4,Wii Sports Resort,Wii
4,5,Pokemon Red/Pokemon Blue,GB


In [17]:
# Create the sales table in long format: one row per (Game_Id, Region).
#
# The region relabelling and scaling are done in a single non-mutating chain.
# The previous `sales["Region"].replace(..., inplace=True)` was an in-place call
# on a column selected from the frame; with pandas Copy-on-Write that call does
# not update `sales`, and pandas 2.2+ emits a FutureWarning for it.
sales = (
    vg_sales_df
    .melt(
        id_vars=["Game_Id"],
        value_vars=["NA_Sales", "EU_Sales", "JP_Sales", "Global_Sales"],
        var_name="Region",
        value_name="Sales",
    )
    .assign(
        # Map the melted column names to the region codes used in the
        # `population` table ("na", "eu", "jp", "wo").
        Region=lambda df: df["Region"].replace(
            {"NA_Sales": "na", "EU_Sales": "eu", "JP_Sales": "jp", "Global_Sales": "wo"}
        ),
        # Scale the raw figures by one million.
        # NOTE(review): presumably the CSV reports sales in millions of units - confirm.
        Sales=lambda df: df["Sales"] * 1_000_000,
    )
)

sales.head()

Unnamed: 0,Game_Id,Region,Sales
0,1,na,41490000.0
1,2,na,29080000.0
2,3,na,15850000.0
3,4,na,15750000.0
4,5,na,11270000.0


## Load