# Import Dependencies

In [1]:
import getpass
import os
from urllib.parse import quote_plus

import pandas as pd
import psycopg2
from sqlalchemy import create_engine, inspect

# Extract the data files and store them in DataFrames

In [2]:
# Read the quarterly GDP-by-state CSV exactly as published. The file's title
# line is taken as the header row, so every other column is "Unnamed: N".
csv_file1 = "qgdpstate0519_3.csv"
gdp_by_state_df = pd.read_csv(filepath_or_buffer=csv_file1)
gdp_by_state_df.head(n=5)

Unnamed: 0,"Table 1. Percent Change in Real Gross Domestic Product (GDP) by State and Region, 2017:Q1-2018:Q4",Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,,Seasonally adjusted at annual rates,,,,,,,,,
1,,2017,,,,2018,,,,Rank 2018:Q4,
2,,Q1,Q2,Q3,Q4,Q1,Q2,Q3,Q4,,
3,United States,1.8,3.0,2.8,2.3,2.2,4.2,3.4,2.2,--,
4,New England,0.6,2.8,3.1,-0.5,3.6,0.6,3.3,1.7,--,


In [3]:
# Read the personal-income CSV exactly as published. The file's title line is
# taken as the header row, so every other column is "Unnamed: N".
csv_file2 = "spi0619.csv"
personal_income_df = pd.read_csv(filepath_or_buffer=csv_file2)
personal_income_df.head(n=5)

Unnamed: 0,"Table 1. Personal Income, by State and Region, 2017:Q4-2019:Q1",Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,,"[Millions of dollars, seasonally adjusted at a...",,,,,,[Seasonally adjusted at annual rate],,,,,,
1,,2017,2018,,,,2019,Percent change from preceding quarter1,,,,,Rank,
2,,Q4,Q1r,Q2r,Q3r,Q4r,Q1p,2018:Q1,2018:Q2,2018:Q3,2018:Q4,2019:Q1,2018:Q4 - 2019:Q1,
3,United States,17090705,17313151,17460221,17647510,17825728,17975691,5.3,3.4,4.4,4.1,3.4,--,
4,New England,963895,979868,981773,992031,998048,1004541,6.8,0.8,4.2,2.4,2.6,--,


# Clean data by selecting specific columns and dropping header rows

In [13]:
# Raw column label -> clean column name. Only the region/state label and the
# four 2018 quarterly GDP columns are kept; dict order fixes the column order.
gdp_column_names = {
    "Table 1. Percent Change in Real Gross Domestic Product (GDP) by State and Region, 2017:Q1-2018:Q4": "state",
    "Unnamed: 5": "fy18_q1_gdp",
    "Unnamed: 6": "fy18_q2_gdp",
    "Unnamed: 7": "fy18_q3_gdp",
    "Unnamed: 8": "fy18_q4_gdp",
}

# Select, copy (so the raw frame is never mutated), rename, then drop the two
# leading header rows (index labels 0 and 1).
# NOTE(review): index 2 (the "Q1".."Q4" quarter-label row) is still present
# after this step, and the value columns remain strings.
new_gdp_by_state_df = (
    gdp_by_state_df[list(gdp_column_names)]
    .copy()
    .rename(columns=gdp_column_names)
    .drop(index=[0, 1])
)
new_gdp_by_state_df.head()

Unnamed: 0,state,fy18_q1_gdp,fy18_q2_gdp,fy18_q3_gdp,fy18_q4_gdp
2,,Q1,Q2,Q3,Q4
3,United States,2.2,4.2,3.4,2.2
4,New England,3.6,0.6,3.3,1.7
5,Connecticut,1.7,-3.6,9.0,1.8
6,Maine,2.0,2.5,3.1,0.7


In [14]:
# Raw column label -> clean column name. Only the region/state label and the
# four 2018 quarterly income columns are kept; dict order fixes the column order.
income_column_names = {
    "Table 1. Personal Income, by State and Region, 2017:Q4-2019:Q1": "state",
    "Unnamed: 2": "fy18_q1_income",
    "Unnamed: 3": "fy18_q2_income",
    "Unnamed: 4": "fy18_q3_income",
    "Unnamed: 5": "fy18_q4_income",
}

# Renamed copy of the selected columns; the raw frame is left untouched.
new_personal_income_df = (
    personal_income_df[list(income_column_names)]
    .copy()
    .rename(columns=income_column_names)
)

# Drop the two leading header rows (index labels 0 and 1).
# NOTE(review): index 2 (the "Q1r".."Q4r" quarter-label row) is still present
# after this step, and the value columns remain strings.
new_personal_income_drop_rows_df = new_personal_income_df.drop(index=[0, 1])

new_personal_income_drop_rows_df.head()

Unnamed: 0,state,fy18_q1_income,fy18_q2_income,fy18_q3_income,fy18_q4_income
2,,Q1r,Q2r,Q3r,Q4r
3,United States,17313151,17460221,17647510,17825728
4,New England,979868,981773,992031,998048
5,Connecticut,263070,263185,267948,268375
6,Maine,63811,64257,64835,65049


# Connect to local database

In [15]:
# Build the connection URL without embedding the database password in the
# notebook. The password comes from the PGPASSWORD environment variable; when
# that is unset the user is prompted, so the secret never lands in source or
# in saved cell output.
# NOTE: the password that was previously committed here should be rotated.
db_password = os.environ.get("PGPASSWORD") or getpass.getpass("Postgres password: ")

# quote_plus escapes characters such as '@' or ':' that would otherwise break
# the URL's user:password@host structure.
rds_connection_string = f"postgres:{quote_plus(db_password)}@localhost:5432/gdp_saving_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of table names.
inspect(engine).get_table_names()

[]

In [36]:
# Load the cleaned income frame into the `income` table, keeping the DataFrame
# index as an "index" column (the later join query matches rows on it).
# if_exists='replace' keeps this cell idempotent: with 'append', every re-run
# inserted the full frame again, and the duplicated index values then multiply
# the matches in the join.
new_personal_income_drop_rows_df.to_sql(name='income', con=engine, if_exists='replace', index=True)

In [37]:
# Load the cleaned GDP frame into the `gdp` table, keeping the DataFrame index
# as an "index" column (the later join query matches rows on it).
# if_exists='replace' keeps this cell idempotent: with 'append', every re-run
# inserted the full frame again, and the duplicated index values then multiply
# the matches in the join.
new_gdp_by_state_df.to_sql(name='gdp', con=engine, if_exists='replace', index=True)

# Check for tables

In [38]:
# Read the `income` table back from the database and show its first rows to
# confirm the load.
pd.read_sql_query(
    sql='select * from income',
    con=engine,
).head()

Unnamed: 0,index,state,fy18_q1_income,fy18_q2_income,fy18_q3_income,fy18_q4_income
0,2,,Q1r,Q2r,Q3r,Q4r
1,3,United States,17313151,17460221,17647510,17825728
2,4,New England,979868,981773,992031,998048
3,5,Connecticut,263070,263185,267948,268375
4,6,Maine,63811,64257,64835,65049


In [39]:
# Read the `gdp` table back from the database and show its first rows to
# confirm the load.
pd.read_sql_query(
    sql='select * from gdp',
    con=engine,
).head()

Unnamed: 0,index,state,fy18_q1_gdp,fy18_q2_gdp,fy18_q3_gdp,fy18_q4_gdp
0,2,,Q1,Q2,Q3,Q4
1,3,United States,2.2,4.2,3.4,2.2
2,4,New England,3.6,0.6,3.3,1.7
3,5,Connecticut,1.7,-3.6,9.0,1.8
4,6,Maine,2.0,2.5,3.1,0.7


# Confirm data in joined table

In [51]:
# Join the GDP and income tables on the "index" column that to_sql wrote from
# each DataFrame's index.
# The ORDER BY is required: without it SQL gives no guarantee about row order,
# so the positional slice below could keep an arbitrary set of rows. COALESCE
# lets rows that exist in only one table (NULL index on the other side of the
# FULL OUTER JOIN) sort by whichever index is present.
query = (
    'SELECT gdp.state, gdp.fy18_q1_gdp, gdp.fy18_q2_gdp, gdp.fy18_q3_gdp, gdp.fy18_q4_gdp, '
    'income.fy18_q1_income, income.fy18_q2_income, income.fy18_q3_income, income.fy18_q4_income '
    'FROM gdp FULL OUTER JOIN income ON gdp.index = income.index '
    'ORDER BY COALESCE(gdp.index, income.index);'
)
joined_table_df = pd.read_sql_query(query, con=engine)

# Keep only the first 61 rows in index order.
# NOTE(review): presumably the quarter-label row plus 60 region/state rows,
# with trailing footnote rows discarded - TODO confirm against the CSVs.
joined_table_df = joined_table_df[:61]
joined_table_df

Unnamed: 0,state,fy18_q1_gdp,fy18_q2_gdp,fy18_q3_gdp,fy18_q4_gdp,fy18_q1_income,fy18_q2_income,fy18_q3_income,fy18_q4_income
0,,Q1,Q2,Q3,Q4,Q1r,Q2r,Q3r,Q4r
1,United States,2.2,4.2,3.4,2.2,17313151,17460221,17647510,17825728
2,New England,3.6,0.6,3.3,1.7,979868,981773,992031,998048
3,Connecticut,1.7,-3.6,9.0,1.8,263070,263185,267948,268375
4,Maine,2.0,2.5,3.1,0.7,63811,64257,64835,65049
5,Massachusetts,4.7,2.2,1.8,1.8,479612,480668,484706,488748
6,New Hampshire,6.4,-0.7,3.9,1.7,82729,82547,83488,83621
7,Rhode Island,1.3,3.2,-5.7,1.0,57469,57688,57505,58436
8,Vermont,0.9,4.0,-1.1,1.9,33176,33429,33548,33820
9,Mideast,1.1,3.9,2.6,1.1,3098408,3125165,3154948,3167433


In [52]:
# Persist the joined result as the `income_gdp` table (no index column).
# if_exists='replace' keeps this cell idempotent: with 'append', every re-run
# added another full copy of the joined rows to the table.
joined_table_df.to_sql(name='income_gdp', con=engine, if_exists='replace', index=False)