# Project 2 - Team 6

## ETL 

### Import Dependencies

In [None]:
import os
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

--------------------------------

# ABS_ERP_COMP.csv

### Load CSV

In [None]:
# Path to the cleaned ABS ERP extract, one directory above this notebook.
csv_file = os.path.join(os.pardir, 'Resources', 'ABS_ERP_COMP_CLEAN.csv')
# Alternative relative path, kept for reference:
# csv_file = "Resources/ABS_ERP_COMP_CLEAN.csv"

# Read the CSV into the source DataFrame that the States, Quarters and
# Population_Change cells slice their columns from.
erp_data_df = pd.read_csv(csv_file)
erp_data_df.head()

----------------------------------------

# States Table

<!-- ![states_table.png](attachment:states_table.png) -->

<img src="../Images/states_table.png" 
     align="left" 
     width="250" />

### Copy Region Column to New DF - For Table 'States'

In [None]:
# Pull the region column out into its own single-column DataFrame;
# .copy() keeps later edits from touching erp_data_df.
new_test_region_df = erp_data_df.loc[:, ['REGION: Region']].copy()
new_test_region_df.head()

In [None]:
# Reduce to one row per region: order by region name, then keep only the
# last row of each run of identical names.
new_test_region_df = (
    new_test_region_df
    .sort_values(by='REGION: Region')
    .drop_duplicates(subset='REGION: Region', keep='last')
)

In [None]:
# Move the surviving row labels into a regular 'index' column (unique per
# region after de-duplication) and give the frame a fresh 0..n-1 index.
new_test_region_df.reset_index(drop=False, inplace=True)
new_test_region_df

In [None]:
# Rename the headers to the column names used by the 'States' table schema.
new_test_region_df.rename(
    columns={
        'REGION: Region': 'state_name',
        'index': 'state_id',
    },
    inplace=True,
)
new_test_region_df

-------------------

# Quarters Table

<!-- ![states_table.png](attachment:states_table.png) -->
<img src="../Images/quarters_table.png" 
     align="left" 
     width="250" />

### Copy Time Period Column to New DF - For Table 'Quarters'

In [None]:
# Pull the time-period column out into its own single-column DataFrame;
# .copy() keeps later edits from touching erp_data_df.
new_test_quarter_df = erp_data_df.loc[:, ['TIME_PERIOD: Time Period']].copy()
new_test_quarter_df.head()

In [None]:
# Reduce to one row per time period: order by period, then keep only the
# last row of each run of identical values.
new_test_quarter_df = (
    new_test_quarter_df
    .sort_values(by='TIME_PERIOD: Time Period')
    .drop_duplicates(subset='TIME_PERIOD: Time Period', keep='last')
)

In [None]:
# Move the surviving row labels into a regular 'index' column (unique per
# period after de-duplication) and give the frame a fresh 0..n-1 index.
new_test_quarter_df.reset_index(level=0, drop=False, inplace=True)
new_test_quarter_df

In [None]:
# Rename the headers to the column names used by the 'Quarters' table schema.
new_test_quarter_df.rename(
    columns={
        'TIME_PERIOD: Time Period': 'date_quarter',
        'index': 'quarter_id',
    },
    inplace=True,
)
new_test_quarter_df

----------

# Population_Change Table

<!-- ![states_table.png](attachment:states_table.png) -->
<img src="../Images/population_change_t.png" 
     align="left" 
     width="250" />

In [None]:
# Take the three columns the Population_Change table needs (period, region,
# observed value) as an independent copy of the source data.
population_columns = ['TIME_PERIOD: Time Period', 'REGION: Region', 'OBS_VALUE']
population_df = erp_data_df.loc[:, population_columns].copy()
population_df.head()

In [None]:
# Rename the headers to match the schema. quarter_id and state_id still hold
# the raw period / region labels here; the following cells swap them for keys.
population_df.rename(
    columns={
        'TIME_PERIOD: Time Period': 'quarter_id',
        'REGION: Region': 'state_id',
        'OBS_VALUE': 'net_change',
    },
    inplace=True,
)
population_df

In [None]:
# Replace the quarter labels in population_df with the keys from the Quarters table.
# Lookup: {date_quarter label -> quarter_id key}.
dict_lookup = dict(
    zip(new_test_quarter_df['date_quarter'], new_test_quarter_df['quarter_id'])
)
# Translate each row's label; a label missing from the lookup raises KeyError.
key_list = list(population_df['quarter_id'])
population_df['quarter_id'] = list(map(dict_lookup.__getitem__, key_list))
population_df.head()

In [None]:
# Replace the region names in population_df with the keys from the States table.
# Lookup: {state_name -> state_id key}.
dict_lookup = dict(
    zip(new_test_region_df['state_name'], new_test_region_df['state_id'])
)
# Translate each row's name; a name missing from the lookup raises KeyError.
key_list = list(population_df['state_id'])
population_df['state_id'] = list(map(dict_lookup.__getitem__, key_list))
population_df.head()

In [None]:
# Missing observations are stored as a net change of 0 rather than NaN.
population_df['net_change'] = population_df['net_change'].fillna(value=0)
population_df

-------------------

# Average_Earnings Table

<!-- ![states_table.png](attachment:states_table.png) -->
<img src="../Images/average_earnings_t.png" 
     align="left" 
     width="250" />

------------

# Internal_Net_Change Table

<!-- ![states_table.png](attachment:internal_net_change_t.png) -->
<img src="../Images/internal_net_change_t.png" 
     align="left" 
     width="250" />

----------

# Internal_Arrivals Table

<!-- ![states_table.png](attachment:internal_net_change_t.png) -->
<img src="../Images/internal_arrivals_t.png" 
     align="left" 
     width="250" />

---------------------------

# Internal_Departures Table

<!-- ![states_table.png](attachment:internal_net_change_t.png) -->
<img src="../Images/internal_departures_t.png" 
     align="left" 
     width="250" />

---------------------------

# Analysis Table

<!-- ![states_table.png](attachment:internal_net_change_t.png) -->
<img src="../Images/analysis_t.png" 
     align="left" 
     width="250" />

---------------------------

-----------------

---------------------

### Reset Indexes of Finished Tables

In [None]:
# Reset the index of the finished QUARTERS table.
# reset_index() returns a new DataFrame unless inplace=True is passed; the
# original call discarded that result, so the statement had no effect.
# inplace=True matches how this frame's index was reset earlier.
new_test_quarter_df.reset_index(drop=True, inplace=True)
new_test_quarter_df.head()

In [None]:
# Reset the index of the finished STATES table.
# reset_index() returns a new DataFrame unless inplace=True is passed; the
# original call discarded that result, so the statement had no effect.
# inplace=True matches how this frame's index was reset earlier.
new_test_region_df.reset_index(drop=True, inplace=True)
new_test_region_df.head()

In [None]:
# Reset the index of the finished POPULATION_CHANGE table.
# reset_index() returns a new DataFrame unless inplace=True is passed; the
# original call discarded that result, so the statement had no effect.
population_df.reset_index(drop=True, inplace=True)
population_df.tail()

In [None]:
# Test Output - DELETE
# population_df.to_csv('../jon_clean_test.csv')