## Project Note: 

While we cleaned and transformed our CSV files using Pandas and Python in this notebook, we ultimately decided not to use this method for our project, as it was more efficient to do the data transformation in SQL and pgAdmin. 

We are including this file in our repository as an example of how the data transformation could be done with Pandas and Python, since the work was already completed. 

Because the original files have since been cleaned via SQL, the code below will no longer run without errors (for example, the columns to be dropped no longer exist). 

In [None]:
# Import Dependencies
import pandas as pd
import numpy as np


## Median Income Data Transform

In [None]:
# Path to the Census 2019 median income CSV.
# The path is relative, so it is resolved against the notebook's working directory.
file_to_load = "resources/Census_2019_MedianIncome.csv"

In [None]:
# Load the median income CSV into a DataFrame.
# 'Zip Code' is read as a string so the values are kept as text
# (e.g. leading zeros are not lost to integer parsing).
# pd.read_csv is the documented public entry point for this reader.
medianIncome_df = pd.read_csv(file_to_load, dtype={'Zip Code': 'str'})
medianIncome_df.head()

In [None]:
# Drop the per-income-bracket household columns. Every bracket has two
# columns, an "Estimate" and a "Margin of Error", so the labels are built
# from the bracket names instead of being spelled out one by one.
income_brackets = [
    'Less than $10,000',
    '$10,000 to $14,999',
    '$15,000 to $24,999',
    '$25,000 to $34,999',
    '$35,000 to $49,999',
    '$50,000 to $74,999',
    '$75,000 to $99,999',
    '$100,000 to $149,999',
    '$150,000 to $199,999',
    '$200,000 or more',
]
bracket_columns = [
    measure + ' Households Total ' + bracket
    for bracket in income_brackets
    for measure in ('Estimate', 'Margin of Error')
]
medianIncome_clean_df = medianIncome_df.drop(columns=bracket_columns)

In [None]:
# Replace the remaining census column labels with underscore-separated names.
# errors="raise" makes rename fail if any label listed here is missing.
median_income_column_names = {
    "Zip Code": "Zip_Code",
    "Total Households": "Total_Households",
    "Margin of Error Households Total": "Total_Households_Margin_Error",
    "Estimate Households Median income (dollars)": "Median_Households_Estimate_Income",
    "Margin of Error Households Median income (dollars)": "Median_Households_Margin_Error",
    "Estimate Households Mean income (dollars)": "Mean_Households_Estimate_Income",
    "Margin of Error Households Mean income (dollars)": "Mean_Households_Margin_Error",
}
medianIncome_df = medianIncome_clean_df.rename(
    columns=median_income_column_names,
    errors="raise",
)
medianIncome_df.head()

In [None]:
# Write the cleaned median income data to CSV, omitting the index column.
# The output path is relative, like the "resources/" path the input is
# loaded from, rather than an absolute path that exists on one machine only.
medianIncome_df.to_csv("resources/medianIncome.csv", index=False)

## Zip County Data Transform

In [None]:
# Path to the original ZIP-to-county FIPS CSV.
# The path is relative, so it is resolved against the notebook's working directory.
file_to_load_B = "resources/ORIG_ZIP_COUNTY_FIPS_201706.csv"

In [None]:
# Load the ZIP-to-county CSV into a DataFrame.
# The dtype key has to match the column label exactly. The column is "ZIP"
# (the rename step in this notebook requires that label with errors="raise"),
# so the previous key 'Zip' matched no column and ZIP codes were not read as
# text, which loses leading zeros.
zipCounty_df = pd.read_csv(file_to_load_B, dtype={'ZIP': 'str'})
zipCounty_df.head()

In [None]:
# Remove the CLASSFP column; all other columns are kept.
zipCounty_clean_df = zipCounty_df.drop(columns=['CLASSFP'])

In [None]:
# Replace the upper-case source labels with underscore-separated names.
# errors="raise" makes rename fail if any label listed here is missing.
zip_county_column_names = {
    "ZIP": "Zip_Code",
    "COUNTYNAME": "County_Name",
    "STATE": "State_Code",
    "STCOUNTYFP": "County_Code",
}
zipCounty__df = zipCounty_clean_df.rename(
    columns=zip_county_column_names,
    errors="raise",
)
zipCounty__df.head()

In [None]:
# Write the cleaned ZIP-to-county data to CSV, omitting the index column.
# The output path is relative, like the "resources/" path the input is
# loaded from, rather than an absolute path that exists on one machine only.
zipCounty__df.to_csv("resources/zipCounty.csv", index=False)

## Electric Vehicles Data Transform

In [None]:
# Path to the original EV stations CSV.
# The path is relative, so it is resolved against the notebook's working directory.
file_to_load_C = "resources/ORIG_ev_stations_v1.csv"

In [None]:
# Load the EV stations CSV into a DataFrame.
# 'ZIP' and 'EV Other Info' are read as strings so their values are kept as text.
# pd.read_csv is the documented public entry point for this reader.
ev_df = pd.read_csv(file_to_load_C, dtype={'ZIP': 'str', 'EV Other Info': 'str'})
ev_df.head()

In [None]:
# Show the column labels of the loaded EV station data.
ev_df.columns

In [None]:
# Columns removed from the EV station data; every column not listed is kept.
ev_unused_columns = [
    'Fuel Type Code',
    'Intersection Directions',
    'Plus4',
    'Station Phone',
    'Expected Date',
    'Groups With Access Code',
    'Cards Accepted',
    'BD Blends',
    'NG Fill Type Code',
    'NG PSI',
    'EV Other Info',
    'EV Network Web',
    'Geocode Status',
    'Date Last Confirmed',
    'Updated At',
    'Federal Agency ID',
    'Federal Agency Name',
    'Hydrogen Status Link',
    'NG Vehicle Class',
    'LPG Primary',
    'E85 Blender Pump',
    'EV Connector Types',
    'Country',
    'Intersection Directions (French)',
    'Access Days Time (French)',
    'BD Blends (French)',
    'Groups With Access Code (French)',
    'Hydrogen Is Retail',
    'Access Detail Code',
    'Federal Agency Code',
    'CNG Dispenser Num',
    'CNG On-Site Renewable Source',
    'CNG Total Compression Capacity',
    'CNG Storage Capacity',
    'LNG On-Site Renewable Source',
    'E85 Other Ethanol Blends',
    'EV Pricing (French)',
    'LPG Nozzle Types',
    'Hydrogen Pressures',
    'Hydrogen Standards',
    'CNG Fill Type Code',
    'CNG PSI',
    'CNG Vehicle Class',
    'LNG Vehicle Class',
    'EV On-Site Renewable Source',
    'Restricted Access',
]
ev_clean_df = ev_df.drop(columns=ev_unused_columns)

In [None]:
# Replace the space-separated source labels with underscore-separated names
# ("ID" becomes "EV_ID" and "EV Pricing" becomes "Charging_Cost").
# errors="raise" makes rename fail if any label listed here is missing.
ev_column_names = {
    "Station Name": "Station_Name",
    "Street Address": "Street_Address",
    "State": "State_Code",
    "ZIP": "Zip_Code",
    "Status Code": "Status_Code",
    "Access Days Time": "Access_Days_Time",
    "EV Level1 EVSE Num": "EV_Level1_EVSE_Num",
    "EV Level2 EVSE Num": "EV_Level2_EVSE_Num",
    "EV DC Fast Count": "EV_DC_Fast_Count",
    "EV Network": "EV_Network",
    "ID": "EV_ID",
    "Owner Type Code": "Owner_Type_Code",
    "Open Date": "Open_Date",
    "Access Code": "Access_Code",
    "Facility Type": "Facility_Type",
    "EV Pricing": "Charging_Cost",
}
ev_df = ev_clean_df.rename(columns=ev_column_names, errors="raise")
ev_df.head()

In [None]:
# Show the (rows, columns) shape before rows are removed by status code,
# so it can be compared with the shape afterwards.
ev_df.shape

In [None]:
# Remove, in place, every row whose Status_Code equals "P", then show the
# resulting shape.
is_status_p = ev_df['Status_Code'] == "P"
ev_df.drop(index=ev_df[is_status_p].index, inplace=True)
ev_df.shape

In [None]:
# Write the cleaned EV station data to CSV, omitting the index column.
# The DataFrame built by the preceding cells is `ev_df`; the previous
# reference to `ev__df` was never defined and raised NameError.
# The output path is relative, like the "resources/" path the input is
# loaded from, rather than an absolute path that exists on one machine only.
ev_df.to_csv("resources/ev.csv", index=False)