# ETL Project

## Import Dependencies and Setup

In [5]:

import pandas as pd
from sqlalchemy import create_engine

## Extract data

In [6]:
fast_food_file = 'Resources/Fast_Food_Restaurants.csv'

In [7]:
fast_food_df = pd.read_csv(fast_food_file)
fast_food_df.head()

Unnamed: 0,id,dateAdded,dateUpdated,address,categories,primaryCategories,city,country,keys,latitude,longitude,name,postalCode,province,sourceURLs,websites
0,AWrSh_KgsVYjT2BJAzaH,2019-05-19T23:58:05Z,2019-05-19T23:58:05Z,2555 11th Avenue,"Fast Food Restaurants,Hamburgers and Hot Dogs,...",Accommodation & Food Services,Greeley,US,us/co/greeley/255511thavenue/554191587,40.39629,-104.69699,Carl's Jr.,80631,CO,https://www.yellowpages.com/greeley-co/mip/car...,https://www.carlsjr.com/?utm_source=Yextandutm...
1,AWEKlA-LIxWefVJwxG9B,2018-01-18T18:30:23Z,2019-05-19T23:45:05Z,2513 Highway 6 And 50,"Restaurant,Mexican Restaurants,Fast Food Resta...",Accommodation & Food Services,Grand Junction,US,us/co/grandjunction/2513highway6and50/1550891556,39.08135,-108.58689,Del Taco,81505,CO,http://www.citysearch.com/profile/772076870/gr...,http://www.deltaco.com
2,AWrSfAcYsVYjT2BJAzPt,2019-05-19T23:45:04Z,2019-05-19T23:45:04Z,1125 Patterson Road,"Sandwich Shops,Fast Food Restaurants,Restauran...",Accommodation & Food Services,Grand Junction,US,us/co/grandjunction/1125pattersonroad/-2137447852,39.09148,-108.55411,Which Wich,81506,CO,https://www.yellowpages.com/grand-junction-co/...,http://www.whichwich.com
3,AWrSa3NAQTFama1Xpkbz,2019-05-19T23:26:58Z,2019-05-19T23:26:58Z,3455 N Salida Court,"Fast Food Restaurants,Mexican Restaurants,Rest...",Accommodation & Food Services,Aurora,US,us/co/aurora/3455nsalidacourt/1143321601,39.76369,-104.77671,Chipotle Mexican Grill,80011,CO,https://www.yellowpages.com/aurora-co/mip/chip...,http://www.chipotle.com
4,AWrSaVGzZ4Yw-wtdgcaB,2019-05-19T23:24:38Z,2019-05-19T23:24:38Z,5225 E Colfax Avenue,"Fast Food Restaurants,Mexican Restaurants,Rest...",Accommodation & Food Services,Denver,US,us/co/denver/5225ecolfaxavenue/-864103396,39.74044,-104.92636,Taco Bell,80220,CO,https://www.yellowpages.com/denver-co/mip/taco...,https://locations.tacobell.com/co/denver/5225-...


## Transforming data

In [8]:
#Creating a filtered dataframe from specific columns
fast_food_cols = ['id', 'categories', 'city', 'name', 'province']
fast_food_transformed = fast_food_df[fast_food_cols].copy()
fast_food_transformed

Unnamed: 0,id,categories,city,name,province
0,AWrSh_KgsVYjT2BJAzaH,"Fast Food Restaurants,Hamburgers and Hot Dogs,...",Greeley,Carl's Jr.,CO
1,AWEKlA-LIxWefVJwxG9B,"Restaurant,Mexican Restaurants,Fast Food Resta...",Grand Junction,Del Taco,CO
2,AWrSfAcYsVYjT2BJAzPt,"Sandwich Shops,Fast Food Restaurants,Restauran...",Grand Junction,Which Wich,CO
3,AWrSa3NAQTFama1Xpkbz,"Fast Food Restaurants,Mexican Restaurants,Rest...",Aurora,Chipotle Mexican Grill,CO
4,AWrSaVGzZ4Yw-wtdgcaB,"Fast Food Restaurants,Mexican Restaurants,Rest...",Denver,Taco Bell,CO
...,...,...,...,...,...
9995,AVwd7U0T_7pvs4fz-fS_,"American Restaurants,Caterers,Soul Food Restau...",Bessemer,Church's Chicken Of Birmingham,AL
9996,AV1XW1fF3D1zeR_xFKHL,"Mexican Restaurants,Caterers,Delicatessens,Iri...",Raleigh,Luxury Box,NC
9997,AV5UVbkiRxPSIh2RiQxV,"Carry-out food,Hot dog restaurants,Fast Food R...",Plover,McDonald's,WI
9998,AVwc9BxYByjofQCxj_GW,"Carry-out food,Caterers,Restaurant delivery se...",Hendersonville,Pizza Hut,TN


In [9]:
#Rename columns
fast_food_transformed = fast_food_transformed.rename(columns={'id': 'ID', 'categories':'Categories', 
                                                              'city':'City',
                                                              'name':'Restaurant_Name', 'province': 'State'})
fast_food_transformed

Unnamed: 0,ID,Categories,City,Restaurant_Name,State
0,AWrSh_KgsVYjT2BJAzaH,"Fast Food Restaurants,Hamburgers and Hot Dogs,...",Greeley,Carl's Jr.,CO
1,AWEKlA-LIxWefVJwxG9B,"Restaurant,Mexican Restaurants,Fast Food Resta...",Grand Junction,Del Taco,CO
2,AWrSfAcYsVYjT2BJAzPt,"Sandwich Shops,Fast Food Restaurants,Restauran...",Grand Junction,Which Wich,CO
3,AWrSa3NAQTFama1Xpkbz,"Fast Food Restaurants,Mexican Restaurants,Rest...",Aurora,Chipotle Mexican Grill,CO
4,AWrSaVGzZ4Yw-wtdgcaB,"Fast Food Restaurants,Mexican Restaurants,Rest...",Denver,Taco Bell,CO
...,...,...,...,...,...
9995,AVwd7U0T_7pvs4fz-fS_,"American Restaurants,Caterers,Soul Food Restau...",Bessemer,Church's Chicken Of Birmingham,AL
9996,AV1XW1fF3D1zeR_xFKHL,"Mexican Restaurants,Caterers,Delicatessens,Iri...",Raleigh,Luxury Box,NC
9997,AV5UVbkiRxPSIh2RiQxV,"Carry-out food,Hot dog restaurants,Fast Food R...",Plover,McDonald's,WI
9998,AVwc9BxYByjofQCxj_GW,"Carry-out food,Caterers,Restaurant delivery se...",Hendersonville,Pizza Hut,TN


In [10]:
fast_food_state = fast_food_transformed.groupby('State').count()#(['State', 'Restaurant_Name']).count()
fast_food_state.rename(columns={'Restaurant_Name': 'Restaurant_Count'}, inplace = True)
fast_food_state

Unnamed: 0_level_0,ID,Categories,City,Restaurant_Count
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AK,64,64,64,64
AL,635,635,635,635
AR,124,124,124,124
AZ,186,186,186,186
CA,727,727,727,727
CO,154,154,154,154
CT,1,1,1,1
DC,18,18,18,18
DE,25,25,25,25
FL,739,739,739,739


In [11]:
#Sorting by states with most restaurants
fast_food_state[['Restaurant_Count']].sort_values('Restaurant_Count',ascending=False)

Unnamed: 0_level_0,Restaurant_Count
State,Unnamed: 1_level_1
OH,922
FL,739
CA,727
NC,663
AL,635
IN,519
TX,426
WA,404
VA,348
OR,334


In [17]:
# Import population by state

population = "Resources/US_population_2018.csv"
population = pd.read_csv(population)
population.head()

Unnamed: 0.1,Unnamed: 0,State,"Population estimate, July 1, 2019[2]","Percent change, 2010–2019[note 1]","Percent of the total U.S. population, 2018[note 3]"
0,0,California,39512223,6.1%,11.96%
1,1,Texas,28995881,15.3%,8.68%
2,2,Florida,21477737,14.2%,6.44%
3,3,New York,19453561,0.4%,5.91%
4,4,Pennsylvania,12801989,0.8%,3.87%


In [13]:
# Push population to database under table name: population

population.to_sql(name='population', con=engine, if_exist="append", index=False)

AttributeError: 'str' object has no attribute 'to_sql'

In [18]:
#get state abbreviations
url= 'https://simple.wikipedia.org/wiki/U.S._postal_abbreviations#States'
tables = pd.read_html(url)
tables

[                                                    0               1
 0   State Abbreviation State Name AK Alaska AL Ala...             NaN
 1                                  State Abbreviation      State Name
 2                                                  AK          Alaska
 3                                                  AL         Alabama
 4                                                  AR        Arkansas
 5                                                  AZ         Arizona
 6                                                  CA      California
 7                                                  CO        Colorado
 8                                                  CT     Connecticut
 9                                                  DE        Delaware
 10                                                 FL         Florida
 11                                                 GA         Georgia
 12                                                 HI          Hawaii
 13   

In [19]:
abbr_df=tables[1]

abbr_df

Unnamed: 0,State Abbreviation,State Name
0,AK,Alaska
1,AL,Alabama
2,AR,Arkansas
3,AZ,Arizona
4,CA,California
5,CO,Colorado
6,CT,Connecticut
7,DE,Delaware
8,FL,Florida
9,GA,Georgia


In [20]:
populate_by_state=pd.merge(population, abbr_df, left_on='State', right_on='State Name')
populate_by_state.head()

Unnamed: 0.1,Unnamed: 0,State,"Population estimate, July 1, 2019[2]","Percent change, 2010–2019[note 1]","Percent of the total U.S. population, 2018[note 3]",State Abbreviation,State Name
0,0,California,39512223,6.1%,11.96%,CA,California
1,1,Texas,28995881,15.3%,8.68%,TX,Texas
2,2,Florida,21477737,14.2%,6.44%,FL,Florida
3,3,New York,19453561,0.4%,5.91%,NY,New York
4,4,Pennsylvania,12801989,0.8%,3.87%,PA,Pennsylvania


In [21]:
populate_by_state.columns

Index(['Unnamed: 0', 'State', 'Population estimate, July 1, 2019[2]',
       'Percent change, 2010–2019[note 1]',
       'Percent of the total U.S. population, 2018[note 3]',
       'State Abbreviation', 'State Name'],
      dtype='object')

In [22]:
populate_by_state=populate_by_state.drop(["Unnamed: 0", "State"], axis=1)

In [23]:
populate_by_state=populate_by_state[["State Abbreviation", "State Name",'Population estimate, July 1, 2019[2]',
       'Percent change, 2010–2019[note 1]',
       'Percent of the total U.S. population, 2018[note 3]']]

In [24]:
populate_by_state

Unnamed: 0,State Abbreviation,State Name,"Population estimate, July 1, 2019[2]","Percent change, 2010–2019[note 1]","Percent of the total U.S. population, 2018[note 3]"
0,CA,California,39512223,6.1%,11.96%
1,TX,Texas,28995881,15.3%,8.68%
2,FL,Florida,21477737,14.2%,6.44%
3,NY,New York,19453561,0.4%,5.91%
4,PA,Pennsylvania,12801989,0.8%,3.87%
5,IL,Illinois,12671821,-1.2%,3.85%
6,OH,Ohio,11689100,1.3%,3.53%
7,GA,Georgia,10617423,9.6%,3.18%
8,NC,North Carolina,10488084,10.0%,3.14%
9,MI,Michigan,9986857,1.0%,3.02%


In [25]:
populate_by_state=populate_by_state.rename (columns={'Population estimate, July 1, 2019[2]':"Population",
       'Percent change, 2010–2019[note 1]': 'Population Percent Change',
       'Percent of the total U.S. population, 2018[note 3]':"Percent of Total Population"})

In [26]:
populate_by_state

Unnamed: 0,State Abbreviation,State Name,Population,Population Percent Change,Percent of Total Population
0,CA,California,39512223,6.1%,11.96%
1,TX,Texas,28995881,15.3%,8.68%
2,FL,Florida,21477737,14.2%,6.44%
3,NY,New York,19453561,0.4%,5.91%
4,PA,Pennsylvania,12801989,0.8%,3.87%
5,IL,Illinois,12671821,-1.2%,3.85%
6,OH,Ohio,11689100,1.3%,3.53%
7,GA,Georgia,10617423,9.6%,3.18%
8,NC,North Carolina,10488084,10.0%,3.14%
9,MI,Michigan,9986857,1.0%,3.02%


In [27]:
populate_by_state=populate_by_state.drop(["Population Percent Change"], axis=1)

In [37]:
populate_by_state=populate_by_state.reset_index()

In [47]:
populate_by_state=populate_by_state.rename(columns={"index":"id", "population": "Population"})
populate_by_state

Unnamed: 0,id,State Abbreviation,State Name,Population,Percent of Total Population
0,0,CA,California,39512223,11.96%
1,1,TX,Texas,28995881,8.68%
2,2,FL,Florida,21477737,6.44%
3,3,NY,New York,19453561,5.91%
4,4,PA,Pennsylvania,12801989,3.87%
5,5,IL,Illinois,12671821,3.85%
6,6,OH,Ohio,11689100,3.53%
7,7,GA,Georgia,10617423,3.18%
8,8,NC,North Carolina,10488084,3.14%
9,9,MI,Michigan,9986857,3.02%


# Loading

In [39]:
from sqlalchemy import create_engine

In [40]:
#connect to local database
rds_connection_string = "todda:pwd1!@#$@localhost:5432/restaurant_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [41]:
engine.table_names()

['restaurant', 'state_population']

In [48]:
populate_by_state.to_sql(name='state_population', con=engine, if_exists='append', index=False)

In [49]:
pd.read_sql_query('select * from state_population', con=engine).head()

Unnamed: 0,id,State Abbreviation,State Name,Population,Percent of Total Population
0,0,CA,California,39512223,11.96%
1,1,TX,Texas,28995881,8.68%
2,2,FL,Florida,21477737,6.44%
3,3,NY,New York,19453561,5.91%
4,4,PA,Pennsylvania,12801989,3.87%


In [52]:
fast_food_transformed.columns

Index(['ID', 'Categories', 'City', 'Restaurant_Name', 'State'], dtype='object')

In [53]:
fast_food_transformed.to_sql(name="restaurant", con=engine, if_exists='append', index=False)

In [54]:
pd.read_sql_query('select * from restaurant', con=engine).head()

Unnamed: 0,ID,Categories,City,Restaurant_Name,State
0,AWrSh_KgsVYjT2BJAzaH,"Fast Food Restaurants,Hamburgers and Hot Dogs,...",Greeley,Carl's Jr.,CO
1,AWEKlA-LIxWefVJwxG9B,"Restaurant,Mexican Restaurants,Fast Food Resta...",Grand Junction,Del Taco,CO
2,AWrSfAcYsVYjT2BJAzPt,"Sandwich Shops,Fast Food Restaurants,Restauran...",Grand Junction,Which Wich,CO
3,AWrSa3NAQTFama1Xpkbz,"Fast Food Restaurants,Mexican Restaurants,Rest...",Aurora,Chipotle Mexican Grill,CO
4,AWrSaVGzZ4Yw-wtdgcaB,"Fast Food Restaurants,Mexican Restaurants,Rest...",Denver,Taco Bell,CO
