## Import Libraries

In [1]:

import os
import json
import requests
import itertools
import numpy as np
import pandas as pd
from pprint import pprint
from sqlalchemy.orm import Session
from sqlalchemy import create_engine


## Read in CSV for adding State Codes

In [2]:
# Import state code CSV
file_path = os.path.join(".","static","data","state_codes.csv")
state_codes_df = pd.read_csv(file_path)
state_codes_df = state_codes_df.rename(columns={"State":"STATE DESCRIPTION",
                                                "Code":"STATE CODE"})
state_codes_df = state_codes_df[["STATE DESCRIPTION","STATE CODE"]]

state_codes_df.head()

Unnamed: 0,STATE DESCRIPTION,STATE CODE
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA


## Connect to Postgress SQL DB

In [3]:

pg_user = 'postgres'
pg_password = 'password'
db_name = 'Enterprises'

connection_string = f"{pg_user}:{pg_password}@localhost:5432/{db_name}"
engine = create_engine(f'postgresql://{connection_string}')



#### Get table Names

In [4]:

engine.table_names()


['NAICS', 'BUSINESSES']

#### Create dataframes from SQL Query

In [5]:

bus_df = pd.read_sql_query('select * from "BUSINESSES"', con=engine)
naics_df = pd.read_sql_query('select * from "NAICS"', con=engine)


#### Refine Dataframe to only include relevant columns

In [6]:

business_df = bus_df[[ 'STATE DESCRIPTION', 'NAICS CODE', 'ENTERPRISE EMPLOYMENT SIZE 2', 'NUMBER OF FIRMS', 'NUMBER OF ESTABLISHMENTS', 'EMPLOYMENT', 'ANNUAL PAYROLL ($1,000)', 'YEAR']].copy()
business_df = business_df.rename(columns = {'ENTERPRISE EMPLOYMENT SIZE 2': 'ENTERPRISE EMPLOYMENT SIZE' })
business_df.head()


Unnamed: 0,STATE DESCRIPTION,NAICS CODE,ENTERPRISE EMPLOYMENT SIZE,NUMBER OF FIRMS,NUMBER OF ESTABLISHMENTS,EMPLOYMENT,"ANNUAL PAYROLL ($1,000)",YEAR
0,Alabama,11,Total,870,890,6428,208455,2008
1,Alabama,11,0-4,433,433,867,25451,2008
2,Alabama,11,5-9,259,259,1708,48440,2008
3,Alabama,11,10-19,112,112,1468,47620,2008
4,Alabama,11,<20,804,804,4043,121511,2008


#### Convert Payroll column to real value from rounded value

In [7]:

business_df['ANNUAL PAYROLL'] = business_df['ANNUAL PAYROLL ($1,000)'] * 1000


#### Delete Original Payroll Column

In [8]:

del business_df['ANNUAL PAYROLL ($1,000)']


#### Create joins to establish consolidated dataframe

In [9]:

business_df1 = pd.merge(business_df,naics_df,on='NAICS CODE',how='left')
business_df1.head()


Unnamed: 0,STATE DESCRIPTION,NAICS CODE,ENTERPRISE EMPLOYMENT SIZE,NUMBER OF FIRMS,NUMBER OF ESTABLISHMENTS,EMPLOYMENT,YEAR,ANNUAL PAYROLL,NAME
0,Alabama,11,Total,870,890,6428,2008,208455000,"Agriculture, Forestry, Fishing and Hunting"
1,Alabama,11,0-4,433,433,867,2008,25451000,"Agriculture, Forestry, Fishing and Hunting"
2,Alabama,11,5-9,259,259,1708,2008,48440000,"Agriculture, Forestry, Fishing and Hunting"
3,Alabama,11,10-19,112,112,1468,2008,47620000,"Agriculture, Forestry, Fishing and Hunting"
4,Alabama,11,<20,804,804,4043,2008,121511000,"Agriculture, Forestry, Fishing and Hunting"


#### Reorder Dataframe

In [14]:

business_df1 = business_df1[['YEAR','STATE DESCRIPTION', 'NAICS CODE', 'NAME', 'ENTERPRISE EMPLOYMENT SIZE', 'NUMBER OF FIRMS', 'NUMBER OF ESTABLISHMENTS', 'EMPLOYMENT', 'ANNUAL PAYROLL']].copy()
business_df1.head()


Unnamed: 0,YEAR,STATE DESCRIPTION,NAICS CODE,NAME,ENTERPRISE EMPLOYMENT SIZE,NUMBER OF FIRMS,NUMBER OF ESTABLISHMENTS,EMPLOYMENT,ANNUAL PAYROLL
0,2008,Alabama,11,"Agriculture, Forestry, Fishing and Hunting",Total,870,890,6428,208455000
1,2008,Alabama,11,"Agriculture, Forestry, Fishing and Hunting",0-4,433,433,867,25451000
2,2008,Alabama,11,"Agriculture, Forestry, Fishing and Hunting",5-9,259,259,1708,48440000
3,2008,Alabama,11,"Agriculture, Forestry, Fishing and Hunting",10-19,112,112,1468,47620000
4,2008,Alabama,11,"Agriculture, Forestry, Fishing and Hunting",<20,804,804,4043,121511000


### Table with all relavent data and joined state codes to be jsonified

In [17]:
# Data Table
table_df = pd.merge(business_df1, state_codes_df, on="STATE DESCRIPTION",how = "left")

table_df = table_df.reset_index['STATE CODE']
table_df.head()

Unnamed: 0,YEAR,STATE DESCRIPTION,NAICS CODE,NAME,ENTERPRISE EMPLOYMENT SIZE,NUMBER OF FIRMS,NUMBER OF ESTABLISHMENTS,EMPLOYMENT,ANNUAL PAYROLL,STATE CODE
0,2008,Alabama,11,"Agriculture, Forestry, Fishing and Hunting",Total,870,890,6428,208455000,AL
1,2008,Alabama,11,"Agriculture, Forestry, Fishing and Hunting",0-4,433,433,867,25451000,AL
2,2008,Alabama,11,"Agriculture, Forestry, Fishing and Hunting",5-9,259,259,1708,48440000,AL
3,2008,Alabama,11,"Agriculture, Forestry, Fishing and Hunting",10-19,112,112,1468,47620000,AL
4,2008,Alabama,11,"Agriculture, Forestry, Fishing and Hunting",<20,804,804,4043,121511000,AL
...,...,...,...,...,...,...,...,...,...,...
9456,2008,Wyoming,48-49,Transportation and Warehousing,10-19,59,60,740,30106000,WY
9457,2008,Wyoming,48-49,Transportation and Warehousing,<20,695,697,2281,87567000,WY
10939,2009,District of Columbia,11,"Agriculture, Forestry, Fishing and Hunting",Total,2,2,0,0,DC
11582,2009,Hawaii,99,Industries not classified,Total,54,54,0,981000,HI
