# Load Data

First we will create the dataframe for the Colorado employment data including the adjusted and nonadjusted totals.

In [5]:
import pandas as pd

# Read the combined Colorado employment CSV (adjusted and nonadjusted totals).
co_employment = pd.read_csv('~/jupyter/sp22Capstone_01_Group02/data/COCES_Combined.csv')

# Replace the file's header with short snake_case names; the SQL table created
# later in this notebook uses the same column names.
co_employment.columns = ['area','date','industry_code','industry','adjusted_employment','nonadjusted_employment']

# Parse the date strings into datetimes and store the area label as a string.
co_employment['date'] = pd.to_datetime(co_employment['date'])
co_employment['area'] = co_employment['area'].astype(str)

# Preview the first rows to confirm the load and the renamed columns.
co_employment.head()

Unnamed: 0,area,date,industry_code,industry,adjusted_employment,nonadjusted_employment
0,Colorado,1990-01-01,0,Total Nonfarm,1502200,1477700
1,Colorado,1990-01-01,5000000,Total Private,1227600,1209900
2,Colorado,1990-01-01,6000000,Goods Producing,249900,241200
3,Colorado,1990-01-01,7000000,Service-Providing,1252300,1236500
4,Colorado,1990-01-01,8000000,Private Service Providing,977700,968700


In [6]:
# Check the column dtypes after the conversions above: `date` should be
# datetime64 and `area` an object (string) column.
co_employment.dtypes

area                              object
date                      datetime64[ns]
industry_code                      int64
industry                          object
adjusted_employment                int64
nonadjusted_employment             int64
dtype: object

# We will likely also want to know which industry codes correspond to which industries for simplicity.

In [7]:
# Count the rows for every (industry_code, industry) pair; this doubles as a
# lookup table from code to industry name.
(
    co_employment
    .groupby(['industry_code', 'industry'])
    .size()
    .reset_index(name='count')
)

Unnamed: 0,industry_code,industry,count
0,0,Total Nonfarm,384
1,5000000,Total Private,384
2,6000000,Goods Producing,384
3,7000000,Service-Providing,384
4,8000000,Private Service Providing,384
5,10000000,Mining and Logging,384
6,15000000,"Mining, Logging and Construction",384
7,20000000,Construction,384
8,30000000,Manufacturing,384
9,31000000,Durable Goods,384


# Finally, we will need to create a SQL table for each dataframe.

In [8]:
import getpass
from sqlalchemy.engine.url import URL
from sqlalchemy import create_engine
%reload_ext sql

mypasswd = getpass.getpass()
username = 'nnfd2' # Replace with your pawprint
host = 'pgsql.dsa.lan'
database = 'caponl_22g2'

postgres_db = {'drivername': 'postgres',
               'username': username,
               'password': mypasswd,
               'host': host,
               'database': database}
engine = create_engine(URL(**postgres_db), echo=False)


connection_string = f'postgres://{username}:{mypasswd}@{host}/{database}'
%sql $connection_string
del mypasswd

········


In [13]:
%%sql

DROP TABLE IF EXISTS co_employment CASCADE;

CREATE TABLE co_employment (
    -- Column names match the dataframe columns so it can be appended directly.
    area                   VARCHAR(100),
    date                   DATE,
    industry_code          INT,
    industry               VARCHAR(100),
    adjusted_employment    INT,
    nonadjusted_employment INT,
    -- A given industry may appear only once per date.
    CONSTRAINT pk_co_employment PRIMARY KEY (date, industry)
);

GRANT ALL PRIVILEGES ON co_employment TO nnfd2, dgyw5, jwcp64, gfdbq

 * postgres://nnfd2:***@pgsql.dsa.lan/caponl_22g2
Done.
Done.
Done.


[]

# Load Data to Tables

In [14]:
# Append the dataframe rows to the co_employment table, sending multi-row
# INSERT statements in chunks of 1000 rows and skipping the dataframe index.
co_employment.to_sql(
    name='co_employment',
    con=engine,
    if_exists='append',
    index=False,
    chunksize=1000,
    method='multi',
)

In [17]:
%%sql

SELECT
    date,
    industry,
    SUM(adjusted_employment),
    SUM(nonadjusted_employment)
FROM co_employment
-- Restrict the result to a single industry code.
WHERE industry_code = 70000000
GROUP BY industry, date
ORDER BY date



 * postgres://nnfd2:***@pgsql.dsa.lan/caponl_22g2
384 rows affected.


date,industry,sum,sum_1
1990-01-01,Leisure and Hospitality,164600,163700
1990-02-01,Leisure and Hospitality,164600,164500
1990-03-01,Leisure and Hospitality,165700,167700
1990-04-01,Leisure and Hospitality,166500,165800
1990-05-01,Leisure and Hospitality,167400,160200
1990-06-01,Leisure and Hospitality,167700,171900
1990-07-01,Leisure and Hospitality,167400,174700
1990-08-01,Leisure and Hospitality,168500,177200
1990-09-01,Leisure and Hospitality,168600,169200
1990-10-01,Leisure and Hospitality,168200,160300
