In [116]:
import psycopg2
import pandas as pd
import matplotlib.pyplot as plt

In [13]:
DBNAME = "opportunity_youth"

In [14]:
conn = psycopg2.connect(dbname=DBNAME)

In [15]:
cursor = conn.cursor()
cursor.execute("""SELECT table_name FROM information_schema.tables
       WHERE table_schema = 'public'""")
tables = []
for table in cursor.fetchall():
    tables.append(table[0])
tables

['pums_2017',
 'puma_names_2010',
 'wa_jobs_2017',
 'wa_geo_xwalk',
 'ct_puma_xwalk']

In [16]:
pums_2017_df = pd.read_sql("SELECT * FROM pums_2017;", conn)
puma_names_2010_df = pd.read_sql("SELECT * FROM puma_names_2010;", conn)
wa_jobs_2017_df = pd.read_sql("SELECT * FROM wa_jobs_2017;", conn)
wa_geo_xwalk_df = pd.read_sql("SELECT * FROM wa_geo_xwalk;", conn)
ct_puma_xwalk_df = pd.read_sql("SELECT * FROM ct_puma_xwalk;", conn)

In [170]:
geoid_in_king_county_df = pd.read_sql("""
SELECT geoid 
FROM puma_names_2010 
WHERE state_name 
LIKE 'Washington%' 
AND puma_name LIKE 
'King County%';""", conn)
geoid_in_king_county = [x for x in geoid_in_king_county_df]
geoid_in_king_county

['geoid']

In [147]:
# CA01 Num Number of jobs for workers age 29 or younger
# CA02 Num Number of jobs for workers age 30 to 54
# CA03 Num Number of jobs for workers age 55 or older
# CE01 Num Number of jobs with earnings $1250/month or less
# CE02 Num Number of jobs with earnings $1251/month to $3333/month
# CE03 Num Number of jobs with earnings greater than $3333/month 
# 29 CR01 Num Number of jobs for workers with Race: White
# 30 CR02 Num Number of jobs for workers with Race: Black or African American
# 31 CR03 Num Number of jobs for workers with Race: American Indian or Alaska Native
# 32 CR04 Num Number of jobs for workers with Race: Asian
# 33 CR05 Num Number of jobs for workers with Race: Native Hawaiian or Other Pacific Islander
# 34 CR07 Num Number of jobs for workers with Race: Two or More Race Groups
# 35 CT01 Num Number of jobs for workers with Ethnicity: Not Hispanic or Latino
# 36 CT02 Num Number of jobs for workers with Ethnicity: Hispanic or Latino
# CS01 Num Number of jobs for workers with Sex: Male17
# CS02 Num Number of jobs for workers with Sex: Female17

# JOIN puma_names_2010 P
# ON SUBSTRING(j.w_geocode, 0, 8) = P.geoid # but need to get first 7 ch from w_geocode
# WHERE state_name LIKE 'Washington%' AND puma_name LIKE 'King County%'

wa_jobs_2017_df = pd.read_sql("""
SELECT CA01, CA02, CA03, CE01, CE02, CE03, CR01, CR02, CR03, CR04, CR05, CT02, CR07, CS01, CS02, w_geocode
FROM wa_jobs_2017;
""", conn)

wa_jobs_2017_df.columns = ["29 or younger","30 to 54", "55 or older",
                           "1250/month or less", "$1251/month to $3333/month", "greater than $3333/month",
                           "White", "Black or African American", "American Indian or Alaska Native", "Asian", "Native Hawaiian or Other Pacific Islander", "Hispanic or Latino", "Two or More Race Groups",
                           "Male", "Female", "geocode"]

wa_jobs_2017_by_age_df = wa_jobs_2017_df.filter(["29 or younger","30 to 54", "55 or older"], axis=1)
wa_jobs_2017_by_income_df = wa_jobs_2017_df.filter(["1250/month or less", "$1251/month to $3333/month", "greater than $3333/month"], axis=1)
wa_jobs_2017_by_race_df = wa_jobs_2017_df.filter(["White", "Black or African American", "American Indian or Alaska Native", "Asian", "Native Hawaiian or Other Pacific Islander", "Hispanic or Latino", "Two or More Race Groups"], axis=1)
wa_jobs_2017_by_sex_df = wa_jobs_2017_df.filter(['Male', 'Female'], axis=1)

# fig, ax = plt.subplots(figsize=(16,8))
# ax.bar(wa_jobs_2017_by_race_df.columns, wa_jobs_2017_by_race_df.sum())
# plt.xticks(rotation=90);


29 or younger                                 721616.0
30 to 54                                     1756482.0
55 or older                                   710919.0
1250/month or less                            579878.0
$1251/month to $3333/month                    964457.0
greater than $3333/month                     1644682.0
White                                        2571281.0
Black or African American                     140160.0
American Indian or Alaska Native               43293.0
Asian                                         307549.0
Native Hawaiian or Other Pacific Islander      19402.0
Hispanic or Latino                            332464.0
Two or More Race Groups                       107332.0
Male                                         1647982.0
Female                                       1541035.0
geocode                                            inf
dtype: float64


In [44]:
#wa_geo_xwalk
# tabblk2010 Char15 2010 Census Tabulation Block Code
# st Char2 FIPS State Code
# stname Char100 State Name
# cty Char5 FIPS County Code
# ctyname Char100 County or County Equivalent Name
# trct Char11 Census Tract Code
# trctname Char100 Tract Name, formatted with County and State
# zcta Char5 ZIP Code Tabulation Area (ZCTA) Code
# zctaname Char100 ZCTA Name
# stplc Char7 Nationally Unique Place Code, (FIPS State + FIPS Place)
# stplcname Char100 Place Name
# ctycsub Char10 Nationally Unique County Subdivision Code, (FIPS State + FIPS County + FIPS County Subdivision)
# ctycsubname Char100 County Subdivision Name
# blklatdd Num Latitude (in decimal degrees) of block internal point.23
# blklondd Num Longitude (in decimal degrees) of block internal point. 23
# createdate Char8 Date on which data was created, formatted as YYYYMMDD 

wa_geo_xwalk_df = pd.read_sql("""
SELECT ctyname, ctycsubname, blklatdd, blklondd
FROM wa_geo_xwalk
WHERE ctyname LIKE 'King County%';
""", conn)
wa_geo_xwalk_df


Unnamed: 0,ctyname,ctycsubname,blklatdd,blklondd
0,"King County, WA ...","Snoqualmie Valley CCD (King, WA) ...",47.690229,-121.715711
1,"King County, WA ...","Snoqualmie Valley CCD (King, WA) ...",47.707243,-121.626151
2,"King County, WA ...","Seattle CCD (King, WA) ...",47.369791,-122.241507
3,"King County, WA ...","Seattle CCD (King, WA) ...",47.501092,-122.324844
4,"King County, WA ...","Seattle CCD (King, WA) ...",47.483771,-122.378470
...,...,...,...,...
35826,"King County, WA ...","Federal Way-Auburn CCD (King, WA) ...",47.363858,-122.346140
35827,"King County, WA ...","Seattle CCD (King, WA) ...",47.622133,-122.385803
35828,"King County, WA ...","Vashon Island CCD (King, WA) ...",47.511522,-122.482877
35829,"King County, WA ...","Seattle CCD (King, WA) ...",47.590672,-122.367992


In [133]:
pd.read_sql("""SELECT * FROM puma_names_2010 WHERE state_name LIKE 'Washington%' AND puma_name LIKE 'King County%';""", conn)

Unnamed: 0,state_fips,state_name,cpuma0010,puma,geoid,gisjoin,puma_name
0,53,Washington ...,1044,11606,5311606,G53011606,"King County (Northwest)--Shoreline, Kenmore & ..."
1,53,Washington ...,1044,11607,5311607,G53011607,"King County (Northwest)--Redmond, Kirkland Cit..."
2,53,Washington ...,1044,11608,5311608,G53011608,King County (Northwest Central)--Greater Belle...
3,53,Washington ...,1044,11609,5311609,G53011609,"King County (Central)--Sammamish, Issaquah, Me..."
4,53,Washington ...,1044,11610,5311610,G53011610,"King County (Central)--Renton City, Fairwood, ..."
5,53,Washington ...,1044,11613,5311613,G53011613,King County (Southwest Central)--Kent City ...
6,53,Washington ...,1044,11614,5311614,G53011614,King County (Southwest)--Auburn City & Lakelan...
7,53,Washington ...,1044,11615,5311615,G53011615,"King County (Southeast)--Maple Valley, Covingt..."
8,53,Washington ...,1044,11616,5311616,G53011616,"King County (Northeast)--Snoqualmie City, Cott..."
9,53,Washington ...,1045,11611,5311611,G53011611,"King County (West Central)--Burien, SeaTac, Tu..."


In [9]:
#puma : area code id
#agep : age
#cow : class of working 9 .Unemployed and last worked 5 years ago or earlier or never
#wkl : When last worked
#sch : education

puma_names_2010_df = pd.read_sql("""
SELECT puma, agep, cow, wkl, sch, schl, rt
FROM pums_2017
WHERE (agep BETWEEN 16.0 AND 24.9)
AND rt = 'P'
AND CAST(sch AS int) = 1 OR sch IS null
AND CAST(cow AS int) = 9
""", conn)
puma_names_2010_df

Unnamed: 0,puma,agep,cow,wkl,sch,schl,rt
0,11801,17.0,1,1,1,16,P
1,10800,23.0,1,1,1,16,P
2,11802,22.0,5,1,1,16,P
3,10300,19.0,1,1,1,16,P
4,10703,24.0,1,1,1,16,P
...,...,...,...,...,...,...,...
15147,10702,21.0,,3,1,17,P
15148,11705,23.0,4,2,1,17,P
15149,11603,22.0,,3,1,12,P
15150,11701,21.0,1,1,1,14,P


In [11]:
df_filtered_joined = pd.read_sql("""
SELECT serialno, puma, agep, cow, wkl, sch, schl, rt, puma_name
FROM pums_2017
JOIN puma_names_2010
USING (puma)
WHERE (agep BETWEEN 16.0 AND 24.9)
AND rt = 'P'
AND CAST(sch AS int) = 1 OR sch IS null
AND CAST(cow AS int) = 9
AND state_name LIKE 'Washington%'
AND puma_name LIKE 'King County%'
""", conn)
df_filtered_joined = df_filtered_joined.drop_duplicates().reset_index(drop=True)
df_filtered_joined

Unnamed: 0,serialno,puma,agep,cow,wkl,sch,schl,rt,puma_name
0,2013000000181,11801,17.0,1,1,1,16,P,Kitsap County (North)--Bainbridge Island City ...
1,2013000000664,10800,23.0,1,1,1,16,P,Grant & Kittitas Counties ...
2,2013000001126,11802,22.0,5,1,1,16,P,Kitsap County (South)--Bremerton & Port Orchar...
3,2013000001584,10300,19.0,1,1,1,16,P,Chelan & Douglas Counties ...
4,2013000001874,10703,24.0,1,1,1,16,P,"Walla Walla, Benton (Outer) & Franklin (Outer)..."
...,...,...,...,...,...,...,...,...,...
21729,2017001535873,11801,20.0,5,1,1,16,P,Kitsap County (North)--Bainbridge Island City ...
21730,2017001536752,11801,22.0,5,1,1,16,P,Kitsap County (North)--Bainbridge Island City ...
21731,2017001534402,11603,22.0,,3,1,12,P,Seattle City (Downtown)--Queen Anne & Magnolia...
21732,2017001536310,11701,21.0,1,1,1,14,P,"Snohomish County (Southwest)--Edmonds, Lynnwoo..."
