In [1]:
pip install sodapy

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sqlalchemy import create_engine
import requests
import json
from sodapy import Socrata

# California Data

In [3]:
#data collected from https://www.cde.ca.gov/ds/sd/sd/filesabd.asp
#https://www.oregon.gov/ode/reports-and-data/students/Pages/Student-Enrollment-Reports.aspx

url= "http://dq.cde.ca.gov/dataquest/dlfile/dlfile.aspx?cLevel=School&cYear=2019-20&cCat=Enrollment&cPage=filesenr.asp"
ca_enrollment_data = pd.read_table(url)
ca_enrollment_data


Unnamed: 0,CDS_CODE,COUNTY,DISTRICT,SCHOOL,ETHNIC,GENDER,KDGN,GR_1,GR_2,GR_3,...,GR_7,GR_8,UNGR_ELM,GR_9,GR_10,GR_11,GR_12,UNGR_SEC,ENR_TOTAL,ADULT
0,8618206005417,Del Norte,Del Norte County Unified,Margaret Keating Elementary,5,M,2,2,0,0,...,0,0,0,0,0,0,0,0,5,0
1,8618206005417,Del Norte,Del Norte County Unified,Margaret Keating Elementary,9,F,3,1,1,1,...,0,0,0,0,0,0,0,0,10,0
2,8618206005417,Del Norte,Del Norte County Unified,Margaret Keating Elementary,7,F,0,1,1,2,...,0,0,0,0,0,0,0,0,6,0
3,8618206005417,Del Norte,Del Norte County Unified,Margaret Keating Elementary,5,F,0,0,1,2,...,0,0,0,0,0,0,0,0,3,0
4,8618206005417,Del Norte,Del Norte County Unified,Margaret Keating Elementary,9,M,1,0,0,1,...,0,0,0,0,0,0,0,0,6,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131611,37681303732625,San Diego,Grossmont Union High,Grossmont High,5,M,0,0,0,0,...,0,0,0,139,125,121,133,0,518,0
131612,37681890000001,San Diego,Lakeside Union Elementary,"Nonpublic, Nonsectarian Schools",5,M,0,0,0,0,...,0,1,0,0,0,0,0,0,2,0
131613,37681890000001,San Diego,Lakeside Union Elementary,"Nonpublic, Nonsectarian Schools",6,F,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
131614,37681890000001,San Diego,Lakeside Union Elementary,"Nonpublic, Nonsectarian Schools",7,F,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0


In [4]:

#modify demo to include ethnicity as columns
ca_enrollment_race = ca_enrollment_data.pivot_table(values='ENR_TOTAL', index='COUNTY', columns='ETHNIC', aggfunc='sum')
ca_enrollment_race.head()

ETHNIC,0,1,2,3,4,5,6,7,9
COUNTY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Alameda,2410.0,597.0,60548.0,2276.0,10324.0,77433.0,20865.0,39186.0,13692.0
Alpine,1.0,36.0,,,,2.0,,26.0,5.0
Amador,32.0,99.0,20.0,8.0,20.0,925.0,20.0,2765.0,277.0
Butte,432.0,690.0,1777.0,100.0,124.0,7469.0,602.0,17002.0,1838.0
Calaveras,5.0,82.0,31.0,5.0,17.0,1128.0,39.0,3671.0,337.0


In [5]:
#rename the ethnicity columns
ca_enrollment_race_2 = ca_enrollment_race.rename(columns = {0:'OTHER',
                                    1:'AMERICAN_INDIAN',
                                    2:'ASIAN',
                                    3:'PACIFIC_ISLANDER',
                                    4:'FILIPINO',
                                    5:'LATINO',
                                    6:'AFRICAN_AMERICAN',
                                    7:'WHITE',
                                    9:'MULTI_RACE'})
ca_enrollment_race_2.reset_index(inplace=True)
ca_enrollment_race_2.head()

ETHNIC,COUNTY,OTHER,AMERICAN_INDIAN,ASIAN,PACIFIC_ISLANDER,FILIPINO,LATINO,AFRICAN_AMERICAN,WHITE,MULTI_RACE
0,Alameda,2410.0,597.0,60548.0,2276.0,10324.0,77433.0,20865.0,39186.0,13692.0
1,Alpine,1.0,36.0,,,,2.0,,26.0,5.0
2,Amador,32.0,99.0,20.0,8.0,20.0,925.0,20.0,2765.0,277.0
3,Butte,432.0,690.0,1777.0,100.0,124.0,7469.0,602.0,17002.0,1838.0
4,Calaveras,5.0,82.0,31.0,5.0,17.0,1128.0,39.0,3671.0,337.0


In [6]:

ca_enrollment_race_2['OTHER_TOTAL'] = ca_enrollment_race_2['OTHER'] + ca_enrollment_race_2['MULTI_RACE']
ca_enrollment_race_2['ASIAN_TOTAL'] = ca_enrollment_race_2['ASIAN'] + ca_enrollment_race_2['FILIPINO']
ca_enrollment_race_2.head(1)
ca_clean = ca_enrollment_race_2[['COUNTY', 'AMERICAN_INDIAN', 'PACIFIC_ISLANDER', 'LATINO','AFRICAN_AMERICAN','WHITE','OTHER_TOTAL','ASIAN_TOTAL']].copy()

#replace NAN with zero
ca_clean.fillna(0,inplace = True)
ca_clean.head()

# Add column of CA
ca_clean['STATE'] = "CA"
ca_clean

ca_clean.rename(
  columns={
    'ASIANS_TOTAL' : 'ASIAN',
    'OTHER_TOTAL' : 'OTHER/MULTI'
  },
  inplace=True
)
ca_clean

ca_clean.reset_index(inplace=False)
#ca_clean.drop(["ETHNIC"], axis=1)
#ca_clean.set_index('index')
#ca_clean.rename_axis(None)

ETHNIC,index,COUNTY,AMERICAN_INDIAN,PACIFIC_ISLANDER,LATINO,AFRICAN_AMERICAN,WHITE,OTHER/MULTI,ASIAN_TOTAL,STATE
0,0,Alameda,597.0,2276.0,77433.0,20865.0,39186.0,16102.0,70872.0,CA
1,1,Alpine,36.0,0.0,2.0,0.0,26.0,6.0,0.0,CA
2,2,Amador,99.0,8.0,925.0,20.0,2765.0,309.0,40.0,CA
3,3,Butte,690.0,100.0,7469.0,602.0,17002.0,2270.0,1901.0,CA
4,4,Calaveras,82.0,5.0,1128.0,39.0,3671.0,342.0,48.0,CA
5,5,Colusa,55.0,9.0,3749.0,33.0,780.0,49.0,55.0,CA
6,6,Contra Costa,534.0,1100.0,65518.0,15329.0,51141.0,14070.0,30714.0,CA
7,7,Del Norte,613.0,3.0,942.0,25.0,2228.0,0.0,211.0,CA
8,8,El Dorado,240.0,78.0,6260.0,350.0,20759.0,1852.0,1587.0,CA
9,9,Fresno,1199.0,480.0,135997.0,10046.0,34222.0,5048.0,20866.0,CA


In [7]:
list(ca_clean.columns)

['COUNTY',
 'AMERICAN_INDIAN',
 'PACIFIC_ISLANDER',
 'LATINO',
 'AFRICAN_AMERICAN',
 'WHITE',
 'OTHER/MULTI',
 'ASIAN_TOTAL',
 'STATE']

In [8]:
ca_clean_df  =  pd.DataFrame(ca_clean.to_records())
ca_clean_df

Unnamed: 0,index,COUNTY,AMERICAN_INDIAN,PACIFIC_ISLANDER,LATINO,AFRICAN_AMERICAN,WHITE,OTHER/MULTI,ASIAN_TOTAL,STATE
0,0,Alameda,597.0,2276.0,77433.0,20865.0,39186.0,16102.0,70872.0,CA
1,1,Alpine,36.0,0.0,2.0,0.0,26.0,6.0,0.0,CA
2,2,Amador,99.0,8.0,925.0,20.0,2765.0,309.0,40.0,CA
3,3,Butte,690.0,100.0,7469.0,602.0,17002.0,2270.0,1901.0,CA
4,4,Calaveras,82.0,5.0,1128.0,39.0,3671.0,342.0,48.0,CA
5,5,Colusa,55.0,9.0,3749.0,33.0,780.0,49.0,55.0,CA
6,6,Contra Costa,534.0,1100.0,65518.0,15329.0,51141.0,14070.0,30714.0,CA
7,7,Del Norte,613.0,3.0,942.0,25.0,2228.0,0.0,211.0,CA
8,8,El Dorado,240.0,78.0,6260.0,350.0,20759.0,1852.0,1587.0,CA
9,9,Fresno,1199.0,480.0,135997.0,10046.0,34222.0,5048.0,20866.0,CA


In [9]:
ca_clean_df.drop(columns='index',inplace =True)
ca_clean_df = ca_clean_df.rename( columns= {'ASIAN_TOTAL':'ASIAN',
                                           'OTHER/MULTI':'OTHER_MULTI'})
ca_clean_df

Unnamed: 0,COUNTY,AMERICAN_INDIAN,PACIFIC_ISLANDER,LATINO,AFRICAN_AMERICAN,WHITE,OTHER_MULTI,ASIAN,STATE
0,Alameda,597.0,2276.0,77433.0,20865.0,39186.0,16102.0,70872.0,CA
1,Alpine,36.0,0.0,2.0,0.0,26.0,6.0,0.0,CA
2,Amador,99.0,8.0,925.0,20.0,2765.0,309.0,40.0,CA
3,Butte,690.0,100.0,7469.0,602.0,17002.0,2270.0,1901.0,CA
4,Calaveras,82.0,5.0,1128.0,39.0,3671.0,342.0,48.0,CA
5,Colusa,55.0,9.0,3749.0,33.0,780.0,49.0,55.0,CA
6,Contra Costa,534.0,1100.0,65518.0,15329.0,51141.0,14070.0,30714.0,CA
7,Del Norte,613.0,3.0,942.0,25.0,2228.0,0.0,211.0,CA
8,El Dorado,240.0,78.0,6260.0,350.0,20759.0,1852.0,1587.0,CA
9,Fresno,1199.0,480.0,135997.0,10046.0,34222.0,5048.0,20866.0,CA


# Oregon Data

In [10]:
url= "https://www.oregon.gov/ode/reports-and-data/students/Documents/fallmembershipreport_20192020.xlsx"

or_enrollment_data = pd.read_excel(url, sheet_name = "District (19-20)")
or_enrollment_data
list(or_enrollment_data.columns)
or_clean = or_enrollment_data[['County', 
                   '2019-20 American Indian/Alaska Native ', 
                   '2019-20 Asian ', 
                   '2019-20 Native Hawaiian/ Pacific Islander', 
                   '2019-20 Black/African American ', 
                   '2019-20 Hispanic/ Latino', 
                   '2019-20 White ', 
                   '2019-20 Multiracial ']].copy()
or_clean = or_clean.groupby(or_clean['County']).sum()

In [11]:
or_clean['STATE'] = "OR"
or_clean

Unnamed: 0_level_0,2019-20 American Indian/Alaska Native,2019-20 Asian,2019-20 Native Hawaiian/ Pacific Islander,2019-20 Black/African American,2019-20 Hispanic/ Latino,2019-20 White,2019-20 Multiracial,STATE
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Baker,42,54,10,31,469,3621,223,OR
Benton,69,420,38,88,1435,6369,652,OR
Clackamas,235,3029,256,639,9274,41984,4305,OR
Clatsop,36,53,24,20,1002,3852,219,OR
Columbia,85,71,25,39,757,5997,480,OR
Coos,266,100,40,91,1265,7401,901,OR
Crook,29,12,3,12,466,2399,150,OR
Curry,70,15,3,9,306,1530,324,OR
Deschutes,179,297,81,161,3732,21810,977,OR
Douglas,224,95,35,47,1452,11583,1008,OR


In [12]:
or_clean_df  =  pd.DataFrame(or_clean.to_records())
or_clean_df

Unnamed: 0,County,2019-20 American Indian/Alaska Native,2019-20 Asian,2019-20 Native Hawaiian/ Pacific Islander,2019-20 Black/African American,2019-20 Hispanic/ Latino,2019-20 White,2019-20 Multiracial,STATE
0,Baker,42,54,10,31,469,3621,223,OR
1,Benton,69,420,38,88,1435,6369,652,OR
2,Clackamas,235,3029,256,639,9274,41984,4305,OR
3,Clatsop,36,53,24,20,1002,3852,219,OR
4,Columbia,85,71,25,39,757,5997,480,OR
5,Coos,266,100,40,91,1265,7401,901,OR
6,Crook,29,12,3,12,466,2399,150,OR
7,Curry,70,15,3,9,306,1530,324,OR
8,Deschutes,179,297,81,161,3732,21810,977,OR
9,Douglas,224,95,35,47,1452,11583,1008,OR


In [13]:
or_clean_df.rename(
  columns={'County':'COUNTY',
 '2019-20 American Indian/Alaska Native ':'AMERICAN_INDIAN',
 '2019-20 Native Hawaiian/ Pacific Islander':'PACIFIC_ISLANDER',
 '2019-20 Hispanic/ Latino':'LATINO',
 '2019-20 Black/African American ':'AFRICAN_AMERICAN',
 '2019-20 White ':'WHITE',
 '2019-20 Multiracial ':'OTHER_MULTI',
 '2019-20 Asian ':'ASIAN',
  },
  inplace=True
)
or_clean_df

Unnamed: 0,COUNTY,AMERICAN_INDIAN,ASIAN,PACIFIC_ISLANDER,AFRICAN_AMERICAN,LATINO,WHITE,OTHER_MULTI,STATE
0,Baker,42,54,10,31,469,3621,223,OR
1,Benton,69,420,38,88,1435,6369,652,OR
2,Clackamas,235,3029,256,639,9274,41984,4305,OR
3,Clatsop,36,53,24,20,1002,3852,219,OR
4,Columbia,85,71,25,39,757,5997,480,OR
5,Coos,266,100,40,91,1265,7401,901,OR
6,Crook,29,12,3,12,466,2399,150,OR
7,Curry,70,15,3,9,306,1530,324,OR
8,Deschutes,179,297,81,161,3732,21810,977,OR
9,Douglas,224,95,35,47,1452,11583,1008,OR


## Washington Data

In [14]:
# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
#client = Socrata("data.wa.gov", None)

# Example authenticated client (needed for non-public datasets):
client = Socrata("data.wa.gov",
                  apitoken,
                  username=username1,
                  password=password1)

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("gtd3-scga", limit=20000)

# Convert to pandas DataFrame
wa_data_df = pd.DataFrame.from_records(results)
wa_data_df


Unnamed: 0,schoolyear,organizationlevel,county,esdname,districtname,schoolname,gradelevel,all_students,female,gender_x,...,non_mobile,non_section_504,students_without_disabilities,dataasof,esdorganizationid,districtcode,districtorganizationid,schoolcode,schoolorganizationid,currentschooltype
0,2019-20,State,Multiple,State Total,State Total,State Total,10th Grade,84690,41154,139,...,82499,79726,74529,2020-02-11T00:00:00.000,,,,,,
1,2019-20,State,Multiple,State Total,State Total,State Total,11th Grade,83731,40877,153,...,81156,78730,74017,2020-02-11T00:00:00.000,,,,,,
2,2019-20,State,Multiple,State Total,State Total,State Total,12th Grade,91585,44319,194,...,86412,86395,79242,2020-02-11T00:00:00.000,,,,,,
3,2019-20,State,Multiple,State Total,State Total,State Total,1st Grade,83765,40675,21,...,81966,82713,73240,2020-02-11T00:00:00.000,,,,,,
4,2019-20,State,Multiple,State Total,State Total,State Total,2nd Grade,83362,40569,29,...,81697,81768,72221,2020-02-11T00:00:00.000,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19925,2019-20,District,Yakima,Educational Service District 105,Zillah School District,District Total,8th Grade,114,50,,...,113,111,103,2020-02-11T00:00:00.000,100002,39205,100305,,,
19926,2019-20,District,Yakima,Educational Service District 105,Zillah School District,District Total,9th Grade,96,38,,...,95,96,85,2020-02-11T00:00:00.000,100002,39205,100305,,,
19927,2019-20,District,Yakima,Educational Service District 105,Zillah School District,District Total,AllGrades,1335,620,,...,1313,1311,1174,2020-02-11T00:00:00.000,100002,39205,100305,,,
19928,2019-20,District,Yakima,Educational Service District 105,Zillah School District,District Total,Kindergarten,77,40,,...,77,77,72,2020-02-11T00:00:00.000,100002,39205,100305,,,


In [15]:
wa_temp = wa_data_df.loc[wa_data_df['organizationlevel'] != 'State'].copy()
wa_temp.fillna(0, inplace=True)
wa_temp['STATE'] = 'WA'
wa_clean = wa_temp[['county',
                    'american_indian_alaskan_native',
                    'asian',
                    'black_african_american',
                    'hispanic_latino_of_any_race',
                    'native_hawaiian_other_pacific',
                    'two_or_more_races',
                    'white']].copy()



wa_clean['asian'] = wa_clean['asian'].astype(str).astype(int)
wa_clean['american_indian_alaskan_native'] = wa_clean['american_indian_alaskan_native'].astype(str).astype(int)
wa_clean['black_african_american'] = wa_clean['black_african_american'].astype(str).astype(int)
wa_clean['hispanic_latino_of_any_race'] = wa_clean['hispanic_latino_of_any_race'].astype(str).astype(int)
wa_clean['native_hawaiian_other_pacific'] = wa_clean['native_hawaiian_other_pacific'].astype(str).astype(int)
wa_clean['two_or_more_races'] = wa_clean['two_or_more_races'].astype(str).astype(int)
wa_clean['white'] = wa_clean['white'].astype(str).astype(int)


wa_clean = wa_clean.groupby(wa_clean['county']).sum()


In [16]:
wa_clean['STATE'] = 'WA'

# wa_clean = wa_temp[['county',
#                     'american_indian_alaskan_native',
#                     'asian',
#                     'black_african_american',
#                     'hispanic_latino_of_any_race',
#                     'native_hawaiian_other_pacific',
#                     'two_or_more_races',
#                     'white']].copy()

# #wa_clean = wa_clean.groupby(wa_clean['county']).sum()
wa_clean

Unnamed: 0_level_0,american_indian_alaskan_native,asian,black_african_american,hispanic_latino_of_any_race,native_hawaiian_other_pacific,two_or_more_races,white,STATE
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Adams,40,84,24,17748,8,136,3864,WA
Asotin,232,120,124,1036,8,732,11144,WA
Benton,596,3548,2504,50682,304,6964,90738,WA
Chelan,152,484,212,25900,28,1208,25486,WA
Clallam,2524,608,596,6268,112,4046,24506,WA
Clark,1348,13452,6518,63952,4646,24864,206744,WA
Columbia,24,24,20,324,0,32,1364,WA
Cowlitz,804,896,520,12724,588,4184,50916,WA
Douglas,140,200,96,16540,16,772,13392,WA
Ferry,912,8,28,116,20,324,2000,WA


In [17]:

wa_clean.rename(
  columns={'county':'COUNTY',
 'american_indian_alaskan_native':'AMERICAN_INDIAN',
 'native_hawaiian_other_pacific':'PACIFIC_ISLANDER',
 'hispanic_latino_of_any_race':'LATINO',
 'black_african_american':'AFRICAN_AMERICAN',
 '2019-20 White ':'WHITE',
 'two_or_more_races':'OTHER_MULTI',
 'asian':'ASIAN',
  },
  inplace=True
)
wa_clean

Unnamed: 0_level_0,AMERICAN_INDIAN,ASIAN,AFRICAN_AMERICAN,LATINO,PACIFIC_ISLANDER,OTHER_MULTI,white,STATE
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Adams,40,84,24,17748,8,136,3864,WA
Asotin,232,120,124,1036,8,732,11144,WA
Benton,596,3548,2504,50682,304,6964,90738,WA
Chelan,152,484,212,25900,28,1208,25486,WA
Clallam,2524,608,596,6268,112,4046,24506,WA
Clark,1348,13452,6518,63952,4646,24864,206744,WA
Columbia,24,24,20,324,0,32,1364,WA
Cowlitz,804,896,520,12724,588,4184,50916,WA
Douglas,140,200,96,16540,16,772,13392,WA
Ferry,912,8,28,116,20,324,2000,WA


In [18]:
wa_clean['STATE'] = 'WA'
wa_clean

Unnamed: 0_level_0,AMERICAN_INDIAN,ASIAN,AFRICAN_AMERICAN,LATINO,PACIFIC_ISLANDER,OTHER_MULTI,white,STATE
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Adams,40,84,24,17748,8,136,3864,WA
Asotin,232,120,124,1036,8,732,11144,WA
Benton,596,3548,2504,50682,304,6964,90738,WA
Chelan,152,484,212,25900,28,1208,25486,WA
Clallam,2524,608,596,6268,112,4046,24506,WA
Clark,1348,13452,6518,63952,4646,24864,206744,WA
Columbia,24,24,20,324,0,32,1364,WA
Cowlitz,804,896,520,12724,588,4184,50916,WA
Douglas,140,200,96,16540,16,772,13392,WA
Ferry,912,8,28,116,20,324,2000,WA


In [19]:
wa_clean['AMERICAN_INDIAN'] = round(wa_clean['AMERICAN_INDIAN'],1)
wa_clean

Unnamed: 0_level_0,AMERICAN_INDIAN,ASIAN,AFRICAN_AMERICAN,LATINO,PACIFIC_ISLANDER,OTHER_MULTI,white,STATE
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Adams,40,84,24,17748,8,136,3864,WA
Asotin,232,120,124,1036,8,732,11144,WA
Benton,596,3548,2504,50682,304,6964,90738,WA
Chelan,152,484,212,25900,28,1208,25486,WA
Clallam,2524,608,596,6268,112,4046,24506,WA
Clark,1348,13452,6518,63952,4646,24864,206744,WA
Columbia,24,24,20,324,0,32,1364,WA
Cowlitz,804,896,520,12724,588,4184,50916,WA
Douglas,140,200,96,16540,16,772,13392,WA
Ferry,912,8,28,116,20,324,2000,WA


In [20]:
wa_clean_df  =  pd.DataFrame(wa_clean.to_records())

wa_clean_df = wa_clean_df.rename(columns={'county':'COUNTY',
                                         'white':'WHITE'})
wa_clean_df

Unnamed: 0,COUNTY,AMERICAN_INDIAN,ASIAN,AFRICAN_AMERICAN,LATINO,PACIFIC_ISLANDER,OTHER_MULTI,WHITE,STATE
0,Adams,40,84,24,17748,8,136,3864,WA
1,Asotin,232,120,124,1036,8,732,11144,WA
2,Benton,596,3548,2504,50682,304,6964,90738,WA
3,Chelan,152,484,212,25900,28,1208,25486,WA
4,Clallam,2524,608,596,6268,112,4046,24506,WA
5,Clark,1348,13452,6518,63952,4646,24864,206744,WA
6,Columbia,24,24,20,324,0,32,1364,WA
7,Cowlitz,804,896,520,12724,588,4184,50916,WA
8,Douglas,140,200,96,16540,16,772,13392,WA
9,Ferry,912,8,28,116,20,324,2000,WA


In [21]:
temp = pd.concat([ca_clean_df, wa_clean_df])

In [22]:
clean_df = pd.concat([temp, or_clean_df])

In [23]:
clean_df

Unnamed: 0,COUNTY,AMERICAN_INDIAN,PACIFIC_ISLANDER,LATINO,AFRICAN_AMERICAN,WHITE,OTHER_MULTI,ASIAN,STATE
0,Alameda,597.0,2276.0,77433.0,20865.0,39186.0,16102.0,70872.0,CA
1,Alpine,36.0,0.0,2.0,0.0,26.0,6.0,0.0,CA
2,Amador,99.0,8.0,925.0,20.0,2765.0,309.0,40.0,CA
3,Butte,690.0,100.0,7469.0,602.0,17002.0,2270.0,1901.0,CA
4,Calaveras,82.0,5.0,1128.0,39.0,3671.0,342.0,48.0,CA
...,...,...,...,...,...,...,...,...,...
31,Wallowa,1.0,1.0,45.0,0.0,775.0,42.0,0.0,OR
32,Wasco,86.0,52.0,1205.0,13.0,1997.0,178.0,24.0,OR
33,Washington,396.0,781.0,25580.0,2025.0,43231.0,6220.0,9120.0,OR
34,Wheeler,9.0,5.0,136.0,26.0,1331.0,87.0,24.0,OR


In [24]:
rds_connection_string = "postgres:postgres@localhost:5432/ELT_Project"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [25]:
#Check for tables
engine.table_names()

['ELT']

In [33]:
clean_df.to_sql(name='ELT', con=engine, if_exists='append', index=False) #check other options for if_exists

In [32]:
pd.read_sql_query('SELECT * FROM "ELT"', con=engine).head()

Unnamed: 0,COUNTY,AMERICAN_INDIAN,PACIFIC_ISLANDER,LATINO,AFRICAN_AMERICAN,WHITE,OTHER_MULTI,ASIAN,STATE
0,Alameda,597.0,2276.0,77433.0,20865.0,39186.0,16102.0,70872.0,CA
1,Alpine,36.0,0.0,2.0,0.0,26.0,6.0,0.0,CA
2,Amador,99.0,8.0,925.0,20.0,2765.0,309.0,40.0,CA
3,Butte,690.0,100.0,7469.0,602.0,17002.0,2270.0,1901.0,CA
4,Calaveras,82.0,5.0,1128.0,39.0,3671.0,342.0,48.0,CA


In [None]:
#https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html

#df.insert(0, 'New_ID', range(0, 0 + len(df)))
#df
