In [1]:
import pandas as pd
import json
import pymongo
from pymongo import MongoClient

In [2]:
# Load the 2006 BLS county-level unemployment CSV into a pandas dataframe

unemployment_2006_csv = "2006_unemployment_by_county.csv"
county_unemployment_2006 = pd.read_csv(unemployment_2006_csv)

# Preview the first five rows
county_unemployment_2006.head(5)

Unnamed: 0,laus_code,state_fips_code,county_fips_code,county_state,year,labor_force,employed,unemployed,unemployment_rate
0,CN0100100000000,1,1,"Autauga , AL",2006,24398,23585,813,3.3
1,CN0100300000000,1,3,"Baldwin , AL",2006,79711,77147,2564,3.2
2,CN0100500000000,1,5,"Barbour , AL",2006,10705,10096,609,5.7
3,CN0100700000000,1,7,"Bibb , AL",2006,8850,8477,373,4.2
4,CN0100900000000,1,9,"Blount , AL",2006,26770,25902,868,3.2


In [3]:
# Split the combined BLS county_state column ("<county>, <state>") into separate
# county and state columns.

# Work on a copy so re-running this cell never stacks changes onto the frame read from CSV.
county_unemployment_2006_df = county_unemployment_2006.copy()

# Split once, from the right: the state abbreviation is the last part, and a single
# split guarantees at most two pieces even if a county name itself contains a comma.
county_state_parts_2006 = county_unemployment_2006_df["county_state"].str.rsplit(",", n=1, expand=True)

# The raw values carry padding around the comma (e.g. "Autauga , AL"), so strip both
# halves. Rows without a comma get a missing state value.
county_unemployment_2006_df["county"] = county_state_parts_2006[0].str.strip()
county_unemployment_2006_df["state"] = county_state_parts_2006[1].str.strip()

In [4]:
# Remove the original combined county_state column, then discard every row that
# still has a missing value in any remaining column

clean_county_2006_df = (
    county_unemployment_2006_df
    .drop(columns=['county_state'])
    .dropna()
)


In [5]:
# Exclude Puerto Rico: keep only the rows whose state value does not contain 'PR'

is_puerto_rico_2006 = clean_county_2006_df['state'].str.contains('PR')
clean_county_2006_df = clean_county_2006_df.loc[~is_puerto_rico_2006]
clean_county_2006_df

Unnamed: 0,laus_code,state_fips_code,county_fips_code,year,labor_force,employed,unemployed,unemployment_rate,county,state
0,CN0100100000000,1,1,2006,24398,23585,813,3.3,Autauga,AL
1,CN0100300000000,1,3,2006,79711,77147,2564,3.2,Baldwin,AL
2,CN0100500000000,1,5,2006,10705,10096,609,5.7,Barbour,AL
3,CN0100700000000,1,7,2006,8850,8477,373,4.2,Bibb,AL
4,CN0100900000000,1,9,2006,26770,25902,868,3.2,Blount,AL
5,CN0101100000000,1,11,2006,3702,3372,330,8.9,Bullock,AL
6,CN0101300000000,1,13,2006,9238,8709,529,5.7,Butler,AL
7,CN0101500000000,1,15,2006,54063,51872,2191,4.1,Calhoun,AL
8,CN0101700000000,1,17,2006,15749,14828,921,5.8,Chambers,AL
9,CN0101900000000,1,19,2006,11854,11352,502,4.2,Cherokee,AL


In [6]:
# Order the cleaned 2006 counties from highest to lowest unemployment rate

sorted_county_2006_df = clean_county_2006_df.sort_values(by='unemployment_rate', ascending=False)
sorted_county_2006_df.head(5)

Unnamed: 0,laus_code,state_fips_code,county_fips_code,year,labor_force,employed,unemployed,unemployment_rate,county,state
80,CN0215800000000,2,158,2006,2539,2012,527,20.8,Kusilvak Census Area,AK
196,CN0602500000000,6,25,2006,63023,53304,9719,15.4,Imperial,CA
108,CN0402700000000,4,27,2006,77764,66129,11635,15.0,Yuma,AZ
86,CN0220100000000,2,201,2006,2353,2016,337,14.3,Prince of Wales-Outer Ketchikan Census Area,AK
2359,CN4601700000000,46,17,2006,549,471,78,14.2,Buffalo,SD


In [7]:
# Cast the state and county FIPS code columns to strings

for fips_column in ("state_fips_code", "county_fips_code"):
    sorted_county_2006_df[fips_column] = sorted_county_2006_df[fips_column].astype(str)

In [8]:
# Split county_fips_code on "." so anything after a decimal point lands in its own
# list element; every value in the column becomes a list of string parts

county_code_parts_2006 = sorted_county_2006_df["county_fips_code"].str.split(pat=".")
sorted_county_2006_df["county_fips_code"] = county_code_parts_2006
sorted_county_2006_df["county_fips_code"]

80      [158]
196      [25]
108      [27]
86      [201]
2359     [17]
1279    [117]
70       [50]
93      [290]
2674    [323]
88      [232]
1291    [141]
1422     [63]
1280    [119]
189      [11]
83      [180]
2339     [67]
2766    [507]
2726    [427]
1227     [13]
1442    [103]
85      [188]
2467     [99]
2088    [111]
2340     [69]
3076     [78]
2338     [65]
2349     [87]
1297    [153]
2431     [27]
1801     [29]
        ...  
2783     [33]
1653     [19]
2023     [87]
1662     [37]
845     [119]
2848     [79]
2769      [5]
1657     [27]
1712    [137]
2925    [685]
1700    [113]
2862    [107]
2905    [510]
1675     [63]
1659     [31]
2838     [59]
383     [131]
1985     [11]
1681     [75]
1711    [135]
1702    [117]
3111      [5]
584      [73]
2032    [105]
2816     [13]
588      [81]
1600     [25]
583      [71]
3126     [35]
1636     [97]
Name: county_fips_code, Length: 3131, dtype: object

In [9]:
# Split state_fips_code on "." so anything after a decimal point lands in its own
# list element; every value in the column becomes a list of string parts

state_code_parts_2006 = sorted_county_2006_df["state_fips_code"].str.split(pat=".")
sorted_county_2006_df["state_fips_code"] = state_code_parts_2006
sorted_county_2006_df["state_fips_code"]

80       [2]
196      [6]
108      [4]
86       [2]
2359    [46]
1279    [26]
70       [2]
93       [2]
2674    [48]
88       [2]
1291    [26]
1422    [28]
1280    [26]
189      [6]
83       [2]
2339    [45]
2766    [48]
2726    [48]
1227    [26]
1442    [28]
85       [2]
2467    [47]
2088    [39]
2340    [45]
3076    [55]
2338    [45]
2349    [45]
1297    [26]
2431    [47]
1801    [35]
        ... 
2783    [49]
1653    [31]
2023    [38]
1662    [31]
845     [19]
2848    [51]
2769    [49]
1657    [31]
1712    [31]
2925    [51]
1700    [31]
2862    [51]
2905    [51]
1675    [31]
1659    [31]
2838    [51]
383     [12]
1985    [38]
1681    [31]
1711    [31]
1702    [31]
3111    [56]
584     [16]
2032    [38]
2816    [51]
588     [16]
1600    [30]
583     [16]
3126    [56]
1636    [30]
Name: state_fips_code, Length: 3131, dtype: object

In [10]:
# Build the county code in the format used to match the CDC data: take the first
# part of each split value and left-pad it with zeros to three characters

padded_county_codes_2006 = [
    code_parts[0].zfill(3)
    for code_parts in sorted_county_2006_df["county_fips_code"]
]
sorted_county_2006_df["Updated county_fips_code"] = padded_county_codes_2006
sorted_county_2006_df["Updated county_fips_code"]

80      158
196     025
108     027
86      201
2359    017
1279    117
70      050
93      290
2674    323
88      232
1291    141
1422    063
1280    119
189     011
83      180
2339    067
2766    507
2726    427
1227    013
1442    103
85      188
2467    099
2088    111
2340    069
3076    078
2338    065
2349    087
1297    153
2431    027
1801    029
       ... 
2783    033
1653    019
2023    087
1662    037
845     119
2848    079
2769    005
1657    027
1712    137
2925    685
1700    113
2862    107
2905    510
1675    063
1659    031
2838    059
383     131
1985    011
1681    075
1711    135
1702    117
3111    005
584     073
2032    105
2816    013
588     081
1600    025
583     071
3126    035
1636    097
Name: Updated county_fips_code, Length: 3131, dtype: object

In [11]:
# Build the state code in the format used to match the CDC data: keep only the
# first part of each split value (no zero padding is applied to the state code)

leading_state_codes_2006 = [
    code_parts[0]
    for code_parts in sorted_county_2006_df["state_fips_code"]
]
sorted_county_2006_df["Updated state_fips_code"] = leading_state_codes_2006
sorted_county_2006_df["Updated state_fips_code"]

80       2
196      6
108      4
86       2
2359    46
1279    26
70       2
93       2
2674    48
88       2
1291    26
1422    28
1280    26
189      6
83       2
2339    45
2766    48
2726    48
1227    26
1442    28
85       2
2467    47
2088    39
2340    45
3076    55
2338    45
2349    45
1297    26
2431    47
1801    35
        ..
2783    49
1653    31
2023    38
1662    31
845     19
2848    51
2769    49
1657    31
1712    31
2925    51
1700    31
2862    51
2905    51
1675    31
1659    31
2838    51
383     12
1985    38
1681    31
1711    31
1702    31
3111    56
584     16
2032    38
2816    51
588     16
1600    30
583     16
3126    56
1636    30
Name: Updated state_fips_code, Length: 3131, dtype: object

In [12]:
# Build the combined FIPS code: state code followed by the zero-padded county code

state_codes_2006 = sorted_county_2006_df["Updated state_fips_code"]
county_codes_2006 = sorted_county_2006_df["Updated county_fips_code"]
new_fips = state_codes_2006 + county_codes_2006
new_fips

80       2158
196      6025
108      4027
86       2201
2359    46017
1279    26117
70       2050
93       2290
2674    48323
88       2232
1291    26141
1422    28063
1280    26119
189      6011
83       2180
2339    45067
2766    48507
2726    48427
1227    26013
1442    28103
85       2188
2467    47099
2088    39111
2340    45069
3076    55078
2338    45065
2349    45087
1297    26153
2431    47027
1801    35029
        ...  
2783    49033
1653    31019
2023    38087
1662    31037
845     19119
2848    51079
2769    49005
1657    31027
1712    31137
2925    51685
1700    31113
2862    51107
2905    51510
1675    31063
1659    31031
2838    51059
383     12131
1985    38011
1681    31075
1711    31135
1702    31117
3111    56005
584     16073
2032    38105
2816    51013
588     16081
1600    30025
583     16071
3126    56035
1636    30097
Length: 3131, dtype: object

In [13]:
# Store the combined FIPS codes on the 2006 dataframe as the new_fips column

sorted_county_2006_df = sorted_county_2006_df.assign(new_fips=new_fips)
sorted_county_2006_df

Unnamed: 0,laus_code,state_fips_code,county_fips_code,year,labor_force,employed,unemployed,unemployment_rate,county,state,Updated county_fips_code,Updated state_fips_code,new_fips
80,CN0215800000000,[2],[158],2006,2539,2012,527,20.8,Kusilvak Census Area,AK,158,2,2158
196,CN0602500000000,[6],[25],2006,63023,53304,9719,15.4,Imperial,CA,025,6,6025
108,CN0402700000000,[4],[27],2006,77764,66129,11635,15.0,Yuma,AZ,027,4,4027
86,CN0220100000000,[2],[201],2006,2353,2016,337,14.3,Prince of Wales-Outer Ketchikan Census Area,AK,201,2,2201
2359,CN4601700000000,[46],[17],2006,549,471,78,14.2,Buffalo,SD,017,46,46017
1279,CN2611700000000,[26],[117],2006,28428,24656,3772,13.3,Montcalm,MI,117,26,26117
70,CN0205000000000,[2],[50],2006,6980,6057,923,13.2,Bethel Census Area,AK,050,2,2050
93,CN0229000000000,[2],[290],2006,2790,2424,366,13.1,Yukon-Koyukuk Census Area,AK,290,2,2290
2674,CN4832300000000,[48],[323],2006,19535,17030,2505,12.8,Maverick,TX,323,48,48323
88,CN0223200000000,[2],[232],2006,1898,1657,241,12.7,Skagway-Hoonah-Angoon Census Area,AK,232,2,2232


In [14]:
# Repeat all steps taken to transform the 2006 BLS data for 2016 (cells 2-13),
# starting with loading the 2016 CSV into a pandas dataframe

unemployment_2016_csv = "2016_unemployment_by_county.csv"
county_unemployment_2016 = pd.read_csv(unemployment_2016_csv)

# Preview the first five rows
county_unemployment_2016.head(5)

Unnamed: 0,laus_code,state_fips_code,county_fips_code,county_state,year,labor_force,employed,unemployed,unemployment_rate
0,CN0100100000000,1,1,"Autauga County, AL",2016,25966,24645,1321,5.1
1,CN0100300000000,1,3,"Baldwin County, AL",2016,90670,85839,4831,5.3
2,CN0100500000000,1,5,"Barbour County, AL",2016,8417,7717,700,8.3
3,CN0100700000000,1,7,"Bibb County, AL",2016,8623,8067,556,6.4
4,CN0100900000000,1,9,"Blount County, AL",2016,24623,23298,1325,5.4


In [15]:
# Split the combined BLS county_state column ("<county>, <state>") into separate
# county and state columns.

# Work on a copy so re-running this cell never stacks changes onto the frame read from CSV.
county_unemployment_2016_df = county_unemployment_2016.copy()

# Split once, from the right: the state abbreviation is the last part, and a single
# split guarantees at most two pieces even if a county name itself contains a comma.
county_state_parts_2016 = county_unemployment_2016_df["county_state"].str.rsplit(",", n=1, expand=True)

# The raw values have a space after the comma (e.g. "Autauga County, AL"), so strip
# both halves. Rows without a comma get a missing state value.
county_unemployment_2016_df["county"] = county_state_parts_2016[0].str.strip()
county_unemployment_2016_df["state"] = county_state_parts_2016[1].str.strip()

In [16]:
# Drop the combined county_state column and every row with a missing value,
# then exclude Puerto Rico (rows whose state value contains 'PR')

clean_county_2016_df = (
    county_unemployment_2016_df
    .drop(columns=['county_state'])
    .dropna()
)

is_puerto_rico_2016 = clean_county_2016_df['state'].str.contains('PR')
clean_county_2016_df = clean_county_2016_df.loc[~is_puerto_rico_2016]

In [17]:
# Order the cleaned 2016 counties from highest to lowest unemployment rate
sorted_county_2016_df = clean_county_2016_df.sort_values(by='unemployment_rate', ascending=False)
sorted_county_2016_df.head(5)

Unnamed: 0,laus_code,state_fips_code,county_fips_code,year,labor_force,employed,unemployed,unemployment_rate,county,state
198,CN0602500000000,6,25,2016,75207,57049,18158,24.1,Imperial County,CA
81,CN0215800000000,2,158,2016,2725,2136,589,21.6,Kusilvak Census Area,AK
1068,CN2115300000000,21,153,2016,3147,2520,627,19.9,Magoffin County,KY
110,CN0402700000000,4,27,2016,94117,76345,17772,18.9,Yuma County,AZ
95,CN0229000000000,2,290,2016,2423,1990,433,17.9,Yukon-Koyukuk Census Area,AK


In [18]:
# Cast the state and county FIPS code columns to strings
for fips_column in ("state_fips_code", "county_fips_code"):
    sorted_county_2016_df[fips_column] = sorted_county_2016_df[fips_column].astype(str)

In [19]:
# Split county_fips_code on "."; every value becomes a list of string parts
county_code_parts_2016 = sorted_county_2016_df["county_fips_code"].str.split(pat=".")
sorted_county_2016_df["county_fips_code"] = county_code_parts_2016
sorted_county_2016_df["county_fips_code"]

198      [25]
81      [158]
1068    [153]
110      [27]
95      [290]
86      [188]
191      [11]
1431     [63]
65      [131]
1810     [29]
70       [50]
1427     [55]
2775    [507]
1057    [131]
2735    [427]
1039     [95]
2997     [13]
84      [180]
3020     [59]
1058    [133]
82      [164]
88      [198]
3014     [47]
2766    [489]
76      [105]
1173    [123]
2545     [47]
1051    [119]
1023     [63]
96        [1]
        ...  
2401     [83]
2361      [3]
1690     [75]
1997     [17]
2421    [123]
1994     [11]
1668     [31]
1681     [57]
2036     [95]
284      [79]
2034     [91]
305     [121]
1744    [183]
2677    [311]
2389     [59]
1720    [135]
307     [125]
922      [71]
1704    [103]
1667     [29]
276      [63]
275      [61]
2624    [205]
847     [119]
303     [117]
2029     [81]
1624     [55]
292      [95]
1999     [21]
248       [9]
Name: county_fips_code, Length: 3140, dtype: object

In [20]:
# Split state_fips_code on "."; every value becomes a list of string parts
state_code_parts_2016 = sorted_county_2016_df["state_fips_code"].str.split(pat=".")
sorted_county_2016_df["state_fips_code"] = state_code_parts_2016
sorted_county_2016_df["state_fips_code"]

198      [6]
81       [2]
1068    [21]
110      [4]
95       [2]
86       [2]
191      [6]
1431    [28]
65       [1]
1810    [35]
70       [2]
1427    [28]
2775    [48]
1057    [21]
2735    [48]
1039    [21]
2997    [54]
84       [2]
3020    [54]
1058    [21]
82       [2]
88       [2]
3014    [54]
2766    [48]
76       [2]
1173    [22]
2545    [48]
1051    [21]
1023    [21]
96       [4]
        ... 
2401    [46]
2361    [46]
1690    [31]
1997    [38]
2421    [46]
1994    [38]
1668    [31]
1681    [31]
2036    [38]
284      [8]
2034    [38]
305      [8]
1744    [31]
2677    [48]
2389    [46]
1720    [31]
307      [8]
922     [20]
1704    [31]
1667    [31]
276      [8]
275      [8]
2624    [48]
847     [19]
303      [8]
2029    [38]
1624    [30]
292      [8]
1999    [38]
248      [8]
Name: state_fips_code, Length: 3140, dtype: object

In [21]:
# Take the first part of each split county code and left-pad it with zeros to three characters
padded_county_codes_2016 = [
    code_parts[0].zfill(3)
    for code_parts in sorted_county_2016_df["county_fips_code"]
]
sorted_county_2016_df["Updated county_fips_code"] = padded_county_codes_2016
sorted_county_2016_df["Updated county_fips_code"]

198     025
81      158
1068    153
110     027
95      290
86      188
191     011
1431    063
65      131
1810    029
70      050
1427    055
2775    507
1057    131
2735    427
1039    095
2997    013
84      180
3020    059
1058    133
82      164
88      198
3014    047
2766    489
76      105
1173    123
2545    047
1051    119
1023    063
96      001
       ... 
2401    083
2361    003
1690    075
1997    017
2421    123
1994    011
1668    031
1681    057
2036    095
284     079
2034    091
305     121
1744    183
2677    311
2389    059
1720    135
307     125
922     071
1704    103
1667    029
276     063
275     061
2624    205
847     119
303     117
2029    081
1624    055
292     095
1999    021
248     009
Name: Updated county_fips_code, Length: 3140, dtype: object

In [22]:
# Keep only the first part of each split state code (no zero padding is applied)
leading_state_codes_2016 = [
    code_parts[0]
    for code_parts in sorted_county_2016_df["state_fips_code"]
]
sorted_county_2016_df["Updated state_fips_code"] = leading_state_codes_2016
sorted_county_2016_df["Updated state_fips_code"]

198      6
81       2
1068    21
110      4
95       2
86       2
191      6
1431    28
65       1
1810    35
70       2
1427    28
2775    48
1057    21
2735    48
1039    21
2997    54
84       2
3020    54
1058    21
82       2
88       2
3014    54
2766    48
76       2
1173    22
2545    48
1051    21
1023    21
96       4
        ..
2401    46
2361    46
1690    31
1997    38
2421    46
1994    38
1668    31
1681    31
2036    38
284      8
2034    38
305      8
1744    31
2677    48
2389    46
1720    31
307      8
922     20
1704    31
1667    31
276      8
275      8
2624    48
847     19
303      8
2029    38
1624    30
292      8
1999    38
248      8
Name: Updated state_fips_code, Length: 3140, dtype: object

In [23]:
# Build the combined FIPS code: state code followed by the zero-padded county code
state_codes_2016 = sorted_county_2016_df["Updated state_fips_code"]
county_codes_2016 = sorted_county_2016_df["Updated county_fips_code"]
new_fips = state_codes_2016 + county_codes_2016

In [24]:
# Store the combined FIPS codes on the 2016 dataframe as the new_fips column
sorted_county_2016_df = sorted_county_2016_df.assign(new_fips=new_fips)
sorted_county_2016_df

Unnamed: 0,laus_code,state_fips_code,county_fips_code,year,labor_force,employed,unemployed,unemployment_rate,county,state,Updated county_fips_code,Updated state_fips_code,new_fips
198,CN0602500000000,[6],[25],2016,75207,57049,18158,24.1,Imperial County,CA,025,6,6025
81,CN0215800000000,[2],[158],2016,2725,2136,589,21.6,Kusilvak Census Area,AK,158,2,2158
1068,CN2115300000000,[21],[153],2016,3147,2520,627,19.9,Magoffin County,KY,153,21,21153
110,CN0402700000000,[4],[27],2016,94117,76345,17772,18.9,Yuma County,AZ,027,4,4027
95,CN0229000000000,[2],[290],2016,2423,1990,433,17.9,Yukon-Koyukuk Census Area,AK,290,2,2290
86,CN0218800000000,[2],[188],2016,2964,2490,474,16.0,Northwest Arctic Borough,AK,188,2,2188
191,CN0601100000000,[6],[11],2016,10910,9214,1696,15.5,Colusa County,CA,011,6,6011
1431,CN2806300000000,[28],[63],2016,2229,1898,331,14.8,Jefferson County,MS,063,28,28063
65,CN0113100000000,[1],[131],2016,2822,2416,406,14.4,Wilcox County,AL,131,1,1131
1810,CN3502900000000,[35],[29],2016,10768,9218,1550,14.4,Luna County,NM,029,35,35029


In [25]:
# Merge 2006 and 2016 BLS unemployment data by county on new_fips.
# Columns present in both frames receive the default _x (2006) / _y (2016) suffixes.
# The join key passed via `on` is never suffixed, so it is already named new_fips and
# no rename is needed (the former rename of 'new_fips_y' matched no column).

merged_UE_df = pd.merge(sorted_county_2006_df, sorted_county_2016_df, on=['new_fips'], how='inner')

# Convert new_fips column into integer data-type

merged_UE_df['new_fips'] = merged_UE_df['new_fips'].astype(int)
merged_UE_df.head()

Unnamed: 0,laus_code_x,state_fips_code_x,county_fips_code_x,year_x,labor_force_x,employed_x,unemployed_x,unemployment_rate_x,county_x,state_x,...,county_fips_code_y,year_y,labor_force_y,employed_y,unemployed_y,unemployment_rate_y,county_y,state_y,Updated county_fips_code_y,Updated state_fips_code_y
0,CN0215800000000,[2],[158],2006,2539,2012,527,20.8,Kusilvak Census Area,AK,...,[158],2016,2725,2136,589,21.6,Kusilvak Census Area,AK,158,2
1,CN0602500000000,[6],[25],2006,63023,53304,9719,15.4,Imperial,CA,...,[25],2016,75207,57049,18158,24.1,Imperial County,CA,25,6
2,CN0402700000000,[4],[27],2006,77764,66129,11635,15.0,Yuma,AZ,...,[27],2016,94117,76345,17772,18.9,Yuma County,AZ,27,4
3,CN4601700000000,[46],[17],2006,549,471,78,14.2,Buffalo,SD,...,[17],2016,726,666,60,8.3,Buffalo County,SD,17,46
4,CN2611700000000,[26],[117],2006,28428,24656,3772,13.3,Montcalm,MI,...,[117],2016,27926,26454,1472,5.3,Montcalm County,MI,117,26


In [26]:
# Point at the CDC page for the 2006 county table and let pandas parse its HTML tables

url = "https://www.cdc.gov/drugoverdose/maps/rxcounty2006.html"
px_table_2006 = pd.read_html(url)

# read_html returns a list of tables; the first element is the one needed here

px_2006_columns = ['County', 'State', 'county_fips_code', '2006 Px Rate']
px_2006_df = pd.DataFrame(px_table_2006[0])
px_2006_df.columns = px_2006_columns
px_2006_df.head()

Unnamed: 0,County,State,county_fips_code,2006 Px Rate
0,"Aleutians East, AK",AK,2013,–
1,"Aleutians West, AK",AK,2016,–
2,"Anchorage, AK",AK,2020,71.5
3,"Bethel, AK",AK,2050,–
4,"Bristol Bay, AK",AK,2060,–


In [27]:
# Point at the CDC page for the 2016 county table and let pandas parse its HTML tables

url = "https://www.cdc.gov/drugoverdose/maps/rxcounty2016.html"
px_table_2016 = pd.read_html(url)

# read_html returns a list of tables; the first element is the one needed here

px_2016_columns = ['County', 'State', 'county_fips_code', '2016 Px Rate']
px_2016_df = pd.DataFrame(px_table_2016[0])
px_2016_df.columns = px_2016_columns
px_2016_df

Unnamed: 0,County,State,county_fips_code,2016 Px Rate
0,"Aleutians East, AK",AK,2013,–
1,"Aleutians West, AK",AK,2016,–
2,"Anchorage, AK",AK,2020,66.3
3,"Bethel, AK",AK,2050,–
4,"Bristol Bay, AK",AK,2060,–
5,"Denali, AK",AK,2068,–
6,"Dillingham, AK",AK,2070,–
7,"Fairbanks North Star, AK",AK,2090,48.4
8,"Haines, AK",AK,2100,–
9,"Hoonah-Angoon, AK",AK,2105,–


In [28]:
# Merge 2006 and 2016 CDC data into single dataframe on county_fips_code
# CDC county_fips_code already holds the state code prefix and the county code as one
# concatenated code; BLS data was separate and required concatenation

merged_df = pd.merge(px_2006_df, px_2016_df, on='county_fips_code', how='inner')

# County_y / State_y (the 2016 copies) are deliberately left in place: the column
# clean-up that follows the final BLS/CDC merge drops them by name. The earlier
# merged_df.drop(...) call here discarded its result, so it never removed them.
final_df = merged_df.rename(columns={'county_fips_code': 'new_fips',
                                     'County_x': 'County',
                                     'State_x': 'State',
                                     '2006 Px Rate': '2006_px_rate',
                                     '2016 Px Rate': '2016_px_rate'})

# CDC county column holds "<county>, <state abbreviation>"; keep only the county name.
# A single split from the right tolerates county names that contain a comma and
# avoids creating a throwaway column.

final_df['County'] = final_df['County'].str.rsplit(",", n=1).str[0]

# The CDC tables use '–' where no rate is given, so the rate columns are presumably
# parsed as text. Convert them to numbers ('–' and any other non-numeric value
# becomes NaN) and keep only counties with a rate for both years.

rate_columns = ['2006_px_rate', '2016_px_rate']
final_df[rate_columns] = final_df[rate_columns].apply(pd.to_numeric, errors='coerce')
final_df = final_df.dropna(subset=rate_columns)

# Convert new_fips column data to integer

final_df['new_fips'] = final_df['new_fips'].astype(int)

final_df['new_fips'].dtype

dtype('int32')

In [29]:
# Inner-join the merged BLS frame with the CDC frame on the shared new_fips key

merged_final = merged_UE_df.merge(final_df, how='inner', on='new_fips')
merged_final

Unnamed: 0,laus_code_x,state_fips_code_x,county_fips_code_x,year_x,labor_force_x,employed_x,unemployed_x,unemployment_rate_x,county_x,state_x,...,county_y,state_y,Updated county_fips_code_y,Updated state_fips_code_y,County,State,2006_px_rate,County_y,State_y,2016_px_rate
0,CN0602500000000,[6],[25],2006,63023,53304,9719,15.4,Imperial,CA,...,Imperial County,CA,025,6,Imperial,CA,51.7,"Imperial, CA",CA,52.4
1,CN0402700000000,[4],[27],2006,77764,66129,11635,15.0,Yuma,AZ,...,Yuma County,AZ,027,4,Yuma,AZ,46.9,"Yuma, AZ",AZ,56.6
2,CN2611700000000,[26],[117],2006,28428,24656,3772,13.3,Montcalm,MI,...,Montcalm County,MI,117,26,Montcalm,MI,88.2,"Montcalm, MI",MI,115.9
3,CN4832300000000,[48],[323],2006,19535,17030,2505,12.8,Maverick,TX,...,Maverick County,TX,323,48,Maverick,TX,28.0,"Maverick, TX",TX,28.4
4,CN2614100000000,[26],[141],2006,6233,5459,774,12.4,Presque Isle,MI,...,Presque Isle County,MI,141,26,Presque Isle,MI,73.0,"Presque Isle, MI",MI,52.4
5,CN2611900000000,[26],[119],2006,4353,3817,536,12.3,Montmorency,MI,...,Montmorency County,MI,119,26,Montmorency,MI,58.1,"Montmorency, MI",MI,64.3
6,CN0601100000000,[6],[11],2006,9915,8701,1214,12.2,Colusa,CA,...,Colusa County,CA,011,6,Colusa,CA,32.2,"Colusa, CA",CA,45.2
7,CN4506700000000,[45],[67],2006,13391,11781,1610,12.0,Marion,SC,...,Marion County,SC,067,45,Marion,SC,107.3,"Marion, SC",SC,115.6
8,CN4850700000000,[48],[507],2006,3749,3311,438,11.7,Zavala,TX,...,Zavala County,TX,507,48,Zavala,TX,26.0,"Zavala, TX",TX,0.7
9,CN4842700000000,[48],[427],2006,21432,18955,2477,11.6,Starr,TX,...,Starr County,TX,427,48,Starr,TX,24.2,"Starr, TX",TX,34.6


In [30]:
# Clean up columns in final dataframe

# Identifier and helper columns that are no longer needed once the frames are joined.
# Each label is listed exactly once ('state_fips_code_x' used to appear twice).
columns_to_drop = [
    'laus_code_x', 'laus_code_y',
    'state_fips_code_x', 'state_fips_code_y',
    'county_fips_code_x', 'county_fips_code_y',
    'Updated county_fips_code_x', 'Updated county_fips_code_y',
    'Updated state_fips_code_x', 'Updated state_fips_code_y',
    'County_y', 'State_y', 'County', 'State',
]

# _x columns come from the 2006 BLS frame and _y columns from the 2016 BLS frame.
# NOTE(review): 'unemployed:2006' / 'unemployed:2016' use a colon where the sibling
# columns use an underscore. The names are kept unchanged here because they become
# the field names of the stored records; confirm with consumers before renaming.
year_specific_names = {
    'year_x': '2006',
    'labor_force_x': 'labor_force_2006',
    'employed_x': 'employed_2006',
    'unemployed_x': 'unemployed:2006',
    'unemployment_rate_x': 'unemployment_rate_2006',
    'year_y': '2016',
    'labor_force_y': 'labor_force_2016',
    'employed_y': 'employed_2016',
    'unemployed_y': 'unemployed:2016',
    'unemployment_rate_y': 'unemployment_rate_2016',
}

final_df = merged_final.drop(columns=columns_to_drop)

final_df = final_df.rename(columns=year_specific_names)
final_df

Unnamed: 0,2006,labor_force_2006,employed_2006,unemployed:2006,unemployment_rate_2006,county_x,state_x,new_fips,2016,labor_force_2016,employed_2016,unemployed:2016,unemployment_rate_2016,county_y,state_y,2006_px_rate,2016_px_rate
0,2006,63023,53304,9719,15.4,Imperial,CA,6025,2016,75207,57049,18158,24.1,Imperial County,CA,51.7,52.4
1,2006,77764,66129,11635,15.0,Yuma,AZ,4027,2016,94117,76345,17772,18.9,Yuma County,AZ,46.9,56.6
2,2006,28428,24656,3772,13.3,Montcalm,MI,26117,2016,27926,26454,1472,5.3,Montcalm County,MI,88.2,115.9
3,2006,19535,17030,2505,12.8,Maverick,TX,48323,2016,24244,21523,2721,11.2,Maverick County,TX,28.0,28.4
4,2006,6233,5459,774,12.4,Presque Isle,MI,26141,2016,5288,4784,504,9.5,Presque Isle County,MI,73.0,52.4
5,2006,4353,3817,536,12.3,Montmorency,MI,26119,2016,3060,2750,310,10.1,Montmorency County,MI,58.1,64.3
6,2006,9915,8701,1214,12.2,Colusa,CA,6011,2016,10910,9214,1696,15.5,Colusa County,CA,32.2,45.2
7,2006,13391,11781,1610,12.0,Marion,SC,45067,2016,12670,11560,1110,8.8,Marion County,SC,107.3,115.6
8,2006,3749,3311,438,11.7,Zavala,TX,48507,2016,3887,3348,539,13.9,Zavala County,TX,26.0,0.7
9,2006,21432,18955,2477,11.6,Starr,TX,48427,2016,26358,22841,3517,13.3,Starr County,TX,24.2,34.6


In [31]:
# Load dataframe into Mongo database and collection

conn = "mongodb://localhost:27017"
client = pymongo.MongoClient(conn)

# declare database
db = client.final_db

# Declare the collection. The records have always been written to "myCollection",
# so bind the handle to that collection (the previous db.final_db handle was never
# used) and insert through it; the destination of the data is unchanged.
collection = db.myCollection

# One dict per dataframe row, keyed by column name.
records = final_df.to_dict('records')

# NOTE(review): insert_many only appends; re-running this cell stores a second copy
# of every record unless the collection is cleared first.
collection.insert_many(records)

<pymongo.results.InsertManyResult at 0x25c9b9087c8>