In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from mpl_toolkits.axes_grid1 import make_axes_locatable
import kmapper as km
import pyballmapper as pbm
import statistics
import seaborn as sns
from matplotlib.colors import ListedColormap
from matplotlib import colormaps as cm
import networkx as nx

# Election Data

https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ

In [48]:
# County Presidential Election Returns 2000-2020
df=pd.read_csv('US_data/countypres_2000-2020.csv')

In [49]:
df.head()

Unnamed: 0,year,state,state_po,county_name,county_fips,office,candidate,party,candidatevotes,totalvotes,version,mode
0,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,AL GORE,DEMOCRAT,4942,17208,20220315,TOTAL
1,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,GEORGE W. BUSH,REPUBLICAN,11993,17208,20220315,TOTAL
2,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,RALPH NADER,GREEN,160,17208,20220315,TOTAL
3,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,OTHER,OTHER,113,17208,20220315,TOTAL
4,2000,ALABAMA,AL,BALDWIN,1003.0,US PRESIDENT,AL GORE,DEMOCRAT,13997,56480,20220315,TOTAL


In [50]:
## Extracting the useful columns 
cols = ['year',
        'state',
        'county_name',
        'county_fips',
         'party',
         'candidatevotes'
         ,'totalvotes']

data = df[cols]
#calculating vote percentages
data['percentagevotes']=data['candidatevotes']/data['totalvotes']*100
pv = data.pivot_table(index=['county_fips','county_name','state'], columns=['party','year'], values='percentagevotes')
pv=pv.reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['percentagevotes']=data['candidatevotes']/data['totalvotes']*100


In [51]:
pv.columns = [f'{party}_{year}' if year else f'{party}' for party, year in pv.columns]
# Reset index
pv = pv.reset_index()
pv

Unnamed: 0,index,county_fips,county_name,state,DEMOCRAT_2000,DEMOCRAT_2004,DEMOCRAT_2008,DEMOCRAT_2012,DEMOCRAT_2016,DEMOCRAT_2020,...,OTHER_2008,OTHER_2012,OTHER_2016,OTHER_2020,REPUBLICAN_2000,REPUBLICAN_2004,REPUBLICAN_2008,REPUBLICAN_2012,REPUBLICAN_2016,REPUBLICAN_2020
0,0,1001.0,AUTAUGA,ALABAMA,28.719200,23.694039,25.773021,26.587832,23.769671,27.018365,...,0.613341,0.793916,3.463741,1.544833,69.694328,75.673522,73.613637,72.618252,72.766588,71.436802
1,1,1003.0,BALDWIN,ALABAMA,24.782224,22.502885,23.811922,21.589444,19.385601,22.409030,...,0.928599,1.052286,4.068687,1.419597,72.365439,76.415176,75.259479,77.358269,76.545712,76.171373
2,2,1005.0,BARBOUR,ALABAMA,49.908610,44.836225,48.985383,51.368494,46.527844,45.788173,...,0.576096,0.408376,1.375490,0.760601,49.023569,54.736940,50.438521,48.223130,52.096666,53.451226
3,3,1007.0,BIBB,ALABAMA,38.163639,27.486842,26.596483,26.152019,21.249575,20.698280,...,0.960204,1.021378,2.347205,0.875456,60.174623,72.000000,72.443313,72.826603,76.403220,78.426264
4,4,1009.0,BLOUNT,ALABAMA,27.691537,18.312872,14.513537,12.371907,8.425825,9.569378,...,1.467013,1.162209,2.239331,0.859069,70.477939,80.850074,84.019450,86.465884,89.334844,89.571553
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3190,3190,56039.0,TETON,WYOMING,38.529384,52.575051,60.457966,54.195743,57.923497,66.599040,...,2.605389,3.428123,11.023996,2.319605,52.286454,45.109605,36.936645,42.376134,31.052507,29.356868
3191,3191,56041.0,UINTA,WYOMING,22.243192,22.460092,27.524353,19.065464,14.191263,16.819960,...,4.015206,3.466448,13.152302,2.114388,73.726072,75.250588,68.460442,77.468088,72.656434,79.247278
3192,3192,56043.0,WASHAKIE,WYOMING,19.896322,20.782693,25.414634,20.131846,13.948610,16.145833,...,2.487805,3.448276,9.727320,1.760913,77.462355,77.783179,72.097561,76.419878,76.324069,80.481151
3193,3193,56045.0,WESTON,WYOMING,14.673203,17.099057,19.273579,12.563263,8.479864,10.112360,...,4.042179,3.453409,5.501985,1.320225,82.385621,80.748821,76.684241,83.983328,86.018151,87.275281


In [52]:
pv['GEO_ID'] = pv['county_fips'].astype(int)

In [53]:
elec_data=pv[['GEO_ID','county_name','state','DEMOCRAT_2012','REPUBLICAN_2012','DEMOCRAT_2016','REPUBLICAN_2016']]

# Demographic Data

Census and American Community Survey from 2016 using 5 year estimates
https://data.census.gov/

In [54]:
elec_data.to_csv('US results/Output datasets/election_data_US.csv')

## Education

In [55]:
edu_data=pd.read_csv('US_data/education.csv')

In [56]:
#Categories age 18-24 and aged 25 plus
edu_data

Unnamed: 0,GEO_ID,Nohighschool_1824,Highschool_1824,Somedegree_1824,Degree_1824,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus
0,0500000US01001,18.8,37.5,36.2,7.4,12.4,34.3,21.5,7.1,24.6
1,0500000US01003,18.7,31.9,41.3,8.1,9.9,28.7,22.4,9.4,29.5
2,0500000US01005,21.6,44.2,30.7,3.5,26.3,34.9,17.8,8.2,12.9
3,0500000US01007,25.5,31.1,42.0,1.5,19.3,41.8,21.1,5.8,12.0
4,0500000US01009,18.2,32.8,46.1,2.9,19.9,32.9,22.5,11.6,13.0
...,...,...,...,...,...,...,...,...,...,...
3215,0500000US72145,26.2,10.7,59.8,3.3,41.8,13.7,12.5,11.3,20.7
3216,0500000US72147,6.1,28.8,44.8,20.2,30.7,35.8,7.0,7.5,19.0
3217,0500000US72149,8.5,32.5,53.2,5.8,29.6,34.5,10.2,7.2,18.6
3218,0500000US72151,12.8,27.8,53.5,5.9,32.5,26.1,13.3,11.2,16.8


In [57]:
edu_data=edu_data[['GEO_ID','Nohighschool_25plus', 'Highschool_25plus',
       'Somedegree_25plus', 'Assocdegree_25plus', 'Degree_25plus']]

## Age

In [58]:
age_data=pd.read_csv('US_data/age.csv')

In [59]:
age_data.describe()

Unnamed: 0,Under18,18to24,25to29,30to34,35to39,40to44,45to49,50to54,55to59,60to64,Over65
count,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0
mean,22.574099,9.03764,5.797329,5.87795,5.711553,5.976988,6.344441,7.201988,7.273199,6.654969,17.54823
std,3.459112,3.526647,1.342807,1.107852,1.018101,1.012998,0.880416,0.92257,1.294687,1.3412,4.459395
min,1.1,0.7,0.0,0.0,0.0,1.1,0.3,0.0,1.8,1.4,3.9
25%,20.5,7.4,5.0,5.3,5.1,5.4,5.9,6.8,6.5,5.8,14.7
50%,22.5,8.3,5.7,5.8,5.7,6.0,6.4,7.2,7.2,6.5,17.2
75%,24.3,9.6,6.4,6.4,6.3,6.6,6.9,7.7,7.9,7.3,19.825
max,40.3,55.7,15.2,15.1,10.2,15.8,12.5,13.9,31.6,15.1,53.1


In [60]:
age_data['25to34']=age_data['25to29']+age_data['30to34']
age_data['35to49']=age_data['35to39']+age_data['40to44']+age_data['45to49']
age_data['50to64']=age_data['50to54']+age_data['55to59']+age_data['60to64']

In [61]:
age_data.columns

Index(['GEO_ID', 'Under18', '18to24', '25to29', '30to34', '35to39', '40to44',
       '45to49', '50to54', '55to59', '60to64', 'Over65', '25to34', '35to49',
       '50to64'],
      dtype='object')

In [62]:
age_data=age_data[['GEO_ID', 'Under18', '18to24', '25to34', '35to49',
       '50to64','Over65', ]]

### Ethnicity

In [63]:
eth_data=pd.read_csv('US_data/ethnicity.csv')

In [64]:
eth_data

Unnamed: 0,GEO_ID,White,Black
0,0500000US01001,76.7,20.9
1,0500000US01003,88.0,9.0
2,0500000US01005,47.3,48.6
3,0500000US01007,77.5,20.8
4,0500000US01009,91.7,2.1
...,...,...,...
3216,0500000US72145,59.4,16.6
3217,0500000US72147,35.8,26.0
3218,0500000US72149,58.3,16.0
3219,0500000US72151,43.9,20.1


### Rural/Urban

In [65]:
rural_data=pd.read_csv('US_data/rural_urban.csv')

In [66]:
rural_data['Rural']=(rural_data['Rural']/rural_data['Tot_pop '])*100

In [67]:
rural_data=rural_data[['GEO_ID','Rural']]

In [68]:
rural_data.describe()

Unnamed: 0,Rural
count,3221.0
mean,62.770482
std,34.341945
min,0.0
25%,33.509863
50%,65.105101
75%,100.0
max,100.0


### Income data

In [69]:
income_data=pd.read_csv('US_Data/income_cat.csv')
income_data['GEO_ID']=income_data['GEO_ID'].astype(object)

In [70]:
income_data

Unnamed: 0,GEO_ID,lessthan50K,50to75k,75to100k,morethan100k
0,1001,46.4,19.5,13.2,20.8
1,1003,48.5,18.6,12.0,20.9
2,1005,65.9,14.6,9.4,9.9
3,1007,60.3,17.7,11.4,10.6
4,1009,53.9,21.8,11.0,13.3
...,...,...,...,...,...
3109,56037,36.3,18.6,15.1,30.0
3110,56039,29.4,20.3,14.3,36.0
3111,56041,47.7,16.4,13.3,22.5
3112,56043,53.4,20.4,11.9,14.3


In [71]:
income_data.describe()

Unnamed: 0,lessthan50K,50to75k,75to100k,morethan100k
count,3114.0,3114.0,3114.0,3114.0
mean,52.949615,18.539788,11.672319,16.839274
std,10.404013,2.787136,2.709991,7.932256
min,14.8,6.6,1.3,2.5
25%,46.5,16.8,9.9,11.6
50%,53.4,18.6,11.9,15.0
75%,60.3,20.3,13.5,19.8
max,83.0,30.2,32.4,63.0


### Industry data

In [72]:
industry_data=pd.read_csv('US_data/industry.csv')

In [73]:
industry_data.describe()

Unnamed: 0,manu_arg
count,3220.0
mean,19.032764
std,8.30797
min,0.2
25%,12.8
50%,18.5
75%,24.5
max,69.6


### Unemployment data

In [74]:
unemp_data=pd.read_csv('US_data/unemployment.csv')

In [75]:
poverty_data=pd.read_csv('US_data/poverty.csv')

In [76]:
poverty_data

Unnamed: 0,GEO_ID,Poverty
0,0500000US01001,12.3
1,0500000US01003,13.0
2,0500000US01005,26.4
3,0500000US01007,16.5
4,0500000US01009,16.5
...,...,...
3215,0500000US72145,46.0
3216,0500000US72147,35.9
3217,0500000US72149,50.8
3218,0500000US72151,49.5


In [77]:
health_data=pd.read_csv('US_data/countyhealth.csv')
health_data=health_data.dropna()
health_data['FIPS']=health_data['FIPS'].astype(object)
health_data[['Adult obesity raw value','Poor or fair health raw value','Unemployment raw value']]=health_data[['Adult obesity raw value','Poor or fair health raw value','Unemployment raw value']]*100

In [78]:
health_data.tail()

Unnamed: 0,FIPS,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value
3187,56037,29.2,13.9,4.162206
3188,56039,12.5,11.1,4.795996
3189,56041,29.6,14.7,5.03488
3190,56043,24.7,14.3,4.511628
3191,56045,29.4,13.4,3.425693


In [79]:
health_data.describe()

Unnamed: 0,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value
count,3190.0,3190.0,3190.0
mean,30.903301,16.913346,6.258686
std,4.467052,4.951684,2.287594
min,10.7,7.2,1.18856
25%,28.5,13.0,4.674522
50%,31.1,15.9,6.033636
75%,33.675,20.0,7.500364
max,46.6,41.7,23.66843


## Merged dataset

In [80]:
demo_data = (edu_data
             .merge(age_data, on='GEO_ID')
             .merge(eth_data, on='GEO_ID')
             .merge(rural_data, on='GEO_ID')
             .merge(industry_data, on='GEO_ID')
             .merge(poverty_data,on='GEO_ID'))

In [81]:
demo_data

Unnamed: 0,GEO_ID,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,Over65,White,Black,Rural,manu_arg,Poverty
0,0500000US01001,12.4,34.3,21.5,7.1,24.6,25.2,8.6,12.2,21.0,19.1,14.0,76.7,20.9,41.030801,15.2,12.3
1,0500000US01003,9.9,28.7,22.4,9.4,29.5,22.2,7.7,11.3,19.1,21.0,18.7,88.0,9.0,32.815672,10.6,13.0
2,0500000US01005,26.3,34.9,17.8,8.2,12.9,21.5,9.4,13.8,19.1,19.7,16.5,47.3,48.6,64.882080,26.9,26.4
3,0500000US01007,19.3,41.8,21.1,5.8,12.0,21.1,9.2,13.8,21.4,19.7,14.9,77.5,20.8,100.000000,25.0,16.5
4,0500000US01009,19.9,32.9,22.5,11.6,13.0,23.6,8.0,11.5,19.8,19.9,17.2,91.7,2.1,90.719682,18.5,16.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3214,0500000US72145,41.8,13.7,12.5,11.3,20.7,22.2,9.8,12.7,19.3,19.0,17.1,59.4,16.6,5.602611,17.2,46.0
3215,0500000US72147,30.7,35.8,7.0,7.5,19.0,21.7,9.7,9.8,18.0,21.2,19.6,35.8,26.0,21.157835,7.3,35.9
3216,0500000US72149,29.6,34.5,10.2,7.2,18.6,24.2,11.4,12.2,18.4,19.7,14.1,58.3,16.0,17.892958,21.5,50.8
3217,0500000US72151,32.5,26.1,13.3,11.2,16.8,22.0,9.9,11.7,19.1,20.7,16.5,43.9,20.1,16.971225,18.9,49.5


In [82]:
demo_data['GEO_ID'] = demo_data['GEO_ID'].str.replace('0500000US', '', regex=False).astype(int)
demo_data=demo_data.merge(income_data, on='GEO_ID')

In [83]:
demo_data

Unnamed: 0,GEO_ID,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,...,Over65,White,Black,Rural,manu_arg,Poverty,lessthan50K,50to75k,75to100k,morethan100k
0,1001,12.4,34.3,21.5,7.1,24.6,25.2,8.6,12.2,21.0,...,14.0,76.7,20.9,41.030801,15.2,12.3,46.4,19.5,13.2,20.8
1,1003,9.9,28.7,22.4,9.4,29.5,22.2,7.7,11.3,19.1,...,18.7,88.0,9.0,32.815672,10.6,13.0,48.5,18.6,12.0,20.9
2,1005,26.3,34.9,17.8,8.2,12.9,21.5,9.4,13.8,19.1,...,16.5,47.3,48.6,64.882080,26.9,26.4,65.9,14.6,9.4,9.9
3,1007,19.3,41.8,21.1,5.8,12.0,21.1,9.2,13.8,21.4,...,14.9,77.5,20.8,100.000000,25.0,16.5,60.3,17.7,11.4,10.6
4,1009,19.9,32.9,22.5,11.6,13.0,23.6,8.0,11.5,19.8,...,17.2,91.7,2.1,90.719682,18.5,16.5,53.9,21.8,11.0,13.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3109,56037,9.3,34.1,24.9,10.3,21.4,27.5,9.1,15.5,18.7,...,9.4,91.1,1.6,12.071037,25.1,11.0,36.3,18.6,15.1,30.0
3110,56039,4.6,15.8,19.5,6.2,53.8,19.5,5.7,20.0,22.7,...,11.8,91.4,0.5,62.744654,6.9,7.3,29.4,20.3,14.3,36.0
3111,56041,10.4,36.6,26.1,8.8,18.2,29.6,8.0,12.7,18.6,...,10.7,93.6,0.8,42.521028,15.5,16.8,47.7,16.4,13.3,22.5
3112,56043,12.5,30.8,27.0,8.6,21.1,24.8,5.3,11.3,17.3,...,19.7,93.1,0.7,34.398545,22.8,14.7,53.4,20.4,11.9,14.3


In [84]:
demo_data=demo_data.merge(health_data, left_on='GEO_ID',right_on='FIPS')

In [85]:
demo_data.describe()

Unnamed: 0,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,...,Rural,manu_arg,Poverty,lessthan50K,50to75k,75to100k,morethan100k,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value
count,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,...,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0
mean,14.224277,34.587091,21.880861,8.525498,20.783847,22.558703,9.009955,11.634586,18.002216,21.168433,...,63.754916,19.218915,16.418979,52.949615,18.539788,11.672319,16.839274,30.947559,16.923892,6.228573
std,6.542912,7.070799,3.792383,2.555754,9.139611,3.42201,3.57337,2.247991,2.122785,2.79769,...,33.772956,8.295985,6.505763,10.404013,2.787136,2.709991,7.932256,4.468338,4.959968,2.233174
min,1.3,6.5,8.3,0.6,3.0,3.9,0.7,0.0,4.9,5.2,...,0.0,0.2,1.8,14.8,6.6,1.3,2.5,10.7,7.2,1.18856
25%,9.3,30.2,19.325,6.8,14.4,20.5,7.3,10.3,16.7,19.6,...,35.146463,13.0,11.8,46.5,16.8,9.9,11.6,28.5,13.0,4.660904
50%,12.8,34.9,21.8,8.3,18.5,22.5,8.2,11.4,18.1,21.2,...,66.050346,18.7,15.7,53.4,18.6,11.9,15.0,31.2,15.9,6.018293
75%,18.3,39.6,24.3,10.1,24.7,24.3,9.5,12.7,19.2,22.7,...,100.0,24.7,19.9,60.3,20.3,13.5,19.8,33.7,20.0,7.489068
max,51.5,54.6,36.3,19.5,80.2,40.3,55.7,26.9,29.4,44.8,...,100.0,69.6,48.7,83.0,30.2,32.4,63.0,46.6,41.7,23.617721


In [87]:
#Output to latex
summary=demo_data.drop(['FIPS','GEO_ID'],axis=1 ).describe()
summary=summary.transpose()
summary=summary.round(2)
summary = summary.applymap(lambda x: '{:.2f}'.format(x).rstrip('0').rstrip('.'))
latex_code = summary.to_latex(index=True)
print(latex_code)

\begin{tabular}{lllllllll}
\toprule
 & count & mean & std & min & 25% & 50% & 75% & max \\
\midrule
Nohighschool_25plus & 3114 & 14.22 & 6.54 & 1.3 & 9.3 & 12.8 & 18.3 & 51.5 \\
Highschool_25plus & 3114 & 34.59 & 7.07 & 6.5 & 30.2 & 34.9 & 39.6 & 54.6 \\
Somedegree_25plus & 3114 & 21.88 & 3.79 & 8.3 & 19.32 & 21.8 & 24.3 & 36.3 \\
Assocdegree_25plus & 3114 & 8.53 & 2.56 & 0.6 & 6.8 & 8.3 & 10.1 & 19.5 \\
Degree_25plus & 3114 & 20.78 & 9.14 & 3 & 14.4 & 18.5 & 24.7 & 80.2 \\
Under18 & 3114 & 22.56 & 3.42 & 3.9 & 20.5 & 22.5 & 24.3 & 40.3 \\
18to24 & 3114 & 9.01 & 3.57 & 0.7 & 7.3 & 8.2 & 9.5 & 55.7 \\
25to34 & 3114 & 11.63 & 2.25 & 0 & 10.3 & 11.4 & 12.7 & 26.9 \\
35to49 & 3114 & 18 & 2.12 & 4.9 & 16.7 & 18.1 & 19.2 & 29.4 \\
50to64 & 3114 & 21.17 & 2.8 & 5.2 & 19.6 & 21.2 & 22.7 & 44.8 \\
Over65 & 3114 & 17.62 & 4.45 & 3.9 & 14.8 & 17.3 & 19.9 & 53.1 \\
White & 3114 & 83.53 & 16.41 & 10.1 & 75.8 & 89.7 & 96.1 & 99.5 \\
Black & 3114 & 9.97 & 14.48 & 0.2 & 1.2 & 3.3 & 12 & 88.6 \\
Rural 

  summary = summary.applymap(lambda x: '{:.2f}'.format(x).rstrip('0').rstrip('.'))


In [88]:
US_full_dataset=demo_data.merge(elec_data, on='GEO_ID')

In [89]:
US_full_dataset

Unnamed: 0,GEO_ID,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,...,FIPS,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value,county_name,state,DEMOCRAT_2012,REPUBLICAN_2012,DEMOCRAT_2016,REPUBLICAN_2016
0,1001,12.4,34.3,21.5,7.1,24.6,25.2,8.6,12.2,21.0,...,1001,30.9,19.4,5.883047,AUTAUGA,ALABAMA,26.587832,72.618252,23.769671,72.766588
1,1003,9.9,28.7,22.4,9.4,29.5,22.2,7.7,11.3,19.1,...,1003,26.7,16.0,6.053720,BALDWIN,ALABAMA,21.589444,77.358269,19.385601,76.545712
2,1005,26.3,34.9,17.8,8.2,12.9,21.5,9.4,13.8,19.1,...,1005,40.8,25.7,10.809275,BARBOUR,ALABAMA,51.368494,48.223130,46.527844,52.096666
3,1007,19.3,41.8,21.1,5.8,12.0,21.1,9.2,13.8,21.4,...,1007,40.1,22.0,7.137850,BIBB,ALABAMA,26.152019,72.826603,21.249575,76.403220
4,1009,19.9,32.9,22.5,11.6,13.0,23.6,8.0,11.5,19.8,...,1009,32.4,20.7,6.146908,BLOUNT,ALABAMA,12.371907,86.465884,8.425825,89.334844
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3148,56037,9.3,34.1,24.9,10.3,21.4,27.5,9.1,15.5,18.7,...,56037,29.2,13.9,4.162206,SWEETWATER,WYOMING,28.256881,67.641314,18.861646,70.951547
3149,56039,4.6,15.8,19.5,6.2,53.8,19.5,5.7,20.0,22.7,...,56039,12.5,11.1,4.795996,TETON,WYOMING,54.195743,42.376134,57.923497,31.052507
3150,56041,10.4,36.6,26.1,8.8,18.2,29.6,8.0,12.7,18.6,...,56041,29.6,14.7,5.034880,UINTA,WYOMING,19.065464,77.468088,14.191263,72.656434
3151,56043,12.5,30.8,27.0,8.6,21.1,24.8,5.3,11.3,17.3,...,56043,24.7,14.3,4.511628,WASHAKIE,WYOMING,20.131846,76.419878,13.948610,76.324069


In [90]:
summary=US_full_dataset.describe()
summary=summary.transpose()
summary=summary.round(2)
summary = summary.applymap(lambda x: '{:.2f}'.format(x).rstrip('0').rstrip('.'))
latex_code = summary.to_latex(index=True)
print(latex_code)

\begin{tabular}{lllllllll}
\toprule
 & count & mean & std & min & 25% & 50% & 75% & max \\
\midrule
Nohighschool_25plus & 3153 & 14.22 & 6.53 & 1.3 & 9.3 & 12.8 & 18.3 & 51.5 \\
Highschool_25plus & 3153 & 34.5 & 7.12 & 6.5 & 30 & 34.9 & 39.5 & 54.6 \\
Somedegree_25plus & 3153 & 21.87 & 3.8 & 8.3 & 19.3 & 21.8 & 24.3 & 36.3 \\
Assocdegree_25plus & 3153 & 8.51 & 2.55 & 0.6 & 6.8 & 8.3 & 10.1 & 19.5 \\
Degree_25plus & 3153 & 20.9 & 9.29 & 3 & 14.5 & 18.6 & 24.8 & 80.2 \\
Under18 & 3153 & 22.54 & 3.44 & 3.9 & 20.5 & 22.5 & 24.3 & 40.3 \\
18to24 & 3153 & 9.07 & 3.81 & 0.7 & 7.4 & 8.2 & 9.5 & 55.7 \\
25to34 & 3153 & 11.66 & 2.28 & 0 & 10.3 & 11.4 & 12.8 & 26.9 \\
35to49 & 3153 & 18 & 2.15 & 4.9 & 16.7 & 18.1 & 19.3 & 29.4 \\
50to64 & 3153 & 21.13 & 2.82 & 5.2 & 19.6 & 21.2 & 22.6 & 44.8 \\
Over65 & 3153 & 17.59 & 4.46 & 3.9 & 14.8 & 17.2 & 19.9 & 53.1 \\
White & 3153 & 83.3 & 16.57 & 10.1 & 75.4 & 89.4 & 96 & 99.5 \\
Black & 3153 & 10.18 & 14.68 & 0.2 & 1.2 & 3.4 & 12.4 & 88.6 \\
Rural & 315

  summary = summary.applymap(lambda x: '{:.2f}'.format(x).rstrip('0').rstrip('.'))


In [91]:
US_full_dataset.to_csv('US results/Output datasets/US_fulldataset.csv',index=False)