In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from mpl_toolkits.axes_grid1 import make_axes_locatable
import kmapper as km
import pyballmapper as pbm
import statistics
import seaborn as sns
from matplotlib.colors import ListedColormap
from matplotlib import colormaps as cm
import networkx as nx

  from .autonotebook import tqdm as notebook_tqdm


# Election Data

https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ

In [2]:
# County Presidential Election Returns 2000-2020
df=pd.read_csv('US_data/countypres_2000-2020.csv')

In [3]:
df.head()

Unnamed: 0,year,state,state_po,county_name,county_fips,office,candidate,party,candidatevotes,totalvotes,version,mode
0,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,AL GORE,DEMOCRAT,4942,17208,20220315,TOTAL
1,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,GEORGE W. BUSH,REPUBLICAN,11993,17208,20220315,TOTAL
2,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,RALPH NADER,GREEN,160,17208,20220315,TOTAL
3,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,OTHER,OTHER,113,17208,20220315,TOTAL
4,2000,ALABAMA,AL,BALDWIN,1003.0,US PRESIDENT,AL GORE,DEMOCRAT,13997,56480,20220315,TOTAL


In [48]:
## Extracting the useful columns 
cols = ['year',
        'state',
        'county_name',
        'county_fips',
         'party',
         'candidatevotes'
         ,'totalvotes']

data = df[cols]
#calculating vote percentages
data['percentagevotes']=data['candidatevotes']/data['totalvotes']*100
pv = data.pivot_table(index=['county_fips','county_name','state'], columns=['party','year'], values='percentagevotes')
pv=pv.reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['percentagevotes']=data['candidatevotes']/data['totalvotes']*100


In [5]:
pv.columns = [f'{party}_{year}' if year else f'{party}' for party, year in pv.columns]
# Reset index
pv = pv.reset_index()
pv

Unnamed: 0,index,county_fips,county_name,state,DEMOCRAT_2000,DEMOCRAT_2004,DEMOCRAT_2008,DEMOCRAT_2012,DEMOCRAT_2016,DEMOCRAT_2020,...,OTHER_2008,OTHER_2012,OTHER_2016,OTHER_2020,REPUBLICAN_2000,REPUBLICAN_2004,REPUBLICAN_2008,REPUBLICAN_2012,REPUBLICAN_2016,REPUBLICAN_2020
0,0,1001.0,AUTAUGA,ALABAMA,28.719200,23.694039,25.773021,26.587832,23.769671,27.018365,...,0.613341,0.793916,3.463741,1.544833,69.694328,75.673522,73.613637,72.618252,72.766588,71.436802
1,1,1003.0,BALDWIN,ALABAMA,24.782224,22.502885,23.811922,21.589444,19.385601,22.409030,...,0.928599,1.052286,4.068687,1.419597,72.365439,76.415176,75.259479,77.358269,76.545712,76.171373
2,2,1005.0,BARBOUR,ALABAMA,49.908610,44.836225,48.985383,51.368494,46.527844,45.788173,...,0.576096,0.408376,1.375490,0.760601,49.023569,54.736940,50.438521,48.223130,52.096666,53.451226
3,3,1007.0,BIBB,ALABAMA,38.163639,27.486842,26.596483,26.152019,21.249575,20.698280,...,0.960204,1.021378,2.347205,0.875456,60.174623,72.000000,72.443313,72.826603,76.403220,78.426264
4,4,1009.0,BLOUNT,ALABAMA,27.691537,18.312872,14.513537,12.371907,8.425825,9.569378,...,1.467013,1.162209,2.239331,0.859069,70.477939,80.850074,84.019450,86.465884,89.334844,89.571553
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3190,3190,56039.0,TETON,WYOMING,38.529384,52.575051,60.457966,54.195743,57.923497,66.599040,...,2.605389,3.428123,11.023996,2.319605,52.286454,45.109605,36.936645,42.376134,31.052507,29.356868
3191,3191,56041.0,UINTA,WYOMING,22.243192,22.460092,27.524353,19.065464,14.191263,16.819960,...,4.015206,3.466448,13.152302,2.114388,73.726072,75.250588,68.460442,77.468088,72.656434,79.247278
3192,3192,56043.0,WASHAKIE,WYOMING,19.896322,20.782693,25.414634,20.131846,13.948610,16.145833,...,2.487805,3.448276,9.727320,1.760913,77.462355,77.783179,72.097561,76.419878,76.324069,80.481151
3193,3193,56045.0,WESTON,WYOMING,14.673203,17.099057,19.273579,12.563263,8.479864,10.112360,...,4.042179,3.453409,5.501985,1.320225,82.385621,80.748821,76.684241,83.983328,86.018151,87.275281


In [49]:
pv['GEO_ID'] = pv['county_fips'].astype(int)

In [50]:
elec_data=pv[['GEO_ID','county_name','state','DEMOCRAT_2012','REPUBLICAN_2012','DEMOCRAT_2016','REPUBLICAN_2016']]

KeyError: "['DEMOCRAT_2012' 'REPUBLICAN_2012' 'DEMOCRAT_2016' 'REPUBLICAN_2016'] not in index"

# Demographic Data

Census and American Community Survey from 2016 using 5 year estimates
https://data.census.gov/

In [8]:
elec_data.to_csv('US results/Output datasets/election_data_US.csv')

## Education

In [10]:
edu_data=pd.read_csv('US_data/education.csv')

In [11]:
edu_data

Unnamed: 0,GEO_ID,Nohighschool_1824,Highschool_1824,Somedegree_1824,Degree_1824,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus
0,0500000US01001,18.8,37.5,36.2,7.4,12.4,34.3,21.5,7.1,24.6
1,0500000US01003,18.7,31.9,41.3,8.1,9.9,28.7,22.4,9.4,29.5
2,0500000US01005,21.6,44.2,30.7,3.5,26.3,34.9,17.8,8.2,12.9
3,0500000US01007,25.5,31.1,42.0,1.5,19.3,41.8,21.1,5.8,12.0
4,0500000US01009,18.2,32.8,46.1,2.9,19.9,32.9,22.5,11.6,13.0
...,...,...,...,...,...,...,...,...,...,...
3215,0500000US72145,26.2,10.7,59.8,3.3,41.8,13.7,12.5,11.3,20.7
3216,0500000US72147,6.1,28.8,44.8,20.2,30.7,35.8,7.0,7.5,19.0
3217,0500000US72149,8.5,32.5,53.2,5.8,29.6,34.5,10.2,7.2,18.6
3218,0500000US72151,12.8,27.8,53.5,5.9,32.5,26.1,13.3,11.2,16.8


In [12]:
edu_data=edu_data[['GEO_ID','Nohighschool_25plus', 'Highschool_25plus',
       'Somedegree_25plus', 'Assocdegree_25plus', 'Degree_25plus']]

## Age

In [13]:
age_data=pd.read_csv('US_data/age.csv')

In [14]:
age_data.describe()

Unnamed: 0,Under18,18to24,25to29,30to34,35to39,40to44,45to49,50to54,55to59,60to64,Over65
count,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0,3220.0
mean,22.574099,9.03764,5.797329,5.87795,5.711553,5.976988,6.344441,7.201988,7.273199,6.654969,17.54823
std,3.459112,3.526647,1.342807,1.107852,1.018101,1.012998,0.880416,0.92257,1.294687,1.3412,4.459395
min,1.1,0.7,0.0,0.0,0.0,1.1,0.3,0.0,1.8,1.4,3.9
25%,20.5,7.4,5.0,5.3,5.1,5.4,5.9,6.8,6.5,5.8,14.7
50%,22.5,8.3,5.7,5.8,5.7,6.0,6.4,7.2,7.2,6.5,17.2
75%,24.3,9.6,6.4,6.4,6.3,6.6,6.9,7.7,7.9,7.3,19.825
max,40.3,55.7,15.2,15.1,10.2,15.8,12.5,13.9,31.6,15.1,53.1


In [15]:
age_data['25to34']=age_data['25to29']+age_data['30to34']
age_data['35to49']=age_data['35to39']+age_data['40to44']+age_data['45to49']
age_data['50to64']=age_data['50to54']+age_data['55to59']+age_data['60to64']

In [16]:
age_data.columns

Index(['GEO_ID', 'Under18', '18to24', '25to29', '30to34', '35to39', '40to44',
       '45to49', '50to54', '55to59', '60to64', 'Over65', '25to34', '35to49',
       '50to64'],
      dtype='object')

In [17]:
age_data=age_data[['GEO_ID', 'Under18', '18to24', '25to34', '35to49',
       '50to64','Over65', ]]

### Ethnicity

In [18]:
eth_data=pd.read_csv('US_data/ethnicity.csv')

In [19]:
eth_data

Unnamed: 0,GEO_ID,White,Black
0,0500000US01001,76.7,20.9
1,0500000US01003,88.0,9.0
2,0500000US01005,47.3,48.6
3,0500000US01007,77.5,20.8
4,0500000US01009,91.7,2.1
...,...,...,...
3216,0500000US72145,59.4,16.6
3217,0500000US72147,35.8,26.0
3218,0500000US72149,58.3,16.0
3219,0500000US72151,43.9,20.1


### Rural/Urban

In [20]:
rural_data=pd.read_csv('US_data/rural_urban.csv')

In [21]:
rural_data['Rural']=(rural_data['Rural']/rural_data['Tot_pop '])*100

In [22]:
rural_data=rural_data[['GEO_ID','Rural']]

In [23]:
rural_data.describe()

Unnamed: 0,Rural
count,3221.0
mean,62.770482
std,34.341945
min,0.0
25%,33.509863
50%,65.105101
75%,100.0
max,100.0


### Income data

In [24]:
income_data=pd.read_csv('US_Data/income_cat.csv')
income_data['GEO_ID']=income_data['GEO_ID'].astype(object)


In [25]:
income_data

Unnamed: 0,GEO_ID,lessthan50K,50to75k,75to100k,morethan100k
0,1001,46.4,19.5,13.2,20.8
1,1003,48.5,18.6,12.0,20.9
2,1005,65.9,14.6,9.4,9.9
3,1007,60.3,17.7,11.4,10.6
4,1009,53.9,21.8,11.0,13.3
...,...,...,...,...,...
3109,56037,36.3,18.6,15.1,30.0
3110,56039,29.4,20.3,14.3,36.0
3111,56041,47.7,16.4,13.3,22.5
3112,56043,53.4,20.4,11.9,14.3


In [26]:
income_data.describe()

Unnamed: 0,lessthan50K,50to75k,75to100k,morethan100k
count,3114.0,3114.0,3114.0,3114.0
mean,52.949615,18.539788,11.672319,16.839274
std,10.404013,2.787136,2.709991,7.932256
min,14.8,6.6,1.3,2.5
25%,46.5,16.8,9.9,11.6
50%,53.4,18.6,11.9,15.0
75%,60.3,20.3,13.5,19.8
max,83.0,30.2,32.4,63.0


### Industry data

In [27]:
industry_data=pd.read_csv('US_data/industry.csv')

In [28]:
industry_data.describe()

Unnamed: 0,manu_arg
count,3220.0
mean,19.032764
std,8.30797
min,0.2
25%,12.8
50%,18.5
75%,24.5
max,69.6


### Unemployment data

In [29]:
unemp_data=pd.read_csv('US_data/unemployment.csv')

In [30]:
poverty_data=pd.read_csv('US_data/poverty.csv')

In [31]:
poverty_data

Unnamed: 0,GEO_ID,Poverty
0,0500000US01001,12.3
1,0500000US01003,13.0
2,0500000US01005,26.4
3,0500000US01007,16.5
4,0500000US01009,16.5
...,...,...
3215,0500000US72145,46.0
3216,0500000US72147,35.9
3217,0500000US72149,50.8
3218,0500000US72151,49.5


In [32]:
health_data=pd.read_csv('US_data/countyhealth.csv')
health_data=health_data.dropna()
health_data['FIPS']=health_data['FIPS'].astype(object)
health_data[['Adult obesity raw value','Poor or fair health raw value','Unemployment raw value']]=health_data[['Adult obesity raw value','Poor or fair health raw value','Unemployment raw value']]*100

In [33]:
health_data.tail()

Unnamed: 0,FIPS,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value
3187,56037,29.2,13.9,4.162206
3188,56039,12.5,11.1,4.795996
3189,56041,29.6,14.7,5.03488
3190,56043,24.7,14.3,4.511628
3191,56045,29.4,13.4,3.425693


In [34]:
health_data.describe()

Unnamed: 0,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value
count,3190.0,3190.0,3190.0
mean,30.903301,16.913346,6.258686
std,4.467052,4.951684,2.287594
min,10.7,7.2,1.18856
25%,28.5,13.0,4.674522
50%,31.1,15.9,6.033636
75%,33.675,20.0,7.500364
max,46.6,41.7,23.66843


## Merged dataset

In [35]:
demo_data = (edu_data
             .merge(age_data, on='GEO_ID')
             .merge(eth_data, on='GEO_ID')
             .merge(rural_data, on='GEO_ID')
             .merge(industry_data, on='GEO_ID')
             .merge(poverty_data,on='GEO_ID'))


In [36]:
demo_data

Unnamed: 0,GEO_ID,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,Over65,White,Black,Rural,manu_arg,Poverty
0,0500000US01001,12.4,34.3,21.5,7.1,24.6,25.2,8.6,12.2,21.0,19.1,14.0,76.7,20.9,41.030801,15.2,12.3
1,0500000US01003,9.9,28.7,22.4,9.4,29.5,22.2,7.7,11.3,19.1,21.0,18.7,88.0,9.0,32.815672,10.6,13.0
2,0500000US01005,26.3,34.9,17.8,8.2,12.9,21.5,9.4,13.8,19.1,19.7,16.5,47.3,48.6,64.882080,26.9,26.4
3,0500000US01007,19.3,41.8,21.1,5.8,12.0,21.1,9.2,13.8,21.4,19.7,14.9,77.5,20.8,100.000000,25.0,16.5
4,0500000US01009,19.9,32.9,22.5,11.6,13.0,23.6,8.0,11.5,19.8,19.9,17.2,91.7,2.1,90.719682,18.5,16.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3214,0500000US72145,41.8,13.7,12.5,11.3,20.7,22.2,9.8,12.7,19.3,19.0,17.1,59.4,16.6,5.602611,17.2,46.0
3215,0500000US72147,30.7,35.8,7.0,7.5,19.0,21.7,9.7,9.8,18.0,21.2,19.6,35.8,26.0,21.157835,7.3,35.9
3216,0500000US72149,29.6,34.5,10.2,7.2,18.6,24.2,11.4,12.2,18.4,19.7,14.1,58.3,16.0,17.892958,21.5,50.8
3217,0500000US72151,32.5,26.1,13.3,11.2,16.8,22.0,9.9,11.7,19.1,20.7,16.5,43.9,20.1,16.971225,18.9,49.5


In [37]:
demo_data['GEO_ID'] = demo_data['GEO_ID'].str.replace('0500000US', '', regex=False).astype(int)
demo_data=demo_data.merge(income_data, on='GEO_ID')

In [38]:
demo_data

Unnamed: 0,GEO_ID,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,...,Over65,White,Black,Rural,manu_arg,Poverty,lessthan50K,50to75k,75to100k,morethan100k
0,1001,12.4,34.3,21.5,7.1,24.6,25.2,8.6,12.2,21.0,...,14.0,76.7,20.9,41.030801,15.2,12.3,46.4,19.5,13.2,20.8
1,1003,9.9,28.7,22.4,9.4,29.5,22.2,7.7,11.3,19.1,...,18.7,88.0,9.0,32.815672,10.6,13.0,48.5,18.6,12.0,20.9
2,1005,26.3,34.9,17.8,8.2,12.9,21.5,9.4,13.8,19.1,...,16.5,47.3,48.6,64.882080,26.9,26.4,65.9,14.6,9.4,9.9
3,1007,19.3,41.8,21.1,5.8,12.0,21.1,9.2,13.8,21.4,...,14.9,77.5,20.8,100.000000,25.0,16.5,60.3,17.7,11.4,10.6
4,1009,19.9,32.9,22.5,11.6,13.0,23.6,8.0,11.5,19.8,...,17.2,91.7,2.1,90.719682,18.5,16.5,53.9,21.8,11.0,13.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3109,56037,9.3,34.1,24.9,10.3,21.4,27.5,9.1,15.5,18.7,...,9.4,91.1,1.6,12.071037,25.1,11.0,36.3,18.6,15.1,30.0
3110,56039,4.6,15.8,19.5,6.2,53.8,19.5,5.7,20.0,22.7,...,11.8,91.4,0.5,62.744654,6.9,7.3,29.4,20.3,14.3,36.0
3111,56041,10.4,36.6,26.1,8.8,18.2,29.6,8.0,12.7,18.6,...,10.7,93.6,0.8,42.521028,15.5,16.8,47.7,16.4,13.3,22.5
3112,56043,12.5,30.8,27.0,8.6,21.1,24.8,5.3,11.3,17.3,...,19.7,93.1,0.7,34.398545,22.8,14.7,53.4,20.4,11.9,14.3


In [39]:
demo_data=demo_data.merge(health_data, left_on='GEO_ID',right_on='FIPS')

In [40]:
demo_data.describe()

Unnamed: 0,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,...,Rural,manu_arg,Poverty,lessthan50K,50to75k,75to100k,morethan100k,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value
count,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,...,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0,3114.0
mean,14.224277,34.587091,21.880861,8.525498,20.783847,22.558703,9.009955,11.634586,18.002216,21.168433,...,63.754916,19.218915,16.418979,52.949615,18.539788,11.672319,16.839274,30.947559,16.923892,6.228573
std,6.542912,7.070799,3.792383,2.555754,9.139611,3.42201,3.57337,2.247991,2.122785,2.79769,...,33.772956,8.295985,6.505763,10.404013,2.787136,2.709991,7.932256,4.468338,4.959968,2.233174
min,1.3,6.5,8.3,0.6,3.0,3.9,0.7,0.0,4.9,5.2,...,0.0,0.2,1.8,14.8,6.6,1.3,2.5,10.7,7.2,1.18856
25%,9.3,30.2,19.325,6.8,14.4,20.5,7.3,10.3,16.7,19.6,...,35.146463,13.0,11.8,46.5,16.8,9.9,11.6,28.5,13.0,4.660904
50%,12.8,34.9,21.8,8.3,18.5,22.5,8.2,11.4,18.1,21.2,...,66.050346,18.7,15.7,53.4,18.6,11.9,15.0,31.2,15.9,6.018293
75%,18.3,39.6,24.3,10.1,24.7,24.3,9.5,12.7,19.2,22.7,...,100.0,24.7,19.9,60.3,20.3,13.5,19.8,33.7,20.0,7.489068
max,51.5,54.6,36.3,19.5,80.2,40.3,55.7,26.9,29.4,44.8,...,100.0,69.6,48.7,83.0,30.2,32.4,63.0,46.6,41.7,23.617721


In [41]:
demo_data.columns

Index(['GEO_ID', 'Nohighschool_25plus', 'Highschool_25plus',
       'Somedegree_25plus', 'Assocdegree_25plus', 'Degree_25plus', 'Under18',
       '18to24', '25to34', '35to49', '50to64', 'Over65', 'White', 'Black',
       'Rural', 'manu_arg', 'Poverty', 'lessthan50K', '50to75k', '75to100k',
       'morethan100k', 'FIPS', 'Adult obesity raw value',
       'Poor or fair health raw value', 'Unemployment raw value'],
      dtype='object')

In [42]:
#Output to latex
summary=demo_data.drop(['FIPS','GEO_ID'],axis=1 ).describe()
summary=summary.transpose()
summary=summary.round(2)
summary = summary.applymap(lambda x: '{:.2f}'.format(x).rstrip('0').rstrip('.'))
latex_code = summary.to_latex(index=True)
print(latex_code)

\begin{tabular}{lllllllll}
\toprule
 & count & mean & std & min & 25% & 50% & 75% & max \\
\midrule
Nohighschool_25plus & 3114 & 14.22 & 6.54 & 1.3 & 9.3 & 12.8 & 18.3 & 51.5 \\
Highschool_25plus & 3114 & 34.59 & 7.07 & 6.5 & 30.2 & 34.9 & 39.6 & 54.6 \\
Somedegree_25plus & 3114 & 21.88 & 3.79 & 8.3 & 19.32 & 21.8 & 24.3 & 36.3 \\
Assocdegree_25plus & 3114 & 8.53 & 2.56 & 0.6 & 6.8 & 8.3 & 10.1 & 19.5 \\
Degree_25plus & 3114 & 20.78 & 9.14 & 3 & 14.4 & 18.5 & 24.7 & 80.2 \\
Under18 & 3114 & 22.56 & 3.42 & 3.9 & 20.5 & 22.5 & 24.3 & 40.3 \\
18to24 & 3114 & 9.01 & 3.57 & 0.7 & 7.3 & 8.2 & 9.5 & 55.7 \\
25to34 & 3114 & 11.63 & 2.25 & 0 & 10.3 & 11.4 & 12.7 & 26.9 \\
35to49 & 3114 & 18 & 2.12 & 4.9 & 16.7 & 18.1 & 19.2 & 29.4 \\
50to64 & 3114 & 21.17 & 2.8 & 5.2 & 19.6 & 21.2 & 22.7 & 44.8 \\
Over65 & 3114 & 17.62 & 4.45 & 3.9 & 14.8 & 17.3 & 19.9 & 53.1 \\
White & 3114 & 83.53 & 16.41 & 10.1 & 75.8 & 89.7 & 96.1 & 99.5 \\
Black & 3114 & 9.97 & 14.48 & 0.2 & 1.2 & 3.3 & 12 & 88.6 \\
Rural 

  summary = summary.applymap(lambda x: '{:.2f}'.format(x).rstrip('0').rstrip('.'))


In [43]:
US_full_dataset=demo_data.merge(elec_data, on='GEO_ID')

In [44]:
US_full_dataset

Unnamed: 0,GEO_ID,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,...,FIPS,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value,county_name,state,DEMOCRAT_2012,REPUBLICAN_2012,DEMOCRAT_2016,REPUBLICAN_2016
0,1001,12.4,34.3,21.5,7.1,24.6,25.2,8.6,12.2,21.0,...,1001,30.9,19.4,5.883047,AUTAUGA,ALABAMA,26.587832,72.618252,23.769671,72.766588
1,1003,9.9,28.7,22.4,9.4,29.5,22.2,7.7,11.3,19.1,...,1003,26.7,16.0,6.053720,BALDWIN,ALABAMA,21.589444,77.358269,19.385601,76.545712
2,1005,26.3,34.9,17.8,8.2,12.9,21.5,9.4,13.8,19.1,...,1005,40.8,25.7,10.809275,BARBOUR,ALABAMA,51.368494,48.223130,46.527844,52.096666
3,1007,19.3,41.8,21.1,5.8,12.0,21.1,9.2,13.8,21.4,...,1007,40.1,22.0,7.137850,BIBB,ALABAMA,26.152019,72.826603,21.249575,76.403220
4,1009,19.9,32.9,22.5,11.6,13.0,23.6,8.0,11.5,19.8,...,1009,32.4,20.7,6.146908,BLOUNT,ALABAMA,12.371907,86.465884,8.425825,89.334844
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3109,56037,9.3,34.1,24.9,10.3,21.4,27.5,9.1,15.5,18.7,...,56037,29.2,13.9,4.162206,SWEETWATER,WYOMING,28.256881,67.641314,18.861646,70.951547
3110,56039,4.6,15.8,19.5,6.2,53.8,19.5,5.7,20.0,22.7,...,56039,12.5,11.1,4.795996,TETON,WYOMING,54.195743,42.376134,57.923497,31.052507
3111,56041,10.4,36.6,26.1,8.8,18.2,29.6,8.0,12.7,18.6,...,56041,29.6,14.7,5.034880,UINTA,WYOMING,19.065464,77.468088,14.191263,72.656434
3112,56043,12.5,30.8,27.0,8.6,21.1,24.8,5.3,11.3,17.3,...,56043,24.7,14.3,4.511628,WASHAKIE,WYOMING,20.131846,76.419878,13.948610,76.324069


In [45]:
summary=US_full_dataset.describe()
summary=summary.transpose()
summary=summary.round(2)
summary = summary.applymap(lambda x: '{:.2f}'.format(x).rstrip('0').rstrip('.'))
latex_code = summary.to_latex(index=True)
print(latex_code)

\begin{tabular}{lllllllll}
\toprule
 & count & mean & std & min & 25% & 50% & 75% & max \\
\midrule
Nohighschool_25plus & 3114 & 14.22 & 6.54 & 1.3 & 9.3 & 12.8 & 18.3 & 51.5 \\
Highschool_25plus & 3114 & 34.59 & 7.07 & 6.5 & 30.2 & 34.9 & 39.6 & 54.6 \\
Somedegree_25plus & 3114 & 21.88 & 3.79 & 8.3 & 19.32 & 21.8 & 24.3 & 36.3 \\
Assocdegree_25plus & 3114 & 8.53 & 2.56 & 0.6 & 6.8 & 8.3 & 10.1 & 19.5 \\
Degree_25plus & 3114 & 20.78 & 9.14 & 3 & 14.4 & 18.5 & 24.7 & 80.2 \\
Under18 & 3114 & 22.56 & 3.42 & 3.9 & 20.5 & 22.5 & 24.3 & 40.3 \\
18to24 & 3114 & 9.01 & 3.57 & 0.7 & 7.3 & 8.2 & 9.5 & 55.7 \\
25to34 & 3114 & 11.63 & 2.25 & 0 & 10.3 & 11.4 & 12.7 & 26.9 \\
35to49 & 3114 & 18 & 2.12 & 4.9 & 16.7 & 18.1 & 19.2 & 29.4 \\
50to64 & 3114 & 21.17 & 2.8 & 5.2 & 19.6 & 21.2 & 22.7 & 44.8 \\
Over65 & 3114 & 17.62 & 4.45 & 3.9 & 14.8 & 17.3 & 19.9 & 53.1 \\
White & 3114 & 83.53 & 16.41 & 10.1 & 75.8 & 89.7 & 96.1 & 99.5 \\
Black & 3114 & 9.97 & 14.48 & 0.2 & 1.2 & 3.3 & 12 & 88.6 \\
Rural 

  summary = summary.applymap(lambda x: '{:.2f}'.format(x).rstrip('0').rstrip('.'))


In [46]:
US_full_dataset.to_csv('US results/Outputs/US_fulldataset.csv',index=False)

OSError: Cannot save file into a non-existent directory: 'US results/Outputs'

In [None]:
columns=['REPUBLICAN_2016']
ball_rep=pd.DataFrame()
for col in columns:
    ball_rep[col]=US_results.groupby('ball')[col].mean()
ball_rep['Size'] = US_results.groupby('ball').size()

NameError: name 'US_results' is not defined

In [None]:
ball_rep

Unnamed: 0_level_0,REPUBLICAN_2016,Size
ball,Unnamed: 1_level_1,Unnamed: 2_level_1
0,63.292547,926
1,56.907903,156
2,72.156954,922
3,37.005677,47
4,55.738564,174
5,69.101354,901
6,46.448863,251
7,36.035029,24
8,61.656207,1
9,43.074233,1


In [None]:
ball_rep[ball_rep['REPUBLICAN_2016']<63].describe()

Unnamed: 0,REPUBLICAN_2016,Size
count,33.0,33.0
mean,47.184428,91.212121
std,11.674933,93.550707
min,25.860396,1.0
25%,37.213778,18.0
50%,49.231496,53.0
75%,57.549769,139.0
max,61.656207,355.0


In [None]:
ball_rep[ball_rep['REPUBLICAN_2016']>63].describe()

Unnamed: 0,REPUBLICAN_2016,Size
count,26.0,26.0
mean,71.648567,297.576923
std,5.889065,312.913684
min,63.292547,1.0
25%,66.288186,38.25
50%,72.484009,169.5
75%,75.301374,533.75
max,89.230769,926.0


## Analysing the balls

In [None]:
pd.set_option('display.max_columns', 1000)
US_results[(US_results['ball']==45)&(US_results['REPUBLICAN_2012']<US_results['DEMOCRAT_2012'])].describe()

Unnamed: 0,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,Over65,White,Black,Rural,manu_arg,Poverty,lessthan50K,50to75k,75to100k,morethan100k,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value,DEMOCRAT_2012,REPUBLICAN_2012,DEMOCRAT_2016,REPUBLICAN_2016,vote_gains,seat_gains,point,ball
count,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0
mean,9.925,38.022917,21.585417,11.060417,19.416667,21.591667,8.939583,10.99375,17.814583,22.220833,18.452083,94.672917,2.464583,60.181444,22.90625,13.014583,49.697917,20.441667,13.260417,16.597917,30.933333,13.025,5.785313,54.090252,43.194998,40.056687,52.938076,9.743078,68.75,1612.8125,45.0
std,2.452224,3.636589,2.689538,1.780717,3.856431,2.194076,2.953738,0.987455,1.311324,1.585165,2.273528,3.163202,1.452875,12.543692,5.093943,3.054191,5.023752,1.35926,1.736681,3.371374,2.885572,1.746303,1.277994,3.401839,3.618061,4.325821,4.730064,3.642752,46.841744,896.551304,0.0
min,4.7,33.2,15.0,7.1,13.1,15.6,6.5,9.2,15.2,18.5,13.9,85.6,0.8,36.583679,11.9,7.3,37.4,16.9,8.1,9.7,23.8,10.1,3.826943,47.599047,32.267745,31.790421,39.52132,1.963527,0.0,596.0,45.0
25%,7.95,34.975,19.9,10.0,16.475,20.2,7.2,10.375,16.8,21.575,16.875,93.325,1.3,49.242589,19.75,11.025,46.0,19.7,12.4,14.35,29.675,11.8,4.786968,50.971886,41.03392,37.185297,49.867708,7.439353,0.0,813.25,45.0
50%,10.0,37.85,21.35,11.2,19.0,21.95,7.9,10.9,18.0,21.9,18.5,95.75,2.1,63.334728,22.8,13.05,50.35,20.55,13.15,16.65,30.7,12.7,5.432681,53.968567,42.567064,39.265355,53.761373,10.212712,100.0,1304.0,45.0
75%,11.825,39.85,22.9,12.125,21.675,22.925,8.7,11.725,18.825,22.825,19.825,97.025,3.35,70.533247,25.45,15.325,52.775,21.225,14.15,18.625,32.6,14.125,6.695518,56.731441,46.442118,43.077868,56.45913,12.670349,100.0,2117.5,45.0
max,14.6,51.1,28.2,14.5,30.2,26.7,19.1,13.3,20.6,25.7,23.3,98.7,5.7,84.891045,37.9,20.6,64.4,24.1,17.6,26.3,38.5,17.5,8.740741,60.624497,49.154917,53.756426,60.91154,17.975555,100.0,3081.0,45.0


In [None]:
US_results[(US_results['ball']==45)&(US_results['REPUBLICAN_2016']<US_results['DEMOCRAT_2016'])].describe()

Unnamed: 0,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,Over65,White,Black,Rural,manu_arg,Poverty,lessthan50K,50to75k,75to100k,morethan100k,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value,DEMOCRAT_2012,REPUBLICAN_2012,DEMOCRAT_2016,REPUBLICAN_2016,vote_gains,seat_gains,point,ball
count,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
mean,9.675,36.525,19.575,9.6,24.65,20.75,8.85,11.375,19.275,22.775,16.975,96.05,2.2,68.178747,17.9,11.375,45.6,20.075,14.175,20.2,28.125,11.5,5.134531,59.37339,36.885456,48.096041,42.797934,5.912478,0.0,2197.5,45.0
std,2.970269,1.55,1.510794,0.816497,3.738538,2.716002,3.468429,1.014479,1.037224,1.291962,3.617895,2.000833,1.867262,6.210086,4.726521,3.484609,3.652396,0.556028,1.811767,2.264214,3.086935,1.321615,1.125503,1.146267,3.462159,4.2038,3.068529,4.710096,0.0,873.533247,0.0
min,7.0,34.9,18.4,8.4,22.2,18.3,6.5,10.4,18.1,21.3,13.9,93.1,1.2,63.427576,14.3,8.6,42.0,19.5,12.4,18.4,23.8,10.6,4.104232,57.996067,32.267745,43.673813,39.52132,2.144241,0.0,1156.0,45.0
25%,7.6,35.35,18.55,9.45,22.575,18.45,7.025,10.55,18.775,22.125,14.8,95.725,1.275,63.884105,14.45,8.825,42.825,19.65,13.075,18.4,27.1,10.6,4.450651,58.712221,35.992067,46.097553,40.537469,3.268068,0.0,1646.5,45.0
50%,9.05,36.65,19.1,9.9,23.1,20.75,7.45,11.4,19.2,22.7,15.95,96.8,1.3,66.197456,16.45,10.4,45.35,20.1,13.85,19.65,28.9,11.0,4.872886,59.436498,37.305293,47.476963,42.940818,4.383983,0.0,2296.5,45.0
75%,11.125,37.825,20.125,10.05,25.175,23.05,9.275,12.225,19.7,23.35,18.125,97.125,2.225,70.492098,19.9,12.95,48.125,20.525,14.95,21.45,29.925,11.9,5.556766,60.097666,38.198682,49.475451,45.201283,7.028393,0.0,2847.5,45.0
max,13.6,37.9,21.7,10.2,30.2,23.2,14.0,12.3,20.6,24.4,22.1,97.5,5.0,76.892501,24.4,16.1,49.7,20.6,16.6,23.1,30.9,13.4,6.688119,60.624497,40.663492,53.756426,45.788781,12.737706,0.0,3041.0,45.0


In [None]:
US_results[US_results['ball']==33].describe()

Unnamed: 0,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,Over65,White,Black,Rural,manu_arg,Poverty,lessthan50K,50to75k,75to100k,morethan100k,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value,DEMOCRAT_2012,REPUBLICAN_2012,DEMOCRAT_2016,REPUBLICAN_2016,vote_gains,seat_gains,point,ball
count,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0
mean,24.354545,32.6,21.554545,7.045455,14.459091,27.631818,8.322727,12.722727,17.809091,19.159091,14.345455,85.390909,1.627273,100.0,38.540909,13.440909,47.45,20.85,12.877273,18.831818,31.195455,17.25,3.854738,24.068023,74.286328,19.277524,75.830504,1.544176,4.545455,1792.772727,33.0
std,6.452148,5.917287,4.551999,2.567142,4.086407,3.792899,1.715734,2.679185,1.371447,2.170288,2.767279,8.095731,0.989075,0.0,5.94616,4.387717,5.031591,2.924405,2.76456,3.95792,2.680703,2.879443,1.223743,11.368069,11.75969,7.614406,8.706103,4.198965,21.320072,949.519625,0.0
min,12.1,19.0,10.1,2.8,7.7,19.0,2.6,9.6,14.8,15.6,10.5,72.9,0.5,100.0,25.7,3.3,38.2,14.4,7.0,11.6,26.4,12.1,2.311796,3.448276,48.711755,3.144654,57.859327,-6.213469,0.0,75.0,33.0
25%,19.825,27.525,19.2,5.15,11.275,25.575,7.725,10.975,17.025,17.375,12.525,79.475,0.925,100.0,36.625,11.9,44.7,19.1,11.825,16.55,29.5,15.15,3.119069,17.057145,67.926855,14.020677,72.237344,-1.05693,0.0,898.75,33.0
50%,23.9,33.85,21.55,7.05,13.8,28.7,8.65,12.4,18.0,19.2,13.8,84.85,1.2,100.0,38.6,13.15,47.7,20.65,13.25,18.4,30.45,17.45,3.641321,22.020814,76.103307,19.070465,76.363766,0.844976,0.0,2129.0,33.0
75%,26.975,36.8,24.75,9.2,17.475,29.5,9.375,13.9,18.8,20.525,15.925,92.0,2.2,100.0,40.65,15.55,51.175,22.75,14.3,21.05,33.475,19.125,4.249519,29.725602,81.44939,22.014309,80.469422,3.223609,0.0,2666.5,33.0
max,41.6,42.5,29.6,10.9,23.2,33.6,10.3,22.3,20.6,23.3,22.3,99.1,4.1,100.0,52.3,22.7,58.3,26.2,18.4,27.8,37.5,22.3,7.072161,49.355878,95.862069,33.394495,93.710692,12.570194,100.0,3028.0,33.0


In [None]:
states = ['IOWA', 'MICHIGAN', 'OHIO', 'PENNSYLVANIA', 'WISCONSIN','INDIANA','MISSOURI','WEST VIRGINIA']
US_results[(US_results['state'].isin(states))&(US_results['ball']==40)].describe()

Unnamed: 0,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,Over65,White,Black,Rural,manu_arg,Poverty,lessthan50K,50to75k,75to100k,morethan100k,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value,DEMOCRAT_2012,REPUBLICAN_2012,DEMOCRAT_2016,REPUBLICAN_2016,vote_gains,seat_gains,point,ball
count,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0
mean,11.572549,43.65098,20.039216,9.605882,15.15098,22.217647,7.178431,10.082353,16.941176,22.952941,20.619608,96.780392,1.270588,99.684363,30.805882,13.554902,55.088235,20.062745,11.694118,13.139216,32.047059,14.541176,5.639427,39.413546,58.528056,25.459171,69.932165,11.40411,15.686275,1564.882353,40.0
std,2.975707,3.466086,2.493999,2.691053,2.365364,2.507804,0.651249,0.798174,1.097666,1.711649,2.168319,2.462277,0.844108,2.050784,4.685911,3.331085,4.937637,1.958873,2.115033,2.739568,1.686221,2.630223,1.439194,8.90657,8.68018,6.560954,7.407333,3.196059,36.729002,857.859467,0.0
min,6.7,32.9,13.3,4.1,9.7,12.4,5.8,8.0,14.6,19.9,16.0,85.7,0.4,85.375531,22.1,9.0,46.1,15.0,4.3,6.8,27.6,10.6,3.428662,21.329305,38.643703,12.378223,53.850091,5.256924,0.0,673.0,40.0
25%,9.65,41.8,18.55,7.75,13.4,21.15,6.8,9.65,16.2,21.95,19.15,96.35,0.8,100.0,27.55,11.0,51.75,19.05,10.4,11.35,31.05,12.45,4.642057,33.417355,53.273157,20.841515,64.487916,9.424023,0.0,821.0,40.0
50%,11.4,43.4,20.5,9.3,15.1,22.7,7.1,10.2,16.8,22.5,20.3,97.6,0.9,100.0,30.0,13.2,53.7,20.0,11.8,13.5,32.2,13.9,5.461302,39.2618,58.654689,24.308481,70.37415,11.050191,0.0,1461.0,40.0
75%,12.7,45.9,22.0,11.45,16.65,23.45,7.6,10.55,17.6,23.8,22.35,98.2,1.35,100.0,34.15,14.95,58.15,21.2,13.15,15.1,32.95,16.1,6.688871,44.758096,63.867926,29.891032,75.762124,13.236241,0.0,2153.0,40.0
max,19.7,49.9,24.6,15.0,21.6,29.4,9.6,11.7,19.8,28.4,26.9,98.9,3.9,100.0,46.1,22.9,68.4,24.9,16.5,19.3,36.1,22.3,9.371362,59.590958,76.88827,41.213894,85.157593,22.81071,100.0,3088.0,40.0


In [None]:
states = ['IOWA', 'MICHIGAN', 'OHIO', 'PENNSYLVANIA', 'WISCONSIN','INDIANA','MISSOURI','WEST VIRGINIA']
states = ['MICHIGAN', 'PENNSYLVANIA', 'WISCONSIN']

US_results[US_results['state'].isin(states)]

Unnamed: 0,GEO_ID,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,Over65,White,Black,Rural,manu_arg,Poverty,lessthan50K,50to75k,75to100k,morethan100k,FIPS,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value,county_name,state,DEMOCRAT_2012,REPUBLICAN_2012,DEMOCRAT_2016,REPUBLICAN_2016,vote_gains,seat_gains,point,ball
4070,26001,11.5,40.2,25.3,8.6,14.5,13.2,5.1,6.2,13.4,27.8,34.5,98.4,0.8,89.808048,19.0,14.9,64.3,18.9,9.3,7.5,26001,30.7,13.0,10.453598,ALCONA,MICHIGAN,40.498034,58.502621,27.944498,67.779929,9.277308,0,1204,18
4071,26001,11.5,40.2,25.3,8.6,14.5,13.2,5.1,6.2,13.4,27.8,34.5,98.4,0.8,89.808048,19.0,14.9,64.3,18.9,9.3,7.5,26001,30.7,13.0,10.453598,ALCONA,MICHIGAN,40.498034,58.502621,27.944498,67.779929,9.277308,0,1204,42
4072,26003,10.6,44.3,19.1,7.9,18.1,15.5,7.4,10.4,17.5,25.8,23.4,88.5,7.1,100.000000,16.7,13.1,58.6,18.4,12.8,10.2,26003,32.3,13.3,10.256410,ALGER,MICHIGAN,47.899524,50.454742,36.808322,57.215582,6.760840,0,1205,2
4073,26003,10.6,44.3,19.1,7.9,18.1,15.5,7.4,10.4,17.5,25.8,23.4,88.5,7.1,100.000000,16.7,13.1,58.6,18.4,12.8,10.2,26003,32.3,13.3,10.256410,ALGER,MICHIGAN,47.899524,50.454742,36.808322,57.215582,6.760840,0,1205,18
4074,26003,10.6,44.3,19.1,7.9,18.1,15.5,7.4,10.4,17.5,25.8,23.4,88.5,7.1,100.000000,16.7,13.1,58.6,18.4,12.8,10.2,26003,32.3,13.3,10.256410,ALGER,MICHIGAN,47.899524,50.454742,36.808322,57.215582,6.760840,0,1205,34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10671,55137,13.8,41.8,21.8,7.6,15.1,18.8,6.4,10.1,17.1,25.3,22.4,94.4,2.0,99.714480,27.8,11.5,53.5,20.7,11.7,14.1,55137,32.0,13.5,7.160494,WAUSHARA,WISCONSIN,44.281208,54.465471,31.602634,63.913659,9.448188,0,3088,42
10672,55139,7.9,34.0,20.8,10.4,26.9,20.9,12.3,13.1,18.6,20.2,14.7,91.1,4.1,17.613024,25.6,12.2,46.0,21.6,13.4,19.0,55139,28.1,12.3,5.305674,WINNEBAGO,WISCONSIN,50.967221,47.236271,42.520426,49.857707,2.621436,100,3089,0
10673,55139,7.9,34.0,20.8,10.4,26.9,20.9,12.3,13.1,18.6,20.2,14.7,91.1,4.1,17.613024,25.6,12.2,46.0,21.6,13.4,19.0,55139,28.1,12.3,5.305674,WINNEBAGO,WISCONSIN,50.967221,47.236271,42.520426,49.857707,2.621436,100,3089,25
10674,55141,7.6,38.9,21.0,12.1,20.4,21.9,7.4,11.4,17.7,23.0,18.6,95.1,1.6,33.115286,22.9,10.6,50.1,20.5,13.0,16.5,55141,34.2,12.3,6.072165,WOOD,WISCONSIN,47.766067,50.652956,37.650794,56.886243,6.233287,0,3090,0


In [None]:
import pandas as pd

# Example DataFrame (replace this with actual data)
# US_results = pd.read_csv('path_to_your_data.csv')

# List of states to analyze
states = ['IOWA', 'MICHIGAN', 'OHIO', 'PENNSYLVANIA', 'WISCONSIN', 'INDIANA', 'MISSOURI', 'WEST VIRGINIA']
states = ['MICHIGAN', 'PENNSYLVANIA', 'WISCONSIN']

# Calculate the total occurrences of each unique value in the 'ball' column
total_ball_counts = US_results['ball'].value_counts()

# Filter the DataFrame to include only rows from the specified states
filtered_data = US_results[US_results['state'].isin(states)]

# Calculate the occurrences of each unique value in the 'ball' column for the filtered data
filtered_ball_counts = filtered_data['ball'].value_counts()

# Compute the proportion of each ball that belongs to the specified states
ball_proportions = filtered_ball_counts / total_ball_counts

# Convert the proportions to a DataFrame for better display
ball_proportions_df = ball_proportions.to_frame(name='Proportion')

# Fill NaN values with 0 to indicate that the ball did not appear in the filtered data
ball_proportions_df.fillna(0, inplace=True)

# Display the result
print(ball_proportions_df.sort_values(by='Proportion'))


      Proportion
ball            
58      0.000000
39      0.000000
38      0.000000
36      0.000000
48      0.000000
49      0.000000
50      0.000000
31      0.000000
57      0.000000
51      0.000000
52      0.000000
53      0.000000
47      0.000000
55      0.000000
16      0.000000
20      0.000000
14      0.000000
1       0.000000
9       0.000000
3       0.000000
4       0.000000
8       0.000000
56      0.000000
7       0.000000
12      0.005435
24      0.011111
54      0.012579
11      0.018182
13      0.021583
21      0.023474
37      0.026316
19      0.032000
28      0.033333
6       0.043825
33      0.045455
15      0.045802
27      0.047170
30      0.055556
41      0.056034
23      0.056604
2       0.060738
10      0.061947
29      0.068966
44      0.070588
18      0.084046
25      0.086957
0       0.088553
34      0.090361
5       0.093230
17      0.093333
42      0.100559
26      0.100629
46      0.104167
22      0.107042
32      0.107452
43      0.111111
40      0.1456

In [None]:
bmx1.columns

Index(['Nohighschool_25plus', 'Highschool_25plus', 'Somedegree_25plus',
       'Assocdegree_25plus', 'Degree_25plus', 'Under18', '18to24', '25to34',
       '35to49', '50to64', 'Over65', 'White', 'Rural', 'lessthan50K',
       '50to75k', '75to100k', 'morethan100k', 'manu_arg', 'Poverty',
       'Adult obesity raw value', 'Poor or fair health raw value',
       'Unemployment raw value'],
      dtype='object')

In [None]:
columns=['REPUBLICAN_2012','REPUBLICAN_2016','Nohighschool_25plus', 'Highschool_25plus', 'Somedegree_25plus',
       'Assocdegree_25plus', 'Degree_25plus', 'Under18', '18to24', '25to34',
       '35to49', '50to64', 'Over65', 'White', 'Rural', 'lessthan50K',
       '50to75k', '75to100k', 'morethan100k', 'manu_arg', 'Poverty',
       'Adult obesity raw value', 'Poor or fair health raw value',
       'Unemployment raw value']
ball_means=pd.DataFrame()
for col in columns:
    ball_means[col]=US_results.groupby('ball')[col].mean()

In [None]:
ball_means

Unnamed: 0_level_0,REPUBLICAN_2012,REPUBLICAN_2016,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,Over65,White,Rural,lessthan50K,50to75k,75to100k,morethan100k,manu_arg,Poverty,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value
ball,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
0,60.149892,63.292547,12.457019,33.490497,22.833909,9.179806,22.040713,23.035637,9.614039,12.144816,18.221706,20.545356,16.431102,85.680022,39.95043,50.145572,19.293629,12.557667,18.007559,17.634665,14.933153,30.781102,15.901944,5.824436
1,53.497064,56.907903,20.928205,36.996795,20.345513,7.348077,14.382692,23.521154,9.396795,12.088462,18.266667,20.335256,16.380128,58.548077,63.182478,63.775,16.132692,9.079487,11.004487,20.664744,24.609615,35.869872,23.375641,8.258201
2,65.727843,72.156954,16.99013,39.057592,21.283623,7.740672,14.926139,22.103905,7.87039,10.877115,17.897072,21.851193,19.399024,87.117896,96.734544,58.608785,18.33872,10.573319,12.481887,22.314751,18.008026,32.456941,18.239479,6.501271
3,33.833089,37.005677,25.185106,37.189362,18.608511,6.629787,12.404255,22.853191,9.585106,12.274468,17.855319,20.82766,16.604255,35.091489,98.888304,71.925532,13.823404,7.034043,7.219149,24.940426,31.702128,38.761702,28.131915,10.732063
4,51.550907,55.738564,21.963793,39.114943,19.594828,6.804598,12.52069,22.38908,8.894253,11.77931,18.163218,21.190805,17.587356,57.185632,96.598167,66.250575,15.683333,8.435057,9.625862,22.532184,25.738506,35.658621,23.923563,8.568748
5,63.29116,69.101354,15.449501,37.696115,21.504329,8.469367,16.884129,22.498557,8.709767,11.338513,18.197891,21.280355,17.973807,87.926748,67.253498,55.955605,18.865705,11.329301,13.851165,20.680466,17.21121,32.164262,17.587791,6.626545
6,47.669247,46.448863,13.453386,27.874104,22.503586,8.26255,27.908765,23.770518,10.827888,14.271315,18.819522,18.818327,13.495219,64.193227,11.480452,49.073705,18.094821,12.003187,20.830279,11.311155,17.123506,29.613147,17.686056,6.497264
7,33.430072,36.035029,22.058333,33.370833,21.308333,7.908333,15.366667,24.383333,10.4,12.483333,17.65,20.0375,15.054167,34.916667,52.817042,68.454167,14.7,7.783333,9.0375,18.6,30.695833,38.7375,26.575,10.396488
8,53.933766,61.656207,15.6,42.0,23.3,7.0,11.9,12.3,10.5,13.6,29.1,27.8,6.7,25.1,100.0,35.7,21.8,15.6,26.9,69.6,15.5,31.1,17.0,4.585931
9,40.050132,43.074233,12.5,40.3,25.1,6.4,15.7,15.7,11.6,18.5,25.3,23.2,5.7,37.8,100.0,26.4,15.9,19.2,38.6,39.1,7.7,36.2,14.0,4.933243


In [None]:
ball_means.sort_values(by='White',ascending=True)

Unnamed: 0_level_0,REPUBLICAN_2012,REPUBLICAN_2016,Nohighschool_25plus,Highschool_25plus,Somedegree_25plus,Assocdegree_25plus,Degree_25plus,Under18,18to24,25to34,35to49,50to64,Over65,White,Rural,lessthan50K,50to75k,75to100k,morethan100k,manu_arg,Poverty,Adult obesity raw value,Poor or fair health raw value,Unemployment raw value
ball,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
8,53.933766,61.656207,15.6,42.0,23.3,7.0,11.9,12.3,10.5,13.6,29.1,27.8,6.7,25.1,100.0,35.7,21.8,15.6,26.9,69.6,15.5,31.1,17.0,4.585931
11,32.173088,35.584557,21.481818,35.807273,20.661818,7.889091,14.181818,26.481818,10.423636,12.001818,16.934545,19.547273,14.623636,31.347273,98.705752,69.269091,14.538182,7.585455,8.610909,18.850909,33.478182,38.947273,27.66,10.438253
30,27.240753,26.699556,17.022222,29.777778,22.355556,7.044444,23.816667,24.122222,10.455556,14.955556,18.35,19.061111,13.044444,33.527778,5.909676,58.472222,16.661111,10.066667,14.777778,9.716667,24.183333,33.988889,22.777778,8.287212
7,33.430072,36.035029,22.058333,33.370833,21.308333,7.908333,15.366667,24.383333,10.4,12.483333,17.65,20.0375,15.054167,34.916667,52.817042,68.454167,14.7,7.783333,9.0375,18.6,30.695833,38.7375,26.575,10.396488
3,33.833089,37.005677,25.185106,37.189362,18.608511,6.629787,12.404255,22.853191,9.585106,12.274468,17.855319,20.82766,16.604255,35.091489,98.888304,71.925532,13.823404,7.034043,7.219149,24.940426,31.702128,38.761702,28.131915,10.732063
9,40.050132,43.074233,12.5,40.3,25.1,6.4,15.7,15.7,11.6,18.5,25.3,23.2,5.7,37.8,100.0,26.4,15.9,19.2,38.6,39.1,7.7,36.2,14.0,4.933243
39,30.183303,27.591384,14.08,25.3,19.49,7.39,33.73,24.31,9.33,15.19,21.83,18.37,10.98,38.39,2.236622,36.46,18.88,13.5,31.19,5.83,12.45,26.67,15.83,6.026485
14,47.847441,49.403716,18.731132,34.104717,21.881132,7.676415,17.613208,24.565094,10.083019,13.027358,18.120755,19.535849,14.683019,51.151887,36.687256,61.30283,16.400943,9.610377,12.683962,16.964151,24.74434,35.512264,23.05283,8.558139
38,43.416019,42.062505,9.688889,28.322222,23.177778,8.822222,30.011111,25.488889,8.655556,12.811111,21.311111,19.755556,11.933333,55.655556,23.947975,31.177778,17.711111,14.988889,36.133333,6.877778,9.088889,29.011111,13.688889,5.383683
4,51.550907,55.738564,21.963793,39.114943,19.594828,6.804598,12.52069,22.38908,8.894253,11.77931,18.163218,21.190805,17.587356,57.185632,96.598167,66.250575,15.683333,8.435057,9.625862,22.532184,25.738506,35.658621,23.923563,8.568748


In [None]:
ball_means.to_csv('Outputs/US_ballmeans.csv')