In [1]:
!pip install census



In [2]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import pymongo
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import tensorflow as tf

# Census API Key
from config import api_key
#c = Census(api_key, year=2016)

In [3]:
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
def get_census(year):
    c = Census(api_key, year=year)
    census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E"), 
                         {'for': 'county:*'})
                        # {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
    census_pd = pd.DataFrame(census_data)

# Column Reordering
    census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "NAME": "Name", "county": "County"
                                      })

# Add in Poverty Rate (Poverty Count / Population)
    census_pd["Poverty Rate"] = 100 * \
        census_pd["Poverty Count"].astype(
            int) / census_pd["Population"].astype(int)

# Final DataFrame
    census_pd = census_pd[["Name","County", "Population", "Median Age", "Household Income",
                           "Per Capita Income", "Poverty Count", "Poverty Rate"]]

    census_pd['county_name'] = census_pd['Name'].str.replace(r" County,(.*)",'').str.upper()
    census_pd['state'] = census_pd['Name'].str.replace(r"(.*) County, ",'').str.upper()
# Visualize
    
    return census_pd

census_2012 = get_census(2012)
census_2012.head()
census_2012['year'] = 2012

census_2016 = get_census(2016)
census_2016.head()
census_2016['year'] = 2016

census_2020 = get_census(2020)
census_2020['year'] = 2020
census_2020.head()
census = pd.concat([census_2012,census_2016,census_2020])




# Data Shaping

In [4]:
census.dtypes

Name                  object
County                object
Population           float64
Median Age           float64
Household Income     float64
Per Capita Income    float64
Poverty Count        float64
Poverty Rate         float64
county_name           object
state                 object
year                   int64
dtype: object

In [5]:
census = census[census['Household Income']>0]
census

Unnamed: 0,Name,County,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,county_name,state,year
0,"Linn County, Missouri",115,12668.0,43.0,39028.0,20968.0,1678.0,13.245974,LINN,MISSOURI,2012
1,"Howell County, Missouri",091,40330.0,39.6,34148.0,17763.0,8187.0,20.300025,HOWELL,MISSOURI,2012
2,"Johnson County, Missouri",101,52964.0,29.7,47960.0,21375.0,8075.0,15.246205,JOHNSON,MISSOURI,2012
3,"Laclede County, Missouri",105,35507.0,39.1,39101.0,19788.0,6478.0,18.244290,LACLEDE,MISSOURI,2012
4,"Maries County, Missouri",125,9140.0,43.1,44885.0,21883.0,1286.0,14.070022,MARIES,MISSOURI,2012
...,...,...,...,...,...,...,...,...,...,...,...
3216,"Renville County, Minnesota",129,14572.0,44.0,58542.0,31243.0,1373.0,9.422180,RENVILLE,MINNESOTA,2020
3217,"Roseau County, Minnesota",135,15259.0,41.6,62304.0,31452.0,1133.0,7.425126,ROSEAU,MINNESOTA,2020
3218,"Sherburne County, Minnesota",141,96015.0,36.1,88671.0,36022.0,4953.0,5.158569,SHERBURNE,MINNESOTA,2020
3219,"Steele County, Minnesota",147,36710.0,39.2,68172.0,34648.0,2887.0,7.864342,STEELE,MINNESOTA,2020


In [6]:
census.to_csv("clean_census.csv", encoding="utf-8", index=False)

In [7]:
# # Save as a csv
# # Note to avoid any issues later, use encoding="utf-8"
winners_pd=pd.read_csv("countypres_2000-2020_with_winner.csv")
# winners_pd = winners_pd[((winners_pd['year'] == 2016) | (winners_pd['year'] == 2020)) & (winners_pd['state']== 'GEORGIA') & (winners_pd['county_name']== "WORTH")]
winners_pd

Unnamed: 0,year,state,state_po,county_name,county_fips,office,candidate,party,candidatevotes,totalvotes,version,mode,Concat,Winner
0,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,AL GORE,DEMOCRAT,4942,17208,20220315,TOTAL,2000ALABAMAAUTAUGA,Republican
1,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,GEORGE W. BUSH,REPUBLICAN,11993,17208,20220315,TOTAL,2000ALABAMAAUTAUGA,Republican
2,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,RALPH NADER,GREEN,160,17208,20220315,TOTAL,2000ALABAMAAUTAUGA,Republican
3,2000,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,OTHER,OTHER,113,17208,20220315,TOTAL,2000ALABAMAAUTAUGA,Republican
4,2000,ALABAMA,AL,BALDWIN,1003.0,US PRESIDENT,AL GORE,DEMOCRAT,13997,56480,20220315,TOTAL,2000ALABAMABALDWIN,Republican
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72612,2020,WYOMING,WY,WASHAKIE,56043.0,US PRESIDENT,DONALD J TRUMP,REPUBLICAN,3245,4032,20220315,TOTAL,2020WYOMINGWASHAKIE,Republican
72613,2020,WYOMING,WY,WESTON,56045.0,US PRESIDENT,JOSEPH R BIDEN JR,DEMOCRAT,360,3560,20220315,TOTAL,2020WYOMINGWESTON,Republican
72614,2020,WYOMING,WY,WESTON,56045.0,US PRESIDENT,JO JORGENSEN,LIBERTARIAN,46,3560,20220315,TOTAL,2020WYOMINGWESTON,Republican
72615,2020,WYOMING,WY,WESTON,56045.0,US PRESIDENT,OTHER,OTHER,47,3560,20220315,TOTAL,2020WYOMINGWESTON,Republican


In [8]:
#Dave's code
winners_pd = winners_pd[((winners_pd['year'] == 2012) | (winners_pd['year'] == 2016) | (winners_pd['year'] == 2020))]
winners_pd = winners_pd[((winners_pd['party'] == 'DEMOCRAT') | (winners_pd['party'] == 'REPUBLICAN'))]
#winners_pd = winners_pd[((winners_pd['state']== 'GEORGIA') & (winners_pd['county_name']== "WORTH"))]
#winners_pd.head(100)
winners_pd=winners_pd.drop_duplicates(subset=['state_po','county_name', 'year'],keep='first')

In [9]:
winners_pd

Unnamed: 0,year,state,state_po,county_name,county_fips,office,candidate,party,candidatevotes,totalvotes,version,mode,Concat,Winner
31166,2012,ALABAMA,AL,AUTAUGA,1001.0,US PRESIDENT,BARACK OBAMA,DEMOCRAT,6363,23932,20220315,TOTAL,2012ALABAMAAUTAUGA,Republican
31169,2012,ALABAMA,AL,BALDWIN,1003.0,US PRESIDENT,BARACK OBAMA,DEMOCRAT,18424,85338,20220315,TOTAL,2012ALABAMABALDWIN,Republican
31172,2012,ALABAMA,AL,BARBOUR,1005.0,US PRESIDENT,BARACK OBAMA,DEMOCRAT,5912,11509,20220315,TOTAL,2012ALABAMABARBOUR,Democrat
31175,2012,ALABAMA,AL,BIBB,1007.0,US PRESIDENT,BARACK OBAMA,DEMOCRAT,2202,8420,20220315,TOTAL,2012ALABAMABIBB,Republican
31178,2012,ALABAMA,AL,BLOUNT,1009.0,US PRESIDENT,BARACK OBAMA,DEMOCRAT,2970,24006,20220315,TOTAL,2012ALABAMABLOUNT,Republican
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72597,2020,WYOMING,WY,SWEETWATER,56037.0,US PRESIDENT,JOSEPH R BIDEN JR,DEMOCRAT,3823,16698,20220315,TOTAL,2020WYOMINGSWEETWATER,Republican
72601,2020,WYOMING,WY,TETON,56039.0,US PRESIDENT,JOSEPH R BIDEN JR,DEMOCRAT,9848,14787,20220315,TOTAL,2020WYOMINGTETON,Democrat
72605,2020,WYOMING,WY,UINTA,56041.0,US PRESIDENT,JOSEPH R BIDEN JR,DEMOCRAT,1591,9459,20220315,TOTAL,2020WYOMINGUINTA,Republican
72609,2020,WYOMING,WY,WASHAKIE,56043.0,US PRESIDENT,JOSEPH R BIDEN JR,DEMOCRAT,651,4032,20220315,TOTAL,2020WYOMINGWASHAKIE,Republican


In [10]:
cleaned_census_data = pd.merge(census, winners_pd, how='left', left_on=['year','state', 'county_name'], right_on=['year','state', 'county_name'])

cleaned_census_data = cleaned_census_data[['year','state','state_po','county_name','Population','Median Age', 'Household Income', 'Per Capita Income', 'Poverty Rate', 'Winner']]
cleaned_census_data

Unnamed: 0,year,state,state_po,county_name,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,Winner
0,2012,MISSOURI,MO,LINN,12668.0,43.0,39028.0,20968.0,13.245974,Republican
1,2012,MISSOURI,MO,HOWELL,40330.0,39.6,34148.0,17763.0,20.300025,Republican
2,2012,MISSOURI,MO,JOHNSON,52964.0,29.7,47960.0,21375.0,15.246205,Republican
3,2012,MISSOURI,MO,LACLEDE,35507.0,39.1,39101.0,19788.0,18.244290,Republican
4,2012,MISSOURI,MO,MARIES,9140.0,43.1,44885.0,21883.0,14.070022,Republican
...,...,...,...,...,...,...,...,...,...,...
9656,2020,MINNESOTA,MN,RENVILLE,14572.0,44.0,58542.0,31243.0,9.422180,Republican
9657,2020,MINNESOTA,MN,ROSEAU,15259.0,41.6,62304.0,31452.0,7.425126,Republican
9658,2020,MINNESOTA,MN,SHERBURNE,96015.0,36.1,88671.0,36022.0,5.158569,Republican
9659,2020,MINNESOTA,MN,STEELE,36710.0,39.2,68172.0,34648.0,7.864342,Republican


In [12]:
cleaned_census_data.dropna(subset =["state_po"],inplace=True)
# cleaned_census_data.drop_duplicates()
# cleaned_census_data

In [13]:
cleaned_census_data.groupby('year').sum()

Unnamed: 0_level_0,Population,Median Age,Household Income,Per Capita Income,Poverty Rate
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012,298426160.0,121679.0,136661286.0,69993693.0,46882.007175
2016,307582561.0,123750.4,143834484.0,74999435.0,47062.421839
2020,315412439.0,125295.5,165020057.0,87102240.0,41776.289741


In [14]:
cleaned_census_data.to_csv("clean_data.csv", encoding="utf-8", index=False)

In [15]:
cleaned_census_data

Unnamed: 0,year,state,state_po,county_name,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,Winner
0,2012,MISSOURI,MO,LINN,12668.0,43.0,39028.0,20968.0,13.245974,Republican
1,2012,MISSOURI,MO,HOWELL,40330.0,39.6,34148.0,17763.0,20.300025,Republican
2,2012,MISSOURI,MO,JOHNSON,52964.0,29.7,47960.0,21375.0,15.246205,Republican
3,2012,MISSOURI,MO,LACLEDE,35507.0,39.1,39101.0,19788.0,18.244290,Republican
4,2012,MISSOURI,MO,MARIES,9140.0,43.1,44885.0,21883.0,14.070022,Republican
...,...,...,...,...,...,...,...,...,...,...
9656,2020,MINNESOTA,MN,RENVILLE,14572.0,44.0,58542.0,31243.0,9.422180,Republican
9657,2020,MINNESOTA,MN,ROSEAU,15259.0,41.6,62304.0,31452.0,7.425126,Republican
9658,2020,MINNESOTA,MN,SHERBURNE,96015.0,36.1,88671.0,36022.0,5.158569,Republican
9659,2020,MINNESOTA,MN,STEELE,36710.0,39.2,68172.0,34648.0,7.864342,Republican


# Create SVM Model

In [16]:
# create numeric value for year and drop the redundant state information
cleaned_census_data['years since'] = cleaned_census_data['year'].map({2012:11, 2016: 7, 2020: 3})
shaped_data = cleaned_census_data[(cleaned_census_data['years since'] == 11) | (cleaned_census_data['years since'] == 7)]
shaped_data = shaped_data.drop(columns=['year','state','state_po'],axis=1)

In [17]:
shaped_data.head()

Unnamed: 0,county_name,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,Winner,years since
0,LINN,12668.0,43.0,39028.0,20968.0,13.245974,Republican,11
1,HOWELL,40330.0,39.6,34148.0,17763.0,20.300025,Republican,11
2,JOHNSON,52964.0,29.7,47960.0,21375.0,15.246205,Republican,11
3,LACLEDE,35507.0,39.1,39101.0,19788.0,18.24429,Republican,11
4,MARIES,9140.0,43.1,44885.0,21883.0,14.070022,Republican,11


In [18]:
svm_data_cat = shaped_data.dtypes[shaped_data.dtypes == "object"].index.tolist()
shaped_data[svm_data_cat].nunique()

county_name    1739
Winner            2
dtype: int64

In [19]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(shaped_data[svm_data_cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(svm_data_cat)
encode_df.head()



Unnamed: 0,county_name_ABBEVILLE,county_name_ACCOMACK,county_name_ADA,county_name_ADAIR,county_name_ADAMS,county_name_ADDISON,county_name_AIKEN,county_name_AITKIN,county_name_ALACHUA,county_name_ALAMANCE,...,county_name_YOLO,county_name_YORK,county_name_YOUNG,county_name_YUBA,county_name_YUMA,county_name_ZAPATA,county_name_ZAVALA,county_name_ZIEBACH,Winner_Democrat,Winner_Republican
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [20]:
# Merge one-hot encoded features and drop the originals
shaped_data = shaped_data.merge(encode_df,left_index=True, right_index=True)
svm_data = shaped_data.drop(svm_data_cat,1)
svm_data.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,years since,county_name_ABBEVILLE,county_name_ACCOMACK,county_name_ADA,county_name_ADAIR,...,county_name_YOLO,county_name_YORK,county_name_YOUNG,county_name_YUBA,county_name_YUMA,county_name_ZAPATA,county_name_ZAVALA,county_name_ZIEBACH,Winner_Democrat,Winner_Republican
0,12668.0,43.0,39028.0,20968.0,13.245974,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,40330.0,39.6,34148.0,17763.0,20.300025,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,52964.0,29.7,47960.0,21375.0,15.246205,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,35507.0,39.1,39101.0,19788.0,18.24429,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,9140.0,43.1,44885.0,21883.0,14.070022,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [21]:
target = svm_data['Winner_Republican']
target_names = ['Democrat','Republican']

In [22]:
data = svm_data.drop(columns=['Winner_Democrat','Winner_Republican'])
feature_names = data.columns
data.head()

Unnamed: 0,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,years since,county_name_ABBEVILLE,county_name_ACCOMACK,county_name_ADA,county_name_ADAIR,...,county_name_YELLOWSTONE,county_name_YOAKUM,county_name_YOLO,county_name_YORK,county_name_YOUNG,county_name_YUBA,county_name_YUMA,county_name_ZAPATA,county_name_ZAVALA,county_name_ZIEBACH
0,12668.0,43.0,39028.0,20968.0,13.245974,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,40330.0,39.6,34148.0,17763.0,20.300025,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,52964.0,29.7,47960.0,21375.0,15.246205,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,35507.0,39.1,39101.0,19788.0,18.24429,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,9140.0,43.1,44885.0,21883.0,14.070022,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=10)

In [24]:
# Support vector machine rbf classifier
from sklearn.svm import SVC 
model = SVC(kernel='rbf')
model.fit(X_train, y_train)

SVC()

In [25]:
# Model Accuracy
print('Test Acc: %.3f' % model.score(X_test, y_test))

Test Acc: 0.804


In [26]:
# Calculate the classification report
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
print(classification_report(y_test, predictions,
                            target_names=target_names))

              precision    recall  f1-score   support

    Democrat       0.00      0.00      0.00       274
  Republican       0.80      1.00      0.89      1123

    accuracy                           0.80      1397
   macro avg       0.40      0.50      0.45      1397
weighted avg       0.65      0.80      0.72      1397



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Use the Model to predict an outcome

In [27]:
# Get new data
test = cleaned_census_data[cleaned_census_data['years since'] == 3]
test.head()

Unnamed: 0,year,state,state_po,county_name,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,Winner,years since
6441,2020,ALABAMA,AL,AUTAUGA,55639.0,38.6,57982.0,29804.0,15.08654,Republican,3
6442,2020,ALABAMA,AL,BALDWIN,218289.0,43.2,61756.0,33751.0,9.042599,Republican,3
6443,2020,ALABAMA,AL,BARBOUR,25026.0,40.1,34990.0,20074.0,25.221769,Republican,3
6444,2020,ALABAMA,AL,BIBB,22374.0,39.9,51721.0,22626.0,16.782873,Republican,3
6445,2020,ALABAMA,AL,BLOUNT,57755.0,41.0,48922.0,25457.0,13.586702,Republican,3


In [28]:
test = test.drop(columns=['year','state','state_po'],axis=1)
svm_predict_cat = test.dtypes[test.dtypes == "object"].index.tolist()
test[svm_predict_cat].nunique()

county_name    1738
Winner            2
dtype: int64

In [29]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(test[svm_predict_cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(svm_predict_cat)
encode_df.head()



Unnamed: 0,county_name_ABBEVILLE,county_name_ACCOMACK,county_name_ADA,county_name_ADAIR,county_name_ADAMS,county_name_ADDISON,county_name_AIKEN,county_name_AITKIN,county_name_ALACHUA,county_name_ALAMANCE,...,county_name_YOLO,county_name_YORK,county_name_YOUNG,county_name_YUBA,county_name_YUMA,county_name_ZAPATA,county_name_ZAVALA,county_name_ZIEBACH,Winner_Democrat,Winner_Republican
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [30]:
# Merge one-hot encoded features and drop the originals
svm_predict = test.merge(encode_df,left_index=True, right_index=True)
svm_predict = shaped_data.drop(svm_predict_cat,1)
svm_predict.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,years since,county_name_ABBEVILLE,county_name_ACCOMACK,county_name_ADA,county_name_ADAIR,...,county_name_YOLO,county_name_YORK,county_name_YOUNG,county_name_YUBA,county_name_YUMA,county_name_ZAPATA,county_name_ZAVALA,county_name_ZIEBACH,Winner_Democrat,Winner_Republican
0,12668.0,43.0,39028.0,20968.0,13.245974,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,40330.0,39.6,34148.0,17763.0,20.300025,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,52964.0,29.7,47960.0,21375.0,15.246205,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,35507.0,39.1,39101.0,19788.0,18.24429,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,9140.0,43.1,44885.0,21883.0,14.070022,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [31]:
test_data = svm_predict.iloc[0:10,:]
test_data

Unnamed: 0,Population,Median Age,Household Income,Per Capita Income,Poverty Rate,years since,county_name_ABBEVILLE,county_name_ACCOMACK,county_name_ADA,county_name_ADAIR,...,county_name_YOLO,county_name_YORK,county_name_YOUNG,county_name_YUBA,county_name_YUMA,county_name_ZAPATA,county_name_ZAVALA,county_name_ZIEBACH,Winner_Democrat,Winner_Republican
0,12668.0,43.0,39028.0,20968.0,13.245974,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,40330.0,39.6,34148.0,17763.0,20.300025,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,52964.0,29.7,47960.0,21375.0,15.246205,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,35507.0,39.1,39101.0,19788.0,18.24429,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,9140.0,43.1,44885.0,21883.0,14.070022,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5,44825.0,34.0,41388.0,20319.0,17.467931,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
6,89567.0,38.5,67419.0,35133.0,7.254904,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
7,65246.0,38.7,36716.0,18893.0,16.066885,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
9,17205.0,41.9,57406.0,25821.0,7.26533,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
10,42792.0,40.6,34343.0,19775.0,19.971023,11,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [32]:
target = test_data['Winner_Republican']
target_names = ['Democrat', 'Republican']

In [33]:
test_data = test_data.drop(columns=['Winner_Republican','Winner_Democrat'], axis=1)


In [34]:
target

0     1.0
1     1.0
2     1.0
3     1.0
4     1.0
5     1.0
6     1.0
7     1.0
9     1.0
10    1.0
Name: Winner_Republican, dtype: float64

In [35]:
predictions = model.predict(test_data)
print(predictions)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
