
# 목표: 우리는 어떤 사람이 선거 투표에 참여하는지, 심리적 성향과 인구통계학적 정보를 가지고 예측할 수 있을까?

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
from sklearn.metrics import f1_score, confusion_matrix, precision_recall_curve, roc_curve
from sklearn.preprocessing import StandardScaler, Binarizer
from sklearn.linear_model import LogisticRegression

from sklearn.preprocessing import LabelEncoder, StandardScaler
import lightgbm as lgbm

In [2]:
# Load the train/test CSVs; the first CSV column becomes the row index.
train = pd.read_csv("train.csv", index_col=0)
test = pd.read_csv("test.csv", index_col=0)

# Let pandas render up to 150 columns when displaying a DataFrame.
pd.options.display.max_columns = 150

<h1>데이터 전처리</h1>

---

#### 1. 데이터 확인

In [3]:
# Count missing cells over the whole frame: isnull() flags each cell, the
# first sum() totals per column and the second sum() totals across columns.
# The printed result shows that neither train nor test has missing values.
train_missing = train.isnull().sum().sum()
print("train 데이터 누락값 수: {}개".format(train_missing))

test_missing = test.isnull().sum().sum()
print("test 데이터 누락값 수: {}개".format(test_missing))

train 데이터 누락값 수: 0개
test 데이터 누락값 수: 0개


#### 2. 이상값 처리

In [4]:
# Frequency table of 'familysize' in train. It exposes an outlier: the
# largest value is 999 while the second-largest is only 44.
familysize_counts = train['familysize'].value_counts(ascending=False)
print(familysize_counts)

2      14321
3       9037
1       5261
4       3917
5       1521
0        983
6        662
7        313
8        175
9        102
10        48
11        29
12        16
13        10
14         7
15         7
44         3
17         3
21         2
16         2
20         2
999        1
34         1
18         1
30         1
Name: familysize, dtype: int64


In [5]:
# Rather than applying an IQR (interquartile range) rule, remove only the
# single extreme 'familysize' value (999); the next-largest value is 44.
#
# The row is selected by its value instead of by idxmax() so that this cell
# is idempotent: re-running it cannot drop a further, legitimate row once
# the 999 entry is already gone.
FAMILYSIZE_OUTLIER = 999
train = train.loc[train['familysize'] != FAMILYSIZE_OUTLIER].copy()

#### 3. 데이터 변환

In [6]:
# Alternative: label encoding (kept for reference, currently disabled)
# 'gender', 'age_group', 'race' and 'religion' are not integer-typed, so they are encoded into integers.

# def label_encoding(df):
#     le = LabelEncoder()
#     original_columns = list(df.columns)
    
#     for col in df:
#         # Only encode columns whose dtype is object
#         if df[col].dtype == 'object':
#             le.fit(df[col])
#             df[col] = le.transform(df[col])

#     # Collect the names of any newly created columns
#     # NOTE(review): the loop above overwrites columns in place and never adds
#     # any, so this list would always be empty.
#     new_columns = [c for c in df.columns if c not in original_columns]
    
#     # Return the numerically encoded df and the list of new column names
#     return df, new_columns

In [7]:
# One-hot encode the non-numeric columns ('gender', 'age_group', 'race',
# 'religion') so that every feature in train is numeric, then display it.
train = pd.get_dummies(data=train)
train

Unnamed: 0_level_0,QaA,QaE,QbA,QbE,QcA,QcE,QdA,QdE,QeA,QeE,QfA,QfE,QgA,QgE,QhA,QhE,QiA,QiE,QjA,QjE,QkA,QkE,QlA,QlE,QmA,QmE,QnA,QnE,QoA,QoE,QpA,QpE,QqA,QqE,QrA,QrE,QsA,QsE,QtA,QtE,education,engnat,familysize,hand,married,tp01,tp02,tp03,tp04,tp05,tp06,tp07,tp08,tp09,tp10,urban,voted,wf_01,wf_02,wf_03,wr_01,wr_02,wr_03,wr_04,wr_05,wr_06,wr_07,wr_08,wr_09,wr_10,wr_11,wr_12,wr_13,age_group_+70s,age_group_10s,age_group_20s,age_group_30s,age_group_40s,age_group_50s,age_group_60s,gender_Female,gender_Male,race_Arab,race_Asian,race_Black,race_Indigenous Australian,race_Native American,race_Other,race_White,religion_Agnostic,religion_Atheist,religion_Buddhist,religion_Christian_Catholic,religion_Christian_Mormon,religion_Christian_Other,religion_Christian_Protestant,religion_Hindu,religion_Jewish,religion_Muslim,religion_Other,religion_Sikh
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
43375,1.0,324,5.0,1067,5.0,359,1.0,709,1.0,742,1.0,330,5.0,541,5.0,1376,2.0,1397,5.0,1250,2.0,762,5.0,517,4.0,3614,1.0,521,5.0,705,4.0,633,1.0,330,1.0,512,5.0,347,5.0,588,3,1,2,1,1,2,2,1,5,2,4,5,4,1,2,2,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0
4486,1.0,534,5.0,1555,5.0,2024,1.0,1097,1.0,3077,3.0,6721,1.0,1306,4.0,2968,5.0,1998,5.0,2756,2.0,3429,5.0,2395,5.0,1745,1.0,584,5.0,1857,5.0,2894,2.0,3761,1.0,2717,3.0,2802,5.0,1299,2,2,4,1,1,1,0,2,2,0,6,1,5,5,6,3,1,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
10262,5.0,609,1.0,749,2.0,624,1.0,1833,3.0,1474,5.0,728,4.0,1107,3.0,1743,5.0,3008,3.0,1649,5.0,870,1.0,1551,1.0,989,5.0,347,1.0,824,1.0,1445,2.0,884,5.0,744,1.0,899,4.0,963,2,1,3,1,1,0,1,0,5,3,6,0,2,1,6,2,1,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0
14088,4.0,182,1.0,2969,1.0,1955,4.0,4630,1.0,1321,3.0,2345,4.0,850,1.0,3559,5.0,6761,1.0,8315,5.0,614,1.0,1751,1.0,1919,4.0,96,4.0,1251,5.0,261,4.0,548,4.0,2576,2.0,877,5.0,967,3,2,3,1,1,2,2,4,5,0,1,0,2,0,4,1,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
13398,4.0,549,5.0,1679,5.0,481,3.0,595,1.0,1202,1.0,348,1.0,422,2.0,2453,1.0,1051,5.0,4733,3.0,2705,5.0,773,4.0,775,2.0,1066,5.0,824,1.0,750,4.0,535,4.0,1000,4.0,964,5.0,683,2,1,4,1,1,6,3,6,2,0,6,2,0,6,4,2,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16511,1.0,312,5.0,965,5.0,499,1.0,1188,1.0,437,1.0,319,3.0,14430,3.0,1435,1.0,2406,5.0,1222,3.0,650,5.0,8213,4.0,1231,3.0,454,5.0,764,5.0,480,1.0,526,1.0,598,5.0,461,5.0,729,2,1,6,1,1,4,3,3,5,0,1,2,2,0,3,2,1,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
33507,3.0,474,4.0,1033,5.0,602,2.0,703,1.0,782,3.0,410,1.0,388,3.0,865,4.0,1533,5.0,639,4.0,1302,2.0,725,5.0,1104,2.0,638,4.0,480,5.0,984,3.0,316,2.0,688,5.0,528,5.0,732,1,2,4,3,1,5,1,1,6,2,5,4,6,3,6,1,0,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
44917,2.0,617,4.0,1262,2.0,709,1.0,1063,2.0,1728,1.0,404,4.0,635,2.0,6379,2.0,2630,4.0,1362,5.0,1714,5.0,642,4.0,538,1.0,1432,5.0,3183,2.0,1183,4.0,435,1.0,729,5.0,751,3.0,976,4,1,6,1,2,1,2,0,5,1,2,1,2,1,5,2,1,0,1,1,1,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0
36126,5.0,294,1.0,985,1.0,504,3.0,613,5.0,551,1.0,502,4.0,479,1.0,809,4.0,1515,2.0,806,5.0,550,5.0,1544,4.0,702,5.0,385,2.0,1433,1.0,688,5.0,275,1.0,828,2.0,519,4.0,840,4,1,4,3,1,4,4,5,4,1,3,2,5,1,5,3,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0


In [8]:
# One-hot encode test the same way as train.
test = pd.get_dummies(test)

# Encoding the two frames independently only yields matching feature columns
# if every category occurs in both files. Align test to the already-encoded
# train columns (minus the 'voted' label): dummy columns missing from test are
# added as all-zero, and columns train never saw are dropped. This cell must
# therefore run after train has been encoded.
feature_columns = train.columns.drop('voted', errors='ignore')
test = test.reindex(columns=feature_columns, fill_value=0)
test

Unnamed: 0_level_0,QaA,QaE,QbA,QbE,QcA,QcE,QdA,QdE,QeA,QeE,QfA,QfE,QgA,QgE,QhA,QhE,QiA,QiE,QjA,QjE,QkA,QkE,QlA,QlE,QmA,QmE,QnA,QnE,QoA,QoE,QpA,QpE,QqA,QqE,QrA,QrE,QsA,QsE,QtA,QtE,education,engnat,familysize,hand,married,tp01,tp02,tp03,tp04,tp05,tp06,tp07,tp08,tp09,tp10,urban,wf_01,wf_02,wf_03,wr_01,wr_02,wr_03,wr_04,wr_05,wr_06,wr_07,wr_08,wr_09,wr_10,wr_11,wr_12,wr_13,age_group_+70s,age_group_10s,age_group_20s,age_group_30s,age_group_40s,age_group_50s,age_group_60s,gender_Female,gender_Male,race_Arab,race_Asian,race_Black,race_Indigenous Australian,race_Native American,race_Other,race_White,religion_Agnostic,religion_Atheist,religion_Buddhist,religion_Christian_Catholic,religion_Christian_Mormon,religion_Christian_Other,religion_Christian_Protestant,religion_Hindu,religion_Jewish,religion_Muslim,religion_Other,religion_Sikh
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1
38741,1.0,1069,4.0,3768,5.0,953,2.0,1027,5.0,1370,1.0,519,2.0,15397,3.0,1976,5.0,3383,4.0,10762,4.0,8356,3.0,3440,3.0,580,3.0,1448,3.0,880,5.0,1186,2.0,1833,2.0,2819,4.0,1094,5.0,962,2,1,1,1,1,4,2,1,3,0,2,5,5,0,6,3,0,0,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0
43904,2.0,351,2.0,761,3.0,474,2.0,569,2.0,762,1.0,2079,3.0,1577,1.0,1383,3.0,1257,5.0,299,3.0,500,5.0,3345,4.0,474,1.0,260,4.0,509,3.0,982,1.0,273,2.0,516,3.0,508,3.0,841,4,1,2,1,2,5,4,1,5,2,3,0,5,0,6,3,0,0,0,1,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0
41036,2.0,469,1.0,5394,1.0,461,3.0,567,4.0,784,5.0,454,5.0,586,1.0,1309,5.0,378,1.0,397,5.0,473,1.0,501,2.0,477,4.0,558,2.0,2189,1.0,352,5.0,803,5.0,503,3.0,419,1.0,536,2,1,2,1,1,2,0,2,1,0,0,1,3,4,5,2,0,0,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0
6939,2.0,805,5.0,2828,2.0,2503,1.0,2332,1.0,1928,1.0,775,1.0,5697,5.0,1194,1.0,3867,5.0,7538,5.0,1732,5.0,1665,5.0,1746,2.0,769,5.0,1529,2.0,1875,3.0,523,2.0,1188,1.0,5350,5.0,2169,2,2,1,1,1,1,6,0,6,0,3,4,6,0,6,3,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0
14682,1.0,541,4.0,700,4.0,1110,1.0,398,1.0,795,4.0,788,4.0,582,2.0,1146,5.0,4115,4.0,689,4.0,1298,5.0,486,5.0,429,4.0,360,2.0,842,4.0,852,2.0,317,3.0,882,5.0,410,3.0,1232,2,1,2,2,1,4,2,2,1,0,1,1,3,3,5,1,0,0,1,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28225,1.0,347,5.0,755,5.0,888,1.0,775,1.0,1162,5.0,492,5.0,671,5.0,1418,5.0,2577,2.0,1073,5.0,1404,5.0,1236,5.0,1007,1.0,1234,5.0,606,5.0,816,1.0,908,4.0,897,3.0,733,5.0,553,3,2,2,1,1,4,1,2,0,0,0,3,0,6,4,1,0,0,0,0,1,0,0,0,0,1,1,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0
5610,1.0,370,4.0,694,5.0,564,1.0,665,1.0,760,1.0,840,2.0,550,5.0,3964,2.0,1040,5.0,744,4.0,577,3.0,691,5.0,554,2.0,471,4.0,436,4.0,2074,4.0,846,2.0,636,5.0,419,4.0,1464,3,1,1,1,3,3,2,0,5,0,5,2,2,1,6,3,0,0,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0
41745,1.0,843,3.0,3181,5.0,113,1.0,1280,4.0,1020,5.0,1346,1.0,1190,1.0,2442,5.0,18667,4.0,1231,3.0,2245,3.0,211,5.0,1705,4.0,292,5.0,1500,1.0,1632,1.0,699,1.0,897,1.0,1962,5.0,1848,3,2,2,1,1,6,2,6,0,0,6,2,0,6,6,3,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
24818,3.0,1346,3.0,1532,5.0,1245,1.0,1033,1.0,846,5.0,388,4.0,768,2.0,1327,3.0,2245,3.0,2770,5.0,1879,5.0,793,4.0,548,2.0,1089,3.0,634,5.0,449,2.0,360,3.0,544,4.0,938,3.0,1668,2,2,2,1,1,5,2,5,5,0,4,2,0,2,2,3,0,0,0,1,1,0,1,1,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


#### 4. 데이터 분할

In [9]:
# Split train into the label (the 'voted' column) as train_y and the
# remaining feature columns as train_x.
train_y = train['voted']
train_x = train.drop(columns=['voted'])

In [10]:
# Display the feature matrix (train without the 'voted' column).
train_x

Unnamed: 0_level_0,QaA,QaE,QbA,QbE,QcA,QcE,QdA,QdE,QeA,QeE,QfA,QfE,QgA,QgE,QhA,QhE,QiA,QiE,QjA,QjE,QkA,QkE,QlA,QlE,QmA,QmE,QnA,QnE,QoA,QoE,QpA,QpE,QqA,QqE,QrA,QrE,QsA,QsE,QtA,QtE,education,engnat,familysize,hand,married,tp01,tp02,tp03,tp04,tp05,tp06,tp07,tp08,tp09,tp10,urban,wf_01,wf_02,wf_03,wr_01,wr_02,wr_03,wr_04,wr_05,wr_06,wr_07,wr_08,wr_09,wr_10,wr_11,wr_12,wr_13,age_group_+70s,age_group_10s,age_group_20s,age_group_30s,age_group_40s,age_group_50s,age_group_60s,gender_Female,gender_Male,race_Arab,race_Asian,race_Black,race_Indigenous Australian,race_Native American,race_Other,race_White,religion_Agnostic,religion_Atheist,religion_Buddhist,religion_Christian_Catholic,religion_Christian_Mormon,religion_Christian_Other,religion_Christian_Protestant,religion_Hindu,religion_Jewish,religion_Muslim,religion_Other,religion_Sikh
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1
43375,1.0,324,5.0,1067,5.0,359,1.0,709,1.0,742,1.0,330,5.0,541,5.0,1376,2.0,1397,5.0,1250,2.0,762,5.0,517,4.0,3614,1.0,521,5.0,705,4.0,633,1.0,330,1.0,512,5.0,347,5.0,588,3,1,2,1,1,2,2,1,5,2,4,5,4,1,2,2,0,1,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0
4486,1.0,534,5.0,1555,5.0,2024,1.0,1097,1.0,3077,3.0,6721,1.0,1306,4.0,2968,5.0,1998,5.0,2756,2.0,3429,5.0,2395,5.0,1745,1.0,584,5.0,1857,5.0,2894,2.0,3761,1.0,2717,3.0,2802,5.0,1299,2,2,4,1,1,1,0,2,2,0,6,1,5,5,6,3,0,0,0,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
10262,5.0,609,1.0,749,2.0,624,1.0,1833,3.0,1474,5.0,728,4.0,1107,3.0,1743,5.0,3008,3.0,1649,5.0,870,1.0,1551,1.0,989,5.0,347,1.0,824,1.0,1445,2.0,884,5.0,744,1.0,899,4.0,963,2,1,3,1,1,0,1,0,5,3,6,0,2,1,6,2,0,0,0,0,1,0,1,1,0,1,1,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0
14088,4.0,182,1.0,2969,1.0,1955,4.0,4630,1.0,1321,3.0,2345,4.0,850,1.0,3559,5.0,6761,1.0,8315,5.0,614,1.0,1751,1.0,1919,4.0,96,4.0,1251,5.0,261,4.0,548,4.0,2576,2.0,877,5.0,967,3,2,3,1,1,2,2,4,5,0,1,0,2,0,4,1,0,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
13398,4.0,549,5.0,1679,5.0,481,3.0,595,1.0,1202,1.0,348,1.0,422,2.0,2453,1.0,1051,5.0,4733,3.0,2705,5.0,773,4.0,775,2.0,1066,5.0,824,1.0,750,4.0,535,4.0,1000,4.0,964,5.0,683,2,1,4,1,1,6,3,6,2,0,6,2,0,6,4,2,0,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16511,1.0,312,5.0,965,5.0,499,1.0,1188,1.0,437,1.0,319,3.0,14430,3.0,1435,1.0,2406,5.0,1222,3.0,650,5.0,8213,4.0,1231,3.0,454,5.0,764,5.0,480,1.0,526,1.0,598,5.0,461,5.0,729,2,1,6,1,1,4,3,3,5,0,1,2,2,0,3,2,0,1,1,1,1,0,1,1,0,1,1,0,1,1,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
33507,3.0,474,4.0,1033,5.0,602,2.0,703,1.0,782,3.0,410,1.0,388,3.0,865,4.0,1533,5.0,639,4.0,1302,2.0,725,5.0,1104,2.0,638,4.0,480,5.0,984,3.0,316,2.0,688,5.0,528,5.0,732,1,2,4,3,1,5,1,1,6,2,5,4,6,3,6,1,0,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
44917,2.0,617,4.0,1262,2.0,709,1.0,1063,2.0,1728,1.0,404,4.0,635,2.0,6379,2.0,2630,4.0,1362,5.0,1714,5.0,642,4.0,538,1.0,1432,5.0,3183,2.0,1183,4.0,435,1.0,729,5.0,751,3.0,976,4,1,6,1,2,1,2,0,5,1,2,1,2,1,5,2,0,1,1,1,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0
36126,5.0,294,1.0,985,1.0,504,3.0,613,5.0,551,1.0,502,4.0,479,1.0,809,4.0,1515,2.0,806,5.0,550,5.0,1544,4.0,702,5.0,385,2.0,1433,1.0,688,5.0,275,1.0,828,2.0,519,4.0,840,4,1,4,3,1,4,4,5,4,1,3,2,5,1,5,3,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0


In [11]:
# Display the label Series ('voted') extracted from train.
train_y

index
43375    1
4486     1
10262    1
14088    0
13398    1
        ..
16511    1
33507    0
44917    1
36126    1
42340    1
Name: voted, Length: 36424, dtype: int64