In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import accuracy_score,make_scorer,f1_score,classification_report,average_precision_score
from sklearn.preprocessing import Normalizer,MinMaxScaler,StandardScaler,normalize
from sklearn.cross_validation import train_test_split

In [2]:
pd.set_option('max_columns',1000)
pd.set_option('max_rows',1000)

In [3]:
#############################################
# PANDAS HELPERS
#############################################

def remove_column_from_data_frame(col_to_remove, data_frame):

    if col_to_remove in list(data_frame.columns):
        data_frame.drop(col_to_remove, axis=1, inplace=True)

        
def remove_columns_from_data_frame(cols_to_remove, data_frame):

    column_dict = {x: None for x in list(data_frame.columns)}

    cols_to_remove = [x for x in cols_to_remove if x in column_dict]

    data_frame.drop(labels=cols_to_remove, axis=1, inplace=True)
    

def remove_columns_like(column_pattern, data_frame):
    
    for column in list(data_frame.columns):
        if column_pattern in column:
            data_frame.drop(column, axis=1, inplace=True)


def fill_nas(value, data_frame):
    
    data_frame.fillna(0, inplace=True)

In [4]:
#############################################
# DATA RETRIEVAL HELPERS
#############################################

def get_data(n_rows=None):

    if n_rows is not None:
        df = pd.read_csv('final_feats_without_dummies_3.csv', low_memory=False, nrows=n_rows)
        df_y = pd.read_csv('final_outs_3.csv', low_memory=False, nrows=n_rows)
    else:
        df = pd.read_csv('final_feats_without_dummies_3.csv', low_memory=False)
        df_y = pd.read_csv('final_outs_3.csv', low_memory=False)
    
    
    # Drop labels and a redundant column
    remove_columns_from_data_frame(['Unnamed: 0', 'Unnamed: 0.1' 'dissent', 'dissentdummy'], df)
    df,df_y=remove_bad_rows(df,df_y)
    df=drop_unneeded_cols(df)
    #df_x=dummify(df_x)
    
    # Extras -- for analysis
    # CASE 1: REMOVE TOP 2
    # CASE 2: REMOVE ALL 'DISS'
    
#     remove_columns_from_data_frame(['type', 'turnonthresh'], df)
#     remove_columns_from_data_frame(['type1', 'last3'], df)
#     remove_columns_like('diss', df)
    if ('Unnamed: 0' or 'Unnamed: 0.1') in df_y.columns:
        df_y.drop(labels=['Unnamed: 0','Unnamed: 0.1'],axis=1,inplace=True)
    
    return df, df_y


def get_x_y(n_rows=None):
    
    df, df_y = get_data(n_rows)

    #fill_nas(0, df)
    
    return df.values, df_y.ix[:,0].values


def get_columns(df):
    
    #df = pd.get_dummies(pd.read_csv('final_feats_without_dummies.csv', low_memory=False, nrows=2))
    return list(df.columns)


def print_report(y, y_pred):

    print classification_report(y, y_pred)
    


In [5]:
#############################################
# MODEL HELPERS
#############################################

def grid_search(X, y, clf, param_grid):
    
#     param_dict={'average': 'weighted'}
    scorer = make_scorer(average_precision_score)


    gridclf = GridSearchCV(clf, paramgrid, scoring=scorer, cv=3, verbose=1)

    gridclf.fit(X, y)

    print gridclf.best_params_
    print gridclf.best_estimator_

    print_report(y_test, gridclf.predict(X_test))
    

def get_top_n(n, arr, col_names, prev_list=[]):
    
    if n <= 0:
        return []
    
    most_imp = -1
    most_imp_index = -1

    for i in range(len(arr)):

        if i in prev_list:
            continue

        if arr[i] > most_imp:
            most_imp = arr[i]
            most_imp_index = i

    prev_list.append(most_imp_index)

    return [ (col_names[most_imp_index], most_imp) ] + get_top_n(n - 1, arr, col_names, prev_list)

In [9]:
def drop_unneeded_cols(df):
    del_cols = ['fileid','cite','vol','beginpg','endopin','endpage','docnum','priorpub','_merge','year',
            'circuit','pseatno','decision_date','aatty_first_name','aatty_last_name','afirm_name',
            'ratty_first_name','ratty_last_name','rname_of_first_listed_amicus_gro','rfirm_namew','decisiondatenew2',
           'j1name','j2name','j3name','quartertoelect','pname','seatno','success','lsuc','ls1','ls2','ls3','lp',
            'lp2','lp3','sseatno','congress','congreso','afirst_listed_amicus_group','yearquarter','name','Name','State','j',
            'codej4','j4vote1','j4vote2','j4maj1','j4maj2','codej5','j5vote1','j5vote2','j5maj1','j5maj2',
            'codej6','j6vote1','j6vote2','j6maj1','j6maj2','codej7','j7vote1','j7vote2','j7maj1','j7maj2',
            'codej8','j8vote1','j8vote2','j8maj1','j8maj2','codej9','j9vote1','j9vote2','j9maj1','j9maj2',
            'codej10','j10vote1','j10vote2','j10maj1','j10maj2','codej11','j11vote1','j11vote2','j11maj1','j11maj2',
            'codej12','j12vote1','j12vote2','j12maj1','j12maj2','codej13','j13vote1','j13vote2','j13maj1','j13maj2',
            'codej14','j14vote1','j14vote2','j14maj1','j14maj2','codej15','j15vote1','j15vote2','j15maj1','j15maj2','j16maj1','j16vote1']
    df.drop(labels=del_cols,axis=1,inplace=True)
    moredropcolumns=df.columns.tolist() # .tolist?
    for i in moredropcolumns:
        if len(pd.unique(df[i]))==1:
            df.drop(labels=i,axis=1,inplace=True)
    df.drop(labels=['casenum','j2vote1','j2vote2','j2maj1','direct1',
                          'j2maj2','j3vote1','j3vote2','j3maj1','j3maj2','majvotes','ids'],axis=1,inplace=True)
    return df
    
def dummify(df):
    new_cols=df.columns
    new_cols=new_cols.tolist()
#     keep_cols=['j1score','j2score','j3score','popularpct','electoralpct','closerd','fartherd','dAds3','dF2Ads3',
#            'dF1Ads3','dL1Ads3','dL2Ads3','dL3Ads3','dL4Ads3','dL5Ads3','logAds3','logL1Ads3','logL2Ads3','logF1Ads3',
#           'logF2Ads3','decade2','propneg','likely_elev2','score','d12','d13','d23','sat_together_count']

    float_cols=['j1score','j2score','j3score','popularpct','electoralpct','closerd','fartherd','dAds3','dF2Ads3',
           'dF1Ads3','dL1Ads3','dL2Ads3','dL3Ads3','dL4Ads3','dL5Ads3','logAds3','logL1Ads3','logL2Ads3','logF1Ads3',
          'logF2Ads3','decade2','propneg','likely_elev2','score','d12','d13','d23',
           'judgecitations','experience','experiencetrun','age2trun','agego','assets','ba','liable',
            'networth','totalcities','sat_together_count','keytotal','lengthopin','Wopinionlenght','Wtotalcites','age']

    remove_for_now=['Ads3','F1Ads3','F2Ads3','L1Ads3','L2Ads3','L3Ads3','L4Ads3','L5Ads3','Unnamed: 0.1','appel1','appel2',
               'citevol','codej3','id','usc2sect','usc1sect','age2','distjudg','respond1','respond2','yearb','pred','csb']

    df_x.drop(labels=remove_for_now,inplace=True,axis=1)
    sum1=0
    
    dummy_cols=[]
    for col in df.columns:
        if col not in float_cols:
            if len(pd.unique(df.ix[:,col]))>100 or (df.ix[:,col].dtype!='float64' and df.ix[:,col].dtype!='int64'): 
                sum1+= len(pd.unique(df.ix[:,col]))
                dummy_cols.append(col)
    print "# of dummy columns: ",sum1
    df2=pd.get_dummies(df,columns=dummy_cols,dummy_na=True,sparse=True)
    df2=df2.fillna(value=0,inplace=True)
    return df2


def remove_bad_rows(df_x,df_y):
    
    #remove rows where codej1==codej2
#     df[df.codej1==df.codej2].index
    same_cols = df_x[df_x.codej1==df_x.codej2].index
    df_x=df_x.drop(same_cols).reset_index(drop=True)
    df_y=df_y.drop(same_cols).reset_index(drop=True)
    #remove rows where >3 judges occur
#     pp = pd.read_csv('../raw/Votelevel_stuffjan2013.csv')
#     qq=pp.groupby(by=['casenum']).count()
#     pd.unique(qq.month)
#     rr=qq[qq.month==6].reset_index()
#     rr.shape
    
    #remove rows where codej2==null
    #df[map(lambda x: not(x),pd.notnull(df.ix[:]["codej2"]).tolist())]
    nan_cols=df_x[map(lambda x: not(x),pd.notnull(df_x.ix[:]["codej2"]).tolist())].index
    nan_cols.append(df_x[map(lambda x: not(x),pd.notnull(df_x.ix[:]["codej1"]).tolist())].index)
    df_x=df_x.drop(nan_cols).reset_index(drop=True)
    df_y=df_y.drop(nan_cols).reset_index(drop=True)
    
    return df_x,df_y

In [129]:
for col in df_x.columns:
    if col not in float_cols:
        if len(pd.unique(df_x.ix[:,col]))>100:
            print col,len(pd.unique(df_x.ix[:,col]))

casetyp1 213
casetyp2 184
city 162
codej1 1551
codej2 1551
endyear 107
ls 113
pos2 104
pos3 109
totalcites 113


In [134]:
float_cols=['j1score','j2score','j3score','popularpct','electoralpct','closerd','fartherd','dAds3','dF2Ads3',
       'dF1Ads3','dL1Ads3','dL2Ads3','dL3Ads3','dL4Ads3','dL5Ads3','logAds3','logL1Ads3','logL2Ads3','logF1Ads3',
      'logF2Ads3','decade2','propneg','likely_elev2','score','d12','d13','d23',
       'judgecitations','experience','experiencetrun','age2trun','agego','assets','ba','liable',
        'networth','totalcities','sat_together_count','keytotal','lengthopin','Wopinionlenght','Wtotalcites','age']

# remove_for_now=['Ads3','F1Ads3','F2Ads3','L1Ads3','L2Ads3','L3Ads3','L4Ads3','L5Ads3','Unnamed: 0.1','appel1','appel2',
#            'citevol','codej3','id','usc2sect','usc1sect','age2','distjudg','respond1','respond2','yearb','pred','csb']

# df_x.drop(labels=remove_for_now,inplace=True,axis=1)
sum1=0
dummy_cols=[]
for col in df_x.columns:
    if col not in float_cols:
        if len(pd.unique(df_x.ix[:,col]))>100 or (df_x.ix[:,col].dtype!='float64' and df_x.ix[:,col].dtype!='int64'): 
            sum1+= len(pd.unique(df_x.ix[:,col])) #, len(pd.unique(df_x.ix[:,col])), df_x[:,col].dtype
            dummy_cols.append(col)

In [135]:
sum1

4342

In [147]:
large_cols=[]
small_cols=[]
sum1=0
for col in df_x.columns:
    if col not in float_cols and col not in remove_for_now:
        if len(pd.unique(df_x.ix[:,col]))>10:
            sum1+=len(pd.unique(df_x.ix[:,col]))
            #print col,len(pd.unique(df_x.ix[:,col]))
            large_cols.append(col)
        else:
            small_cols.append(col)

In [44]:
df_y.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,0
0,0,0,1
1,1,1,1
2,2,2,1
3,3,3,1
4,4,4,1


In [148]:
sum1

10382

In [7]:
df_x,df_y = get_data()

In [10]:
df_x=dummify(df_x)

# of dummy columns:  4342


In [11]:
df_x.shape

(111538, 5032)

In [12]:
df_x.head()

Unnamed: 0,ElecYear_AndPrior,Wlengthopin,Wopinionlenght,Wtotalcites,_Icircuit_1,_Icircuit_10,_Icircuit_11,_Icircuit_2,_Icircuit_3,_Icircuit_4,_Icircuit_5,_Icircuit_6,_Icircuit_7,_Icircuit_8,_Icircuit_9,_Igeniss2_1,_Igeniss2_2,_Igeniss2_3,_Igeniss2_6,_Igeniss2_7,_Ilastquart_1,_Iquarter_2,_Iquarter_3,_Iquarter_4,_Iquarterto_1,_Iquarterto_10,_Iquarterto_11,_Iquarterto_12,_Iquarterto_13,_Iquarterto_14,_Iquarterto_15,_Iquarterto_2,_Iquarterto_3,_Iquarterto_4,_Iquarterto_5,_Iquarterto_6,_Iquarterto_7,_Iquarterto_8,_Iquarterto_9,_Iyear_1926,_Iyear_1927,_Iyear_1928,_Iyear_1929,_Iyear_1930,_Iyear_1931,_Iyear_1932,_Iyear_1933,_Iyear_1934,_Iyear_1935,_Iyear_1936,_Iyear_1937,_Iyear_1938,_Iyear_1939,_Iyear_1940,_Iyear_1941,_Iyear_1942,_Iyear_1943,_Iyear_1944,_Iyear_1945,_Iyear_1946,_Iyear_1947,_Iyear_1948,_Iyear_1949,_Iyear_1950,_Iyear_1951,_Iyear_1952,_Iyear_1953,_Iyear_1954,_Iyear_1955,_Iyear_1956,_Iyear_1957,_Iyear_1958,_Iyear_1959,_Iyear_1960,_Iyear_1961,_Iyear_1962,_Iyear_1963,_Iyear_1964,_Iyear_1965,_Iyear_1966,_Iyear_1967,_Iyear_1968,_Iyear_1969,_Iyear_1970,_Iyear_1971,_Iyear_1972,_Iyear_1973,_Iyear_1974,_Iyear_1975,_Iyear_1976,_Iyear_1977,_Iyear_1978,_Iyear_1979,_Iyear_1980,_Iyear_1981,_Iyear_1982,_Iyear_1983,_Iyear_1984,_Iyear_1985,_Iyear_1986,_Iyear_1987,_Iyear_1988,_Iyear_1989,_Iyear_1990,_Iyear_1991,_Iyear_1992,_Iyear_1993,_Iyear_1994,_Iyear_1995,_Iyear_1996,_Iyear_1997,_Iyear_1998,_Iyear_1999,_Iyear_2000,_Iyear_2001,_Iyear_2002,___j,___l,a_groups,aba,abamin,abusedis,add1,adminrev,affirmdummy,afghan,age,age2trun,age50,agego,agen_acq,ageon,alj,alst_code,altdisp,amicus,amon,ap_stid,appage,appbus,appfed,appfiduc,applfrom,appnatpr,appnonp,appres,appstate,appsubst,apptdemocrat,asame,asame_f1,asame_f2,asensame,asensame_f1,asensame_f2,assets,attyfee,ayear,ba,bank_ap1,bank_ap2,bank_r1,bank_r2,bapp,bast,boom,capric,caseload,civproc1,civproc2,classact,clinton,close,close2,close3,closerd,comment,conc0pro0merdummy,conc0promerdummy,conclint,concmer1,concmer2,concmerdummy,concpro0merdummy,concpro1,concpro2,concprodummy,concpromerdummy,concur,concurauthor,concurdummy,concurjoin,concurvalence,condole,confess,const1,const2,constit,counsel,counsel1,counsel2,count,courtesy,crmproc1,crmproc2,crossa,crossapp,crossl,d,d12,d13,d23,dAds3,dF1Ads3,dF2Ads3,dL1Ads3,dL2Ads3,dL3Ads3,dL4Ads3,dL5Ads3,dak,dal,dar,day,daz,dca,dco,dcother,dct,dde,deathpen,decade,decade2,decuncon,democrat,denovo,dfl,dga,dhi,dhouse,did,dil,din,dio,direct2,discover,diss0pro0merdummy,diss0promerdummy,dissent,dissentvalence,dissentvote,dissmer,dissmerdummy,disspro,disspro0merdummy,dissprodummy,disspromerdummy,distance,district,districtjudgeinpanel,diverse,divided,divided1,dividedA,dks,dky,dla,dma,dmd,dme,dmi,dmn,dmo,dms,dmt,dnc,dnd,dne,dnh,dnj,dnm,dnv,dny,doh,dok,dor,dpa,dri,dsc,dsd,dsen,dtn,dtx,dueproc,dummy,dut,dva,dvt,dwa,dwi,dwv,dwy,e1,e2,e3,e4,e5,e6,electoralpct,electoralpivotal,elevate,elevated,elevated2,entrap,erron,execord,exhaust,experience,experiencetrun,extra5,extreme,extremee,extremeleft,extremelefte,extremeright,extremerighte,extremescore,fartherd,fedlaw,fedlaw2,fedvst,female,foreign,fourties,freeinfo,frivapp,frivol,fromdistrictcourt,genapel1,genapel2,gender,geniss,geniss2,geniss3,genresp1,genresp2,genstand,graddeg1,graddeg2,graddeg3,gulf,habeas,hdem,hother,hrep,immunity,improper,indict,indigent,inexperience,initiate,initiate1,initiate2,initiate3,initiate4,initiate5,injunct,insane,int_law,interven,isspty,j1appres,j1distjudge,j1party,j1sample,j1score,j2appres,j2distjudge,j2party,j2sample,j2score,j3appres,j3distjudge,j3party,j3sample,j3score,jdpp,judgdisc,judgecitations,judrev,juris,juryinst,keytotal,korean,last3,lastquarter,lastthreequarter,late,left,lengthopin,liable,liberaldissent,liberalvote,likely_elev2,logAds3,logF1Ads3,logF2Ads3,logL1Ads3,logL2Ads3,lq,lsst,m,majority,mergeAuburn,method,mona,monl,month,month911_3mon,month911_f1,month911_f2,month911_f3,month911_f4,month911_t0,month911_t1,month911_t2,month911_t3,month911_t4,month911_t5,month911_t6,mootness,moved,mrespty,multdoc,negativecites,negclint,negdole,networth,noattyap,noattyrp,nofirmap,nofirmrp,nomoversinpanel,notice,numappel,numresp,oldj1party,oldj2party,oldj3party,opinstat,opp_wins,origin,othadmis,othappth,othcivil,othcrim,othjury,oththres,paag,pada,pag,pagelgth,pago,party,pausa,pbank,pbj,pcab,pcabdept,pcc,pccom,pccoun,pcomct,pcustom,pda,pdat,pfedjdge,pfjdget,pgov,pgovt,phouse,pindreg1,pindreg2,pindreg3,pindreg4,pindreg5,plawprof,plea,pleft,pleftsum,plocct,plother,plotherl,pmag,pmagis,pmayor,pname2,pname3,polquest,popularpct,...,endyear_2005,endyear_2006,endyear_2006.0,endyear_2007,endyear_2007.0,endyear_2008,endyear_2009,endyear_2010,endyear_2011,endyear_present,endyear_nan,ls_0.0,ls_1.0,ls_4.0,ls_5.0,ls_8.0,ls_10.0,ls_11.0,ls_13.0,ls_14.0,ls_15.0,ls_17.0,ls_18.0,ls_19.0,ls_20.0,ls_21.0,ls_22.0,ls_23.0,ls_24.0,ls_25.0,ls_26.0,ls_27.0,ls_28.0,ls_29.0,ls_30.0,ls_31.0,ls_32.0,ls_33.0,ls_34.0,ls_35.0,ls_36.0,ls_39.0,ls_41.0,ls_42.0,ls_43.0,ls_44.0,ls_45.0,ls_46.0,ls_47.0,ls_48.0,ls_49.0,ls_51.0,ls_52.0,ls_54.0,ls_55.0,ls_56.0,ls_57.0,ls_59.0,ls_60.0,ls_61.0,ls_62.0,ls_63.0,ls_64.0,ls_66.0,ls_67.0,ls_68.0,ls_70.0,ls_71.0,ls_77.0,ls_81.0,ls_82.0,ls_84.0,ls_85.0,ls_86.0,ls_87.0,ls_89.0,ls_90.0,ls_91.0,ls_92.0,ls_94.0,ls_95.0,ls_96.0,ls_99.0,ls_108.0,ls_116.0,ls_125.0,ls_133.0,ls_135.0,ls_137.0,ls_141.0,ls_146.0,ls_148.0,ls_150.0,ls_152.0,ls_154.0,ls_159.0,ls_160.0,ls_162.0,ls_164.0,ls_165.0,ls_166.0,ls_168.0,ls_173.0,ls_183.0,ls_186.0,ls_188.0,ls_191.0,ls_192.0,ls_193.0,ls_194.0,ls_198.0,ls_200.0,ls_201.0,ls_206.0,ls_211.0,ls_216.0,ls_220.0,ls_221.0,ls_227.0,ls_235.0,ls_241.0,ls_250.0,ls_578.0,ls_nan,pos2_1.0,pos2_2.0,pos2_3.0,pos2_4.0,pos2_5.0,pos2_6.0,pos2_7.0,pos2_8.0,pos2_9.0,pos2_10.0,pos2_13.0,pos2_14.0,pos2_15.0,pos2_16.0,pos2_18.0,pos2_20.0,pos2_21.0,pos2_22.0,pos2_23.0,pos2_24.0,pos2_26.0,pos2_27.0,pos2_29.0,pos2_30.0,pos2_31.0,pos2_33.0,pos2_38.0,pos2_46.0,pos2_47.0,pos2_52.0,pos2_54.0,pos2_55.0,pos2_62.0,pos2_65.0,pos2_71.0,pos2_78.0,pos2_83.0,pos2_102.0,pos2_106.0,pos2_113.0,pos2_115.0,pos2_117.0,pos2_123.0,pos2_124.0,pos2_125.0,pos2_130.0,pos2_131.0,pos2_136.0,pos2_141.0,pos2_142.0,pos2_152.0,pos2_155.0,pos2_163.0,pos2_166.0,pos2_167.0,pos2_169.0,pos2_173.0,pos2_174.0,pos2_175.0,pos2_180.0,pos2_205.0,pos2_209.0,pos2_210.0,pos2_219.0,pos2_220.0,pos2_224.0,pos2_230.0,pos2_301.0,pos2_302.0,pos2_315.0,pos2_319.0,pos2_324.0,pos2_327.0,pos2_329.0,pos2_336.0,pos2_337.0,pos2_358.0,pos2_367.0,pos2_369.0,pos2_400.0,pos2_403.0,pos2_408.0,pos2_411.0,pos2_413.0,pos2_420.0,pos2_424.0,pos2_425.0,pos2_431.0,pos2_432.0,pos2_433.0,pos2_437.0,pos2_440.0,pos2_445.0,pos2_448.0,pos2_456.0,pos2_460.0,pos2_496.0,pos2_500.0,pos2_502.0,pos2_504.0,pos2_516.0,pos2_524.0,pos2_528.0,pos2_nan,pos3_1.0,pos3_2.0,pos3_3.0,pos3_4.0,pos3_5.0,pos3_6.0,pos3_7.0,pos3_8.0,pos3_9.0,pos3_14.0,pos3_20.0,pos3_22.0,pos3_23.0,pos3_24.0,pos3_25.0,pos3_26.0,pos3_27.0,pos3_29.0,pos3_30.0,pos3_31.0,pos3_33.0,pos3_38.0,pos3_47.0,pos3_48.0,pos3_49.0,pos3_60.0,pos3_71.0,pos3_91.0,pos3_92.0,pos3_95.0,pos3_98.0,pos3_99.0,pos3_105.0,pos3_110.0,pos3_116.0,pos3_123.0,pos3_124.0,pos3_127.0,pos3_128.0,pos3_130.0,pos3_131.0,pos3_133.0,pos3_136.0,pos3_159.0,pos3_163.0,pos3_165.0,pos3_166.0,pos3_167.0,pos3_170.0,pos3_171.0,pos3_173.0,pos3_174.0,pos3_175.0,pos3_181.0,pos3_182.0,pos3_202.0,pos3_204.0,pos3_206.0,pos3_207.0,pos3_209.0,pos3_212.0,pos3_220.0,pos3_221.0,pos3_223.0,pos3_228.0,pos3_229.0,pos3_301.0,pos3_305.0,pos3_306.0,pos3_307.0,pos3_308.0,pos3_309.0,pos3_310.0,pos3_311.0,pos3_315.0,pos3_320.0,pos3_325.0,pos3_332.0,pos3_335.0,pos3_349.0,pos3_353.0,pos3_366.0,pos3_400.0,pos3_401.0,pos3_407.0,pos3_408.0,pos3_409.0,pos3_414.0,pos3_419.0,pos3_424.0,pos3_430.0,pos3_431.0,pos3_438.0,pos3_440.0,pos3_441.0,pos3_443.0,pos3_449.0,pos3_451.0,pos3_452.0,pos3_490.0,pos3_497.0,pos3_501.0,pos3_506.0,pos3_520.0,pos3_524.0,pos3_526.0,pos3_532.0,pos3_533.0,pos3_nan,president_D,president_R,president_nan,president_f1_D,president_f1_R,president_f1_nan,president_f2_D,president_f2_R,president_f2_nan,seatno2_02.00.02,seatno2_10.01.01,seatno2_10.02.01,seatno2_11.01.01,seatno2_11.02.01,seatno2_11.03.01,seatno2_11.04.01,seatno2_11.05.01,seatno2_11.06.01,seatno2_11.07.01,seatno2_11.08.01,seatno2_11.09.01,seatno2_11.10.01,seatno2_11.11.01,seatno2_11.12.01,seatno2_2.0,seatno2_nan,seatno3_06.00.01,seatno3_nan,senate_D,senate_R,senate_nan,senate_f1_D,senate_f1_R,senate_f1_nan,senate_f2_D,senate_f2_R,senate_f2_nan,sseatno2_0,sseatno2_10.01.02,sseatno2_10.02.02,sseatno2_11.01.02,sseatno2_11.02.02,sseatno2_11.05.02,sseatno2_11.06.02,sseatno2_11.08.02,sseatno2_11.10.02,sseatno2_11.11.02,sseatno2_2.0,sseatno2_`,sseatno2_nan,totalcites_0.0,totalcites_1.0,totalcites_2.0,totalcites_3.0,totalcites_4.0,totalcites_5.0,totalcites_6.0,totalcites_7.0,totalcites_8.0,totalcites_9.0,totalcites_10.0,totalcites_11.0,totalcites_12.0,totalcites_13.0,totalcites_14.0,totalcites_15.0,totalcites_16.0,totalcites_17.0,totalcites_18.0,totalcites_19.0,totalcites_20.0,totalcites_21.0,totalcites_22.0,totalcites_23.0,totalcites_24.0,totalcites_25.0,totalcites_26.0,totalcites_27.0,totalcites_28.0,totalcites_29.0,totalcites_30.0,totalcites_31.0,totalcites_32.0,totalcites_33.0,totalcites_34.0,totalcites_35.0,totalcites_36.0,totalcites_37.0,totalcites_38.0,totalcites_39.0,totalcites_40.0,totalcites_41.0,totalcites_42.0,totalcites_43.0,totalcites_44.0,totalcites_45.0,totalcites_46.0,totalcites_47.0,totalcites_48.0,totalcites_49.0,totalcites_50.0,totalcites_51.0,totalcites_52.0,totalcites_53.0,totalcites_54.0,totalcites_55.0,totalcites_56.0,totalcites_57.0,totalcites_58.0,totalcites_59.0,totalcites_60.0,totalcites_61.0,totalcites_62.0,totalcites_63.0,totalcites_64.0,totalcites_65.0,totalcites_66.0,totalcites_67.0,totalcites_68.0,totalcites_69.0,totalcites_70.0,totalcites_71.0,totalcites_72.0,totalcites_73.0,totalcites_74.0,totalcites_75.0,totalcites_76.0,totalcites_77.0,totalcites_78.0,totalcites_80.0,totalcites_81.0,totalcites_83.0,totalcites_84.0,totalcites_88.0,totalcites_90.0,totalcites_91.0,totalcites_94.0,totalcites_98.0,totalcites_99.0,totalcites_100.0,totalcites_101.0,totalcites_102.0,totalcites_104.0,totalcites_105.0,totalcites_106.0,totalcites_107.0,totalcites_108.0,totalcites_112.0,totalcites_116.0,totalcites_122.0,totalcites_124.0,totalcites_132.0,totalcites_141.0,totalcites_155.0,totalcites_157.0,totalcites_178.0,totalcites_182.0,totalcites_184.0,totalcites_188.0,totalcites_199.0,totalcites_213.0,totalcites_754.0,totalcites_nan
0,1,11,11,5,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,0,90,1,0,0,0,0,0,0,0,0,38,0,0,0,0,1,0,0,0,99,0,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,59904,0,0,0,0,0,0,0,0,0,0,0,0,0,10613,0,101,114,1,0,4,4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,193,193.899994,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,98,0,0,0,0,0,0,0,0,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,5,5,0,3,3,3,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,-0.45,1,1,1,4,0,0,0,3,0,0,0,0,0,0,0,0,0,0,11,0,0,1,0.0,0,0,0,0,0,0,0,0,1,1,1,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,25639,49611,0,0,0,0,0,1,0,99,99,0,1,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60.799999,...,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,,,,,1,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,1,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1,11,11,5,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,0,90,1,0,0,0,0,0,0,0,0,38,0,0,0,0,1,0,0,0,99,0,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,59904,0,0,0,0,0,0,0,0,0,0,0,0,0,10613,0,101,114,1,0,4,4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,193,193.899994,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,98,0,0,0,0,0,0,0,0,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,5,5,0,3,3,3,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,-0.45,1,1,1,4,0,0,0,3,0,0,0,0,0,0,0,0,0,0,11,0,0,1,0.0,0,0,0,0,0,0,0,0,1,1,1,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,25639,49611,0,0,0,0,0,1,0,99,99,0,1,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60.799999,...,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,,,,,1,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,1,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1,11,11,5,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,42,44,0,70,0,42,0,0,0,0,2,38,42,0,0,0,1,0,0,1,99,0,1,1,1,1,1,1,1,0,0,1937,111,2,2,2,2,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,101,114,1,0,4,4,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,2,10,2,1,2,0,2,1,0,193,193.899994,0,1,0,2,2,0,333,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,1,0,0,1,1,1,2,2,1,2,0,1,0,2,2,2,2,0,1,1,2,2,2,2,2,2,0,1,2,2,2,75,2,2,0,1,2,2,0,2,1,2,2,295,140,68,32,0,0,98,0,25,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,5,5,1,3,3,3,7,7,0,0,0,0,0,0,333,13,89,0,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,-0.45,1,1,1,4,0,1,0,383,0,0,0,0,0,0,0,0,0,3,11,0,0,1,-10.253763,0,0,0,0,0,0,1,0,1,3,1,2,10,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,99,99,0,1,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,1,0,0,0,0,0,0,0,0,0,60.799999,...,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,1,,0,1,,0,1,,0,,,,,,,,,,,,,,,,,1,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,1,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1,11,11,5,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,42,44,0,70,0,42,0,0,0,0,2,38,42,0,0,0,1,0,0,1,99,0,1,1,1,1,1,1,1,0,0,1937,111,2,2,2,2,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,101,114,1,0,4,4,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,2,10,2,1,2,0,2,1,0,193,193.899994,0,1,0,2,2,0,333,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,1,0,0,1,1,1,2,2,1,2,0,1,0,2,2,2,2,0,1,1,2,2,2,2,2,2,0,1,2,2,2,75,2,2,0,1,2,2,0,2,1,2,2,295,140,68,32,0,0,98,0,25,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,5,5,1,3,3,3,7,7,0,0,0,0,0,0,333,13,89,0,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,-0.45,1,1,1,4,0,1,0,383,0,0,0,0,0,0,0,0,0,3,11,0,0,1,-10.253763,0,0,0,0,0,0,1,0,1,3,1,2,10,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,99,99,0,1,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,1,0,0,0,0,0,0,0,0,0,60.799999,...,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,1,,0,1,,0,1,,0,,,,,,,,,,,,,,,,,1,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,1,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1,11,11,5,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,47,48,0,51,0,47,0,0,0,0,6,38,47,0,0,0,1,0,0,1,99,0,1,1,1,1,1,1,1,0,0,1938,39,2,2,2,2,1,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,101,114,1,0,4,4,1,2,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,2,2,10,2,1,2,0,2,1,0,193,193.899994,0,0,0,2,2,0,333,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,1,0,0,1,1,1,2,2,1,2,0,1,0,2,2,2,2,0,1,1,2,2,2,2,2,2,1,1,2,2,2,75,2,2,0,1,2,2,0,2,1,2,2,267,162,66,28,6,2,98,0,12,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,5,5,1,3,3,3,7,7,0,2,0,0,0,0,333,13,89,0,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,-0.45,1,1,1,4,0,1,0,14,0,0,0,0,0,0,0,0,0,2,11,0,0,1,-5.867504,0,0,0,0,0,0,1,0,0,3,1,6,3,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,99,99,0,1,1,1,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,13,0,1,0,0,0,0,0,0,0,0,14,1,0,0,0,0,0,0,0,0,0,60.799999,...,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,1,,0,1,,0,1,,0,,,,,,,,,,,,,,,,,1,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,1,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [19]:
df_x.fillna(value=0,inplace=True)

In [20]:
df_x.head()

Unnamed: 0,ElecYear_AndPrior,Wlengthopin,Wopinionlenght,Wtotalcites,_Icircuit_1,_Icircuit_10,_Icircuit_11,_Icircuit_2,_Icircuit_3,_Icircuit_4,_Icircuit_5,_Icircuit_6,_Icircuit_7,_Icircuit_8,_Icircuit_9,_Igeniss2_1,_Igeniss2_2,_Igeniss2_3,_Igeniss2_6,_Igeniss2_7,_Ilastquart_1,_Iquarter_2,_Iquarter_3,_Iquarter_4,_Iquarterto_1,_Iquarterto_10,_Iquarterto_11,_Iquarterto_12,_Iquarterto_13,_Iquarterto_14,_Iquarterto_15,_Iquarterto_2,_Iquarterto_3,_Iquarterto_4,_Iquarterto_5,_Iquarterto_6,_Iquarterto_7,_Iquarterto_8,_Iquarterto_9,_Iyear_1926,_Iyear_1927,_Iyear_1928,_Iyear_1929,_Iyear_1930,_Iyear_1931,_Iyear_1932,_Iyear_1933,_Iyear_1934,_Iyear_1935,_Iyear_1936,_Iyear_1937,_Iyear_1938,_Iyear_1939,_Iyear_1940,_Iyear_1941,_Iyear_1942,_Iyear_1943,_Iyear_1944,_Iyear_1945,_Iyear_1946,_Iyear_1947,_Iyear_1948,_Iyear_1949,_Iyear_1950,_Iyear_1951,_Iyear_1952,_Iyear_1953,_Iyear_1954,_Iyear_1955,_Iyear_1956,_Iyear_1957,_Iyear_1958,_Iyear_1959,_Iyear_1960,_Iyear_1961,_Iyear_1962,_Iyear_1963,_Iyear_1964,_Iyear_1965,_Iyear_1966,_Iyear_1967,_Iyear_1968,_Iyear_1969,_Iyear_1970,_Iyear_1971,_Iyear_1972,_Iyear_1973,_Iyear_1974,_Iyear_1975,_Iyear_1976,_Iyear_1977,_Iyear_1978,_Iyear_1979,_Iyear_1980,_Iyear_1981,_Iyear_1982,_Iyear_1983,_Iyear_1984,_Iyear_1985,_Iyear_1986,_Iyear_1987,_Iyear_1988,_Iyear_1989,_Iyear_1990,_Iyear_1991,_Iyear_1992,_Iyear_1993,_Iyear_1994,_Iyear_1995,_Iyear_1996,_Iyear_1997,_Iyear_1998,_Iyear_1999,_Iyear_2000,_Iyear_2001,_Iyear_2002,___j,___l,a_groups,aba,abamin,abusedis,add1,adminrev,affirmdummy,afghan,age,age2trun,age50,agego,agen_acq,ageon,alj,alst_code,altdisp,amicus,amon,ap_stid,appage,appbus,appfed,appfiduc,applfrom,appnatpr,appnonp,appres,appstate,appsubst,apptdemocrat,asame,asame_f1,asame_f2,asensame,asensame_f1,asensame_f2,assets,attyfee,ayear,ba,bank_ap1,bank_ap2,bank_r1,bank_r2,bapp,bast,boom,capric,caseload,civproc1,civproc2,classact,clinton,close,close2,close3,closerd,comment,conc0pro0merdummy,conc0promerdummy,conclint,concmer1,concmer2,concmerdummy,concpro0merdummy,concpro1,concpro2,concprodummy,concpromerdummy,concur,concurauthor,concurdummy,concurjoin,concurvalence,condole,confess,const1,const2,constit,counsel,counsel1,counsel2,count,courtesy,crmproc1,crmproc2,crossa,crossapp,crossl,d,d12,d13,d23,dAds3,dF1Ads3,dF2Ads3,dL1Ads3,dL2Ads3,dL3Ads3,dL4Ads3,dL5Ads3,dak,dal,dar,day,daz,dca,dco,dcother,dct,dde,deathpen,decade,decade2,decuncon,democrat,denovo,dfl,dga,dhi,dhouse,did,dil,din,dio,direct2,discover,diss0pro0merdummy,diss0promerdummy,dissent,dissentvalence,dissentvote,dissmer,dissmerdummy,disspro,disspro0merdummy,dissprodummy,disspromerdummy,distance,district,districtjudgeinpanel,diverse,divided,divided1,dividedA,dks,dky,dla,dma,dmd,dme,dmi,dmn,dmo,dms,dmt,dnc,dnd,dne,dnh,dnj,dnm,dnv,dny,doh,dok,dor,dpa,dri,dsc,dsd,dsen,dtn,dtx,dueproc,dummy,dut,dva,dvt,dwa,dwi,dwv,dwy,e1,e2,e3,e4,e5,e6,electoralpct,electoralpivotal,elevate,elevated,elevated2,entrap,erron,execord,exhaust,experience,experiencetrun,extra5,extreme,extremee,extremeleft,extremelefte,extremeright,extremerighte,extremescore,fartherd,fedlaw,fedlaw2,fedvst,female,foreign,fourties,freeinfo,frivapp,frivol,fromdistrictcourt,genapel1,genapel2,gender,geniss,geniss2,geniss3,genresp1,genresp2,genstand,graddeg1,graddeg2,graddeg3,gulf,habeas,hdem,hother,hrep,immunity,improper,indict,indigent,inexperience,initiate,initiate1,initiate2,initiate3,initiate4,initiate5,injunct,insane,int_law,interven,isspty,j1appres,j1distjudge,j1party,j1sample,j1score,j2appres,j2distjudge,j2party,j2sample,j2score,j3appres,j3distjudge,j3party,j3sample,j3score,jdpp,judgdisc,judgecitations,judrev,juris,juryinst,keytotal,korean,last3,lastquarter,lastthreequarter,late,left,lengthopin,liable,liberaldissent,liberalvote,likely_elev2,logAds3,logF1Ads3,logF2Ads3,logL1Ads3,logL2Ads3,lq,lsst,m,majority,mergeAuburn,method,mona,monl,month,month911_3mon,month911_f1,month911_f2,month911_f3,month911_f4,month911_t0,month911_t1,month911_t2,month911_t3,month911_t4,month911_t5,month911_t6,mootness,moved,mrespty,multdoc,negativecites,negclint,negdole,networth,noattyap,noattyrp,nofirmap,nofirmrp,nomoversinpanel,notice,numappel,numresp,oldj1party,oldj2party,oldj3party,opinstat,opp_wins,origin,othadmis,othappth,othcivil,othcrim,othjury,oththres,paag,pada,pag,pagelgth,pago,party,pausa,pbank,pbj,pcab,pcabdept,pcc,pccom,pccoun,pcomct,pcustom,pda,pdat,pfedjdge,pfjdget,pgov,pgovt,phouse,pindreg1,pindreg2,pindreg3,pindreg4,pindreg5,plawprof,plea,pleft,pleftsum,plocct,plother,plotherl,pmag,pmagis,pmayor,pname2,pname3,polquest,popularpct,...,endyear_2005,endyear_2006,endyear_2006.0,endyear_2007,endyear_2007.0,endyear_2008,endyear_2009,endyear_2010,endyear_2011,endyear_present,endyear_nan,ls_0.0,ls_1.0,ls_4.0,ls_5.0,ls_8.0,ls_10.0,ls_11.0,ls_13.0,ls_14.0,ls_15.0,ls_17.0,ls_18.0,ls_19.0,ls_20.0,ls_21.0,ls_22.0,ls_23.0,ls_24.0,ls_25.0,ls_26.0,ls_27.0,ls_28.0,ls_29.0,ls_30.0,ls_31.0,ls_32.0,ls_33.0,ls_34.0,ls_35.0,ls_36.0,ls_39.0,ls_41.0,ls_42.0,ls_43.0,ls_44.0,ls_45.0,ls_46.0,ls_47.0,ls_48.0,ls_49.0,ls_51.0,ls_52.0,ls_54.0,ls_55.0,ls_56.0,ls_57.0,ls_59.0,ls_60.0,ls_61.0,ls_62.0,ls_63.0,ls_64.0,ls_66.0,ls_67.0,ls_68.0,ls_70.0,ls_71.0,ls_77.0,ls_81.0,ls_82.0,ls_84.0,ls_85.0,ls_86.0,ls_87.0,ls_89.0,ls_90.0,ls_91.0,ls_92.0,ls_94.0,ls_95.0,ls_96.0,ls_99.0,ls_108.0,ls_116.0,ls_125.0,ls_133.0,ls_135.0,ls_137.0,ls_141.0,ls_146.0,ls_148.0,ls_150.0,ls_152.0,ls_154.0,ls_159.0,ls_160.0,ls_162.0,ls_164.0,ls_165.0,ls_166.0,ls_168.0,ls_173.0,ls_183.0,ls_186.0,ls_188.0,ls_191.0,ls_192.0,ls_193.0,ls_194.0,ls_198.0,ls_200.0,ls_201.0,ls_206.0,ls_211.0,ls_216.0,ls_220.0,ls_221.0,ls_227.0,ls_235.0,ls_241.0,ls_250.0,ls_578.0,ls_nan,pos2_1.0,pos2_2.0,pos2_3.0,pos2_4.0,pos2_5.0,pos2_6.0,pos2_7.0,pos2_8.0,pos2_9.0,pos2_10.0,pos2_13.0,pos2_14.0,pos2_15.0,pos2_16.0,pos2_18.0,pos2_20.0,pos2_21.0,pos2_22.0,pos2_23.0,pos2_24.0,pos2_26.0,pos2_27.0,pos2_29.0,pos2_30.0,pos2_31.0,pos2_33.0,pos2_38.0,pos2_46.0,pos2_47.0,pos2_52.0,pos2_54.0,pos2_55.0,pos2_62.0,pos2_65.0,pos2_71.0,pos2_78.0,pos2_83.0,pos2_102.0,pos2_106.0,pos2_113.0,pos2_115.0,pos2_117.0,pos2_123.0,pos2_124.0,pos2_125.0,pos2_130.0,pos2_131.0,pos2_136.0,pos2_141.0,pos2_142.0,pos2_152.0,pos2_155.0,pos2_163.0,pos2_166.0,pos2_167.0,pos2_169.0,pos2_173.0,pos2_174.0,pos2_175.0,pos2_180.0,pos2_205.0,pos2_209.0,pos2_210.0,pos2_219.0,pos2_220.0,pos2_224.0,pos2_230.0,pos2_301.0,pos2_302.0,pos2_315.0,pos2_319.0,pos2_324.0,pos2_327.0,pos2_329.0,pos2_336.0,pos2_337.0,pos2_358.0,pos2_367.0,pos2_369.0,pos2_400.0,pos2_403.0,pos2_408.0,pos2_411.0,pos2_413.0,pos2_420.0,pos2_424.0,pos2_425.0,pos2_431.0,pos2_432.0,pos2_433.0,pos2_437.0,pos2_440.0,pos2_445.0,pos2_448.0,pos2_456.0,pos2_460.0,pos2_496.0,pos2_500.0,pos2_502.0,pos2_504.0,pos2_516.0,pos2_524.0,pos2_528.0,pos2_nan,pos3_1.0,pos3_2.0,pos3_3.0,pos3_4.0,pos3_5.0,pos3_6.0,pos3_7.0,pos3_8.0,pos3_9.0,pos3_14.0,pos3_20.0,pos3_22.0,pos3_23.0,pos3_24.0,pos3_25.0,pos3_26.0,pos3_27.0,pos3_29.0,pos3_30.0,pos3_31.0,pos3_33.0,pos3_38.0,pos3_47.0,pos3_48.0,pos3_49.0,pos3_60.0,pos3_71.0,pos3_91.0,pos3_92.0,pos3_95.0,pos3_98.0,pos3_99.0,pos3_105.0,pos3_110.0,pos3_116.0,pos3_123.0,pos3_124.0,pos3_127.0,pos3_128.0,pos3_130.0,pos3_131.0,pos3_133.0,pos3_136.0,pos3_159.0,pos3_163.0,pos3_165.0,pos3_166.0,pos3_167.0,pos3_170.0,pos3_171.0,pos3_173.0,pos3_174.0,pos3_175.0,pos3_181.0,pos3_182.0,pos3_202.0,pos3_204.0,pos3_206.0,pos3_207.0,pos3_209.0,pos3_212.0,pos3_220.0,pos3_221.0,pos3_223.0,pos3_228.0,pos3_229.0,pos3_301.0,pos3_305.0,pos3_306.0,pos3_307.0,pos3_308.0,pos3_309.0,pos3_310.0,pos3_311.0,pos3_315.0,pos3_320.0,pos3_325.0,pos3_332.0,pos3_335.0,pos3_349.0,pos3_353.0,pos3_366.0,pos3_400.0,pos3_401.0,pos3_407.0,pos3_408.0,pos3_409.0,pos3_414.0,pos3_419.0,pos3_424.0,pos3_430.0,pos3_431.0,pos3_438.0,pos3_440.0,pos3_441.0,pos3_443.0,pos3_449.0,pos3_451.0,pos3_452.0,pos3_490.0,pos3_497.0,pos3_501.0,pos3_506.0,pos3_520.0,pos3_524.0,pos3_526.0,pos3_532.0,pos3_533.0,pos3_nan,president_D,president_R,president_nan,president_f1_D,president_f1_R,president_f1_nan,president_f2_D,president_f2_R,president_f2_nan,seatno2_02.00.02,seatno2_10.01.01,seatno2_10.02.01,seatno2_11.01.01,seatno2_11.02.01,seatno2_11.03.01,seatno2_11.04.01,seatno2_11.05.01,seatno2_11.06.01,seatno2_11.07.01,seatno2_11.08.01,seatno2_11.09.01,seatno2_11.10.01,seatno2_11.11.01,seatno2_11.12.01,seatno2_2.0,seatno2_nan,seatno3_06.00.01,seatno3_nan,senate_D,senate_R,senate_nan,senate_f1_D,senate_f1_R,senate_f1_nan,senate_f2_D,senate_f2_R,senate_f2_nan,sseatno2_0,sseatno2_10.01.02,sseatno2_10.02.02,sseatno2_11.01.02,sseatno2_11.02.02,sseatno2_11.05.02,sseatno2_11.06.02,sseatno2_11.08.02,sseatno2_11.10.02,sseatno2_11.11.02,sseatno2_2.0,sseatno2_`,sseatno2_nan,totalcites_0.0,totalcites_1.0,totalcites_2.0,totalcites_3.0,totalcites_4.0,totalcites_5.0,totalcites_6.0,totalcites_7.0,totalcites_8.0,totalcites_9.0,totalcites_10.0,totalcites_11.0,totalcites_12.0,totalcites_13.0,totalcites_14.0,totalcites_15.0,totalcites_16.0,totalcites_17.0,totalcites_18.0,totalcites_19.0,totalcites_20.0,totalcites_21.0,totalcites_22.0,totalcites_23.0,totalcites_24.0,totalcites_25.0,totalcites_26.0,totalcites_27.0,totalcites_28.0,totalcites_29.0,totalcites_30.0,totalcites_31.0,totalcites_32.0,totalcites_33.0,totalcites_34.0,totalcites_35.0,totalcites_36.0,totalcites_37.0,totalcites_38.0,totalcites_39.0,totalcites_40.0,totalcites_41.0,totalcites_42.0,totalcites_43.0,totalcites_44.0,totalcites_45.0,totalcites_46.0,totalcites_47.0,totalcites_48.0,totalcites_49.0,totalcites_50.0,totalcites_51.0,totalcites_52.0,totalcites_53.0,totalcites_54.0,totalcites_55.0,totalcites_56.0,totalcites_57.0,totalcites_58.0,totalcites_59.0,totalcites_60.0,totalcites_61.0,totalcites_62.0,totalcites_63.0,totalcites_64.0,totalcites_65.0,totalcites_66.0,totalcites_67.0,totalcites_68.0,totalcites_69.0,totalcites_70.0,totalcites_71.0,totalcites_72.0,totalcites_73.0,totalcites_74.0,totalcites_75.0,totalcites_76.0,totalcites_77.0,totalcites_78.0,totalcites_80.0,totalcites_81.0,totalcites_83.0,totalcites_84.0,totalcites_88.0,totalcites_90.0,totalcites_91.0,totalcites_94.0,totalcites_98.0,totalcites_99.0,totalcites_100.0,totalcites_101.0,totalcites_102.0,totalcites_104.0,totalcites_105.0,totalcites_106.0,totalcites_107.0,totalcites_108.0,totalcites_112.0,totalcites_116.0,totalcites_122.0,totalcites_124.0,totalcites_132.0,totalcites_141.0,totalcites_155.0,totalcites_157.0,totalcites_178.0,totalcites_182.0,totalcites_184.0,totalcites_188.0,totalcites_199.0,totalcites_213.0,totalcites_754.0,totalcites_nan
0,1,11,11,5,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,0,90,1,0,0,0,0,0,0,0,0,38,0,0,0,0,1,0,0,0,99,0,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,59904,0,0,0,0,0,0,0,0,0,0,0,0,0,10613,0,101,114,1,0,4,4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,193,193.899994,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,98,0,0,0,0,0,0,0,0,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,5,5,0,3,3,3,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,-0.45,1,1,1,4,0,0,0,3,0,0,0,0,0,0,0,0,0,0,11,0,0,1,0.0,0,0,0,0,0,0,0,0,1,1,1,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,25639,49611,0,0,0,0,0,1,0,99,99,0,1,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60.799999,...,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,,,,,1,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,1,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1,11,11,5,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,0,90,1,0,0,0,0,0,0,0,0,38,0,0,0,0,1,0,0,0,99,0,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,59904,0,0,0,0,0,0,0,0,0,0,0,0,0,10613,0,101,114,1,0,4,4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,193,193.899994,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,98,0,0,0,0,0,0,0,0,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,5,5,0,3,3,3,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,-0.45,1,1,1,4,0,0,0,3,0,0,0,0,0,0,0,0,0,0,11,0,0,1,0.0,0,0,0,0,0,0,0,0,1,1,1,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,25639,49611,0,0,0,0,0,1,0,99,99,0,1,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60.799999,...,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,,,,,1,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,1,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1,11,11,5,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,42,44,0,70,0,42,0,0,0,0,2,38,42,0,0,0,1,0,0,1,99,0,1,1,1,1,1,1,1,0,0,1937,111,2,2,2,2,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,101,114,1,0,4,4,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,2,10,2,1,2,0,2,1,0,193,193.899994,0,1,0,2,2,0,333,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,1,0,0,1,1,1,2,2,1,2,0,1,0,2,2,2,2,0,1,1,2,2,2,2,2,2,0,1,2,2,2,75,2,2,0,1,2,2,0,2,1,2,2,295,140,68,32,0,0,98,0,25,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,5,5,1,3,3,3,7,7,0,0,0,0,0,0,333,13,89,0,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,-0.45,1,1,1,4,0,1,0,383,0,0,0,0,0,0,0,0,0,3,11,0,0,1,-10.253763,0,0,0,0,0,0,1,0,1,3,1,2,10,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,99,99,0,1,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,1,0,0,0,0,0,0,0,0,0,60.799999,...,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,1,,0,1,,0,1,,0,,,,,,,,,,,,,,,,,1,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,1,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1,11,11,5,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,42,44,0,70,0,42,0,0,0,0,2,38,42,0,0,0,1,0,0,1,99,0,1,1,1,1,1,1,1,0,0,1937,111,2,2,2,2,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,101,114,1,0,4,4,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,2,10,2,1,2,0,2,1,0,193,193.899994,0,1,0,2,2,0,333,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,1,0,0,1,1,1,2,2,1,2,0,1,0,2,2,2,2,0,1,1,2,2,2,2,2,2,0,1,2,2,2,75,2,2,0,1,2,2,0,2,1,2,2,295,140,68,32,0,0,98,0,25,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,5,5,1,3,3,3,7,7,0,0,0,0,0,0,333,13,89,0,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,-0.45,1,1,1,4,0,1,0,383,0,0,0,0,0,0,0,0,0,3,11,0,0,1,-10.253763,0,0,0,0,0,0,1,0,1,3,1,2,10,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,99,99,0,1,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,1,0,0,0,0,0,0,0,0,0,60.799999,...,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,1,,0,1,,0,1,,0,,,,,,,,,,,,,,,,,1,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,1,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1,11,11,5,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,1,0,47,48,0,51,0,47,0,0,0,0,6,38,47,0,0,0,1,0,0,1,99,0,1,1,1,1,1,1,1,0,0,1938,39,2,2,2,2,1,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,101,114,1,0,4,4,1,2,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,2,2,10,2,1,2,0,2,1,0,193,193.899994,0,0,0,2,2,0,333,1,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,1,0,0,1,1,1,2,2,1,2,0,1,0,2,2,2,2,0,1,1,2,2,2,2,2,2,1,1,2,2,2,75,2,2,0,1,2,2,0,2,1,2,2,267,162,66,28,6,2,98,0,12,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,5,5,1,3,3,3,7,7,0,2,0,0,0,0,333,13,89,0,0,0,0,1,2,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,1,1,-0.45,1,1,1,4,0,1,0,14,0,0,0,0,0,0,0,0,0,2,11,0,0,1,-5.867504,0,0,0,0,0,0,1,0,0,3,1,6,3,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,99,99,0,1,1,1,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,13,0,1,0,0,0,0,0,0,0,0,14,1,0,0,0,0,0,0,0,0,0,60.799999,...,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,1,,0,1,,0,1,,0,,,,,,,,,,,,,,,,,1,,1,1,,0,1,,0,1,,0,,,,,,,,,,,,,1,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [151]:
df_x.shape

(111538, 5032)

In [152]:
df_y.shape

(111538, 1)

In [21]:
X=df_x.values
y=df_y.ix[:,0].values

In [63]:
#############################################
# Read data into X and y
#############################################

X, y = get_x_y()

In [22]:
print X.shape
print y.shape

(111538, 5032)
(111538,)


In [23]:
#sanity check
print X[:10]
print y[:10]

[[  1.  11.  11. ...,   0.   0.   0.]
 [  1.  11.  11. ...,   0.   0.   0.]
 [  1.  11.  11. ...,   0.   0.   0.]
 ..., 
 [  1.   5.   5. ...,   0.   0.   0.]
 [  1.   5.   5. ...,   0.   0.   0.]
 [  1.   5.   5. ...,   0.   0.   0.]]
[1 1 1 1 1 1 1 1 1 1]


In [24]:
#############################################
# Split into training and test set
#############################################

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [31]:
X.nbytes/1024/1024/1024

4

In [26]:
df_x.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 111538 entries, 0 to 111537
Columns: 5032 entries, ElecYear_AndPrior to totalcites_nan
dtypes: float64(5031), int64(1)
memory usage: 607.6 MB


In [34]:
print  X_train.shape
print y_train.shape
print X_test.shape
print y_test.shape

(78076, 5032)
(78076,)
(33462, 5032)
(33462,)


In [67]:
# #############################################
# # Standard scale
# #############################################

# scaler = StandardScaler()
# scaler.fit(X_train)

# X_test = scaler.transform(X_test)

In [76]:
for ind,x in enumerate(df_x.dtypes):
    if x!='float64' and x!='int64':
        print ind,x,df_x.columns[ind]

125 object ___char
147 object amicusapp
148 object amicusresp
217 object congresi
338 object endyear
565 object president
566 object president_f1
567 object president_f2
642 object seatno2
643 object seatno3
645 object senate
646 object senate_f1
647 object senate_f2
665 object sseatno2


In [None]:
#############################################
# [OPTIONAL]
# Random Forest Grid Search
#############################################

paramgrid = {'n_estimators': [10, 50, 100], 'max_depth': [1, 5, 10, 15]}

rf_clf = RandomForestClassifier(random_state=42)

grid_search(X_train, y_train, rf_clf, paramgrid)

In [46]:
#############################################
# Random Forest
#############################################

# Replace labels (in case SVM was run)
# y_train[y_train == 0.] = -1.
# y_test[y_test == 0.] = -1.

rf_clf = RandomForestClassifier(random_state=42, 
                                n_estimators=100, 
                                max_depth=15, 
#                                 class_weight={1.0: 1, -1.0: 150})
                                )

rf_clf.fit(X_train, y_train)

y_pred = rf_clf.predict(X_test)

print_report(y_test, y_pred)

             precision    recall  f1-score   support

         -1       0.73      0.49      0.58      2280
          1       0.98      0.99      0.99     53553

avg / total       0.97      0.97      0.97     55833



In [40]:
#############################################
# [OPTIONAL]
# Feature importance analysis
#############################################

top_n = get_top_n(10, rf_clf.feature_importances_, get_columns())

for t in top_n:
    print t


('type1', 0.085836245627072427)
('last3', 0.057453894299753429)
('close2', 0.013785569650024256)
('close3', 0.013742677538063049)
('diss0promerdummy', 0.0065234843229324416)
('unanimous', 0.0057345487383386635)
('dissent', 0.0047420628379996922)
('din', 0.0043813667302849586)
('concprodummy', 0.0036449179471683664)
('keytotal', 0.0035560335123791609)


In [97]:
#############################################
# [OPTIONAL]
# SVM Grid Search
#############################################

paramgrid = {'kernel': ['rbf', 'poly', 'sigmoid', 'linear'], 
             'degree': [1, 3, 5, 7, 9], 
             'coef0': [1e-3, 1e-1, 1e1, 1e3], 
             'max_iter': [1000], 
             'class_weight': [{1.0: 1, -1.0: 150}]}

svm_clf = SVC()

grid_search(X_train, y_train, svm_clf, paramgrid)

[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    4.1s
[Parallel(n_jobs=1)]: Done 199 tasks       | elapsed:   15.5s


Fitting 3 folds for each of 80 candidates, totalling 240 fits
{'kernel': 'poly', 'max_iter': 1000, 'coef0': 1000.0, 'degree': 1, 'class_weight': {1.0: 1, -1.0: 150}}
SVC(C=1.0, cache_size=200, class_weight={1.0: 1, -1.0: 150}, coef0=1000.0,
  decision_function_shape=None, degree=1, gamma='auto', kernel='poly',
  max_iter=1000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
             precision    recall  f1-score   support

         -1       0.00      0.00      0.00        11
          1       0.98      1.00      0.99       489

avg / total       0.96      0.98      0.97       500



[Parallel(n_jobs=1)]: Done 240 out of 240 | elapsed:   18.5s finished


In [65]:
#############################################
# SVM
#############################################

# Replace labels
# y_train[y_train == 0.] = -1.
# y_test[y_test == 0.] = -1.

svm_clf = SVC(kernel='rbf', max_iter=1000, coef0=1e-3, degree=2, class_weight={1.0: 1, -1.0: 150})

svm_clf.fit(X_train, y_train)

y_pred = svm_clf.predict(X_test)

print_report(y_test, y_pred)

             precision    recall  f1-score   support

         -1       0.00      0.00      0.00        11
          1       0.98      1.00      0.99       489

avg / total       0.96      0.98      0.97       500



In [13]:
# tdf = pd.get_dummies(pd.read_csv('final_outs.csv', low_memory=False))

# pd.unique(tdf.ix[:,1].values)

array([ 1, -1])