## Household Income Prediction

Given data about households in Korea, let's try to predict the income of a given household.

We will use various regression models to make our predictions. 

Data Source: https://www.kaggle.com/datasets/hongsean/korea-income-and-welfare

### Getting Started

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression, Ridge, Lasso, HuberRegressor
from sklearn.svm import LinearSVR
from sklearn.tree import DecisionTreeRegressor

In [2]:
data = pd.read_csv("Korea Income and Welfare.csv")
data

Unnamed: 0,id,year,wave,region,income,family_member,gender,year_born,education_level,marriage,religion,occupation,company_size,reason_none_worker
0,10101,2005,1,1,614.0,1,2,1936,2,2,2,,,8
1,10101,2011,7,1,896.0,1,2,1936,2,2,2,,,10
2,10101,2012,8,1,1310.0,1,2,1936,2,2,2,,,10
3,10101,2013,9,1,2208.0,1,2,1936,2,2,2,,,1
4,10101,2014,10,1,864.0,1,2,1936,2,2,2,,,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92852,98000701,2014,10,5,11600.0,6,1,1967,5,1,1,874,1,
92853,98000701,2015,11,5,8327.0,6,1,1967,5,1,1,874,1,
92854,98000701,2016,12,5,7931.0,6,1,1967,5,1,1,874,1,
92855,98000701,2017,13,5,8802.0,5,1,1967,5,1,1,874,1,


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92857 entries, 0 to 92856
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   id                  92857 non-null  int64  
 1   year                92857 non-null  int64  
 2   wave                92857 non-null  int64  
 3   region              92857 non-null  int64  
 4   income              92857 non-null  float64
 5   family_member       92857 non-null  int64  
 6   gender              92857 non-null  int64  
 7   year_born           92857 non-null  int64  
 8   education_level     92857 non-null  int64  
 9   marriage            92857 non-null  int64  
 10  religion            92857 non-null  int64  
 11  occupation          92857 non-null  object 
 12  company_size        92857 non-null  object 
 13  reason_none_worker  92857 non-null  object 
dtypes: float64(1), int64(10), object(3)
memory usage: 9.9+ MB


### Preprocessing

In [4]:
def onehot_encode(df, column, prefix):
    df = df.copy()
    dummies = pd.get_dummies(df[column], dtype=int, prefix=prefix)
    df = pd.concat([df, dummies], axis=1)
    df = df.drop(column, axis=1)
    return df

In [5]:
def preprocess_inputs(df):
    df = df.copy()

    # Drop id column
    df = df.drop('id', axis=1)

    # Encode missing values properly
    df = df.replace(' ', np.NaN)

    # One-hot encode categorical variables
    nominal_features = [
        ('region', "reg"),
        ('marriage', 'mar'),
        ('religion', 'rel'),
        ('occupation', 'occ'),
        ('reason_none_worker', 'rsn')
    ]
    
    for column, prefix in nominal_features:
        df = onehot_encode(df, column=column, prefix=prefix)

    # Fill company_size missing values with 0
    df['company_size'] = df['company_size'].fillna(0)

    # Split df into X and y
    y = df['income']
    X = df.drop('income', axis=1)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=1)

    # Scale X
    scaler = StandardScaler()
    scaler.fit(X_train)

    X_train = pd.DataFrame(scaler.transform(X_train), columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns)
    
    return X_train, X_test, y_train, y_test

In [6]:
X_train, X_test, y_train, y_test = preprocess_inputs(data)

In [7]:
X_train

Unnamed: 0,year,wave,family_member,gender,year_born,education_level,company_size,reg_1,reg_2,reg_3,reg_4,reg_5,reg_6,reg_7,mar_0,mar_1,mar_2,mar_3,mar_4,mar_5,mar_6,mar_9,rel_1,rel_2,rel_9,occ_1001,occ_1002,occ_1009,occ_1011,occ_1012,occ_111,occ_112,occ_113,occ_12,occ_120,occ_121,occ_122,occ_131,occ_132,occ_133,occ_134,occ_135,occ_139,occ_141,occ_142,occ_143,occ_144,occ_145,occ_149,occ_151,occ_152,occ_153,occ_154,occ_156,occ_157,occ_159,occ_161,occ_162,occ_163,occ_164,occ_165,occ_171,occ_172,occ_173,occ_182,occ_183,occ_184,occ_21,occ_211,occ_212,occ_213,occ_22,occ_220,occ_221,occ_222,occ_223,occ_224,occ_225,occ_23,occ_231,occ_232,occ_233,occ_234,occ_235,occ_236,occ_237,occ_238,occ_239,occ_24,occ_241,occ_242,occ_243,occ_244,occ_245,occ_246,occ_247,occ_248,occ_251,occ_252,occ_253,occ_254,occ_259,occ_261,occ_262,occ_263,occ_271,occ_272,occ_273,occ_274,occ_281,occ_282,occ_283,occ_284,occ_285,occ_286,occ_288,occ_289,occ_291,occ_292,occ_293,occ_30,occ_311,occ_312,occ_313,occ_314,occ_315,occ_316,occ_317,occ_318,occ_320,occ_321,occ_322,occ_323,occ_330,occ_391,occ_392,occ_399,occ_411,occ_412,occ_413,occ_414,occ_415,occ_416,occ_421,occ_422,occ_423,occ_429,occ_431,occ_432,occ_441,occ_442,occ_443,occ_444,occ_510,occ_511,occ_512,occ_513,occ_521,occ_522,occ_530,occ_531,occ_532,occ_611,occ_612,occ_613,occ_614,occ_615,occ_616,occ_617,occ_620,occ_630,occ_710,occ_711,occ_712,occ_713,occ_714,occ_721,occ_722,occ_730,occ_731,occ_732,occ_733,occ_741,occ_742,occ_743,occ_744,occ_751,occ_752,occ_753,occ_754,occ_761,occ_762,occ_771,occ_772,occ_773,occ_774,occ_780,occ_781,occ_782,occ_783,occ_784,occ_791,occ_792,occ_799,occ_811,occ_812,occ_813,occ_814,occ_815,occ_816,occ_817,occ_819,occ_821,occ_822,occ_823,occ_824,occ_825,occ_826,occ_827,occ_828,occ_829,occ_831,occ_832,occ_833,occ_839,occ_841,occ_842,occ_843,occ_844,occ_851,occ_852,occ_853,occ_854,occ_855,occ_861,occ_862,occ_863,occ_864,occ_871,occ_872,occ_873,occ_874,occ_875,occ_876,occ_881,occ_882,occ_891,occ_892,occ_899,occ_910,occ_911,occ_912,occ_913,occ_914,occ_915,occ_920,occ_921,occ_922,occ_930,occ_941,occ_942,occ_951,occ_952,occ_953,occ_991,occ_992,occ_999,occ_9999,rsn_0,rsn_1,rsn_10,rsn_11,rsn_2,rsn_3,rsn_4,rsn_5,rsn_6,rsn_7,rsn_8,rsn_9,rsn_99
0,0.872427,0.872427,-0.373172,-0.651209,-0.056228,-0.303502,-0.374223,-0.428721,1.940264,-0.457082,-0.38761,-0.304339,-0.284213,-0.455630,-0.01301,0.793563,-0.547277,-0.296025,-0.112755,-0.257064,-0.023866,-0.008771,0.986719,-0.985505,-0.024815,-0.005547,-0.012405,-0.003922,-0.021844,-0.051057,-0.023211,-0.0171,-0.003922,-0.003922,-0.046956,-0.007845,0.0,-0.029885,-0.039645,-0.025428,-0.013589,-0.02076,-0.031394,-0.063861,-0.009608,-0.010378,-0.006794,-0.003922,-0.047772,-0.05357,-0.04579,-0.042646,-0.01301,-0.011095,-0.023541,-0.041546,-0.005547,-0.007845,-0.005547,-0.01301,-0.006794,-0.007845,-0.009608,-0.024186,-0.015193,-0.010378,-0.010378,-0.017544,-0.029365,-0.012405,-0.015691,-0.007845,-0.030899,-0.032599,-0.051958,-0.026612,-0.019616,-0.003922,-0.021127,-0.067176,-0.038258,-0.023866,-0.033301,-0.062882,-0.040225,-0.025123,-0.013589,-0.043365,-0.015193,-0.034662,-0.024502,-0.032361,-0.016174,-0.039839,-0.040225,-0.034662,-0.058144,-0.035757,-0.072192,-0.027467,-0.054572,-0.047284,-0.026027,-0.020004,-0.024502,-0.034662,-0.033988,-0.022877,-0.056248,-0.029365,-0.018814,-0.019219,-0.029626,-0.032122,-0.030649,-0.005547,-0.01301,-0.022877,-0.008771,-0.003922,-0.038459,-0.134087,-0.155106,-0.090842,-0.067061,-0.019616,-0.038258,-0.020004,-0.012405,-0.057608,-0.028296,-0.012405,-0.022877,-0.030142,-0.009608,-0.018401,-0.05443,-0.066712,-0.044246,-0.015691,-0.003922,-0.031394,-0.040225,-0.076074,-0.080452,-0.015691,-0.030899,-0.049363,-0.045281,-0.088134,-0.090064,-0.003922,-0.014678,-0.123018,-0.044766,-0.081797,-0.053426,-0.132112,-0.026027,-0.072835,-0.009608,-0.01301,-0.348463,-0.049206,-0.058277,-0.038258,-0.026027,-0.031882,-0.057068,-0.014678,-0.043365,-0.034884,-0.007845,-0.034662,-0.051659,-0.021844,-0.058806,-0.039057,-0.042465,-0.034439,-0.020004,-0.055695,-0.058674,-0.027467,-0.056386,-0.023866,-0.061887,-0.040985,-0.073579,-0.021844,-0.076278,-0.051659,-0.035972,-0.07379,11.897129,-0.027467,-0.037236,-0.008771,-0.016643,-0.017977,-0.005547,-0.02076,-0.041731,-0.059461,-0.032599,-0.022538,-0.009608,-0.008771,-0.017544,-0.017544,-0.016174,-0.016643,-0.036186,-0.026612,-0.045958,-0.003922,-0.021844,-0.018814,-0.015193,-0.003922,-0.033068,-0.032361,-0.0421,-0.029885,-0.0171,-0.057608,-0.092717,-0.054145,-0.012405,-0.043895,-0.035541,-0.026612,-0.05666,-0.024815,-0.015691,-0.041916,-0.035757,-0.029365,-0.014144,-0.007845,-0.17116,-0.058542,-0.061636,-0.015691,-0.02076,-0.020004,-0.026321,-0.036398,-0.029101,-0.109419,-0.055834,-0.062882,-0.048094,-0.043186,-0.052843,-0.050601,-0.051809,-0.080549,-0.123981,-0.176282,-0.103583,-0.064944,-0.077493,-0.044594,-0.065183,-0.027467,-0.090238,-0.011768,-0.003922,-0.167361,-0.509219,-0.049206,-0.009608,-0.024815,-0.086975,-0.0592,-0.108697,-0.112545,-0.218353,-0.173479,-0.0269
1,0.124355,0.124355,1.171539,-0.651209,0.506712,0.295385,-0.374223,-0.428721,1.940264,-0.457082,-0.38761,-0.304339,-0.284213,-0.455630,-0.01301,0.793563,-0.547277,-0.296025,-0.112755,-0.257064,-0.023866,-0.008771,0.986719,-0.985505,-0.024815,-0.005547,-0.012405,-0.003922,-0.021844,-0.051057,-0.023211,-0.0171,-0.003922,-0.003922,-0.046956,-0.007845,0.0,-0.029885,-0.039645,-0.025428,-0.013589,-0.02076,-0.031394,-0.063861,-0.009608,-0.010378,-0.006794,-0.003922,-0.047772,-0.05357,-0.04579,-0.042646,-0.01301,-0.011095,-0.023541,-0.041546,-0.005547,-0.007845,-0.005547,-0.01301,-0.006794,-0.007845,-0.009608,-0.024186,-0.015193,-0.010378,-0.010378,-0.017544,-0.029365,-0.012405,-0.015691,-0.007845,-0.030899,-0.032599,-0.051958,-0.026612,-0.019616,-0.003922,-0.021127,-0.067176,-0.038258,-0.023866,-0.033301,-0.062882,-0.040225,-0.025123,-0.013589,-0.043365,-0.015193,-0.034662,-0.024502,-0.032361,-0.016174,-0.039839,-0.040225,-0.034662,-0.058144,-0.035757,-0.072192,-0.027467,-0.054572,-0.047284,-0.026027,-0.020004,-0.024502,-0.034662,-0.033988,-0.022877,-0.056248,-0.029365,-0.018814,-0.019219,-0.029626,-0.032122,-0.030649,-0.005547,-0.01301,-0.022877,-0.008771,-0.003922,-0.038459,-0.134087,-0.155106,-0.090842,-0.067061,-0.019616,-0.038258,-0.020004,-0.012405,-0.057608,-0.028296,-0.012405,-0.022877,-0.030142,-0.009608,-0.018401,-0.05443,-0.066712,-0.044246,-0.015691,-0.003922,-0.031394,-0.040225,-0.076074,-0.080452,-0.015691,-0.030899,-0.049363,-0.045281,-0.088134,-0.090064,-0.003922,-0.014678,-0.123018,-0.044766,-0.081797,-0.053426,-0.132112,-0.026027,-0.072835,-0.009608,-0.01301,-0.348463,-0.049206,-0.058277,-0.038258,-0.026027,-0.031882,-0.057068,-0.014678,-0.043365,-0.034884,-0.007845,-0.034662,-0.051659,-0.021844,-0.058806,-0.039057,-0.042465,-0.034439,-0.020004,-0.055695,-0.058674,-0.027467,-0.056386,-0.023866,-0.061887,-0.040985,-0.073579,-0.021844,-0.076278,-0.051659,-0.035972,-0.07379,-0.084054,-0.027467,-0.037236,-0.008771,-0.016643,-0.017977,-0.005547,-0.02076,-0.041731,-0.059461,-0.032599,-0.022538,-0.009608,-0.008771,-0.017544,-0.017544,-0.016174,-0.016643,-0.036186,-0.026612,-0.045958,-0.003922,-0.021844,-0.018814,-0.015193,-0.003922,-0.033068,-0.032361,-0.0421,-0.029885,-0.0171,-0.057608,-0.092717,-0.054145,-0.012405,-0.043895,-0.035541,-0.026612,-0.05666,-0.024815,-0.015691,-0.041916,27.966417,-0.029365,-0.014144,-0.007845,-0.17116,-0.058542,-0.061636,-0.015691,-0.02076,-0.020004,-0.026321,-0.036398,-0.029101,-0.109419,-0.055834,-0.062882,-0.048094,-0.043186,-0.052843,-0.050601,-0.051809,-0.080549,-0.123981,-0.176282,-0.103583,-0.064944,-0.077493,-0.044594,-0.065183,-0.027467,-0.090238,-0.011768,-0.003922,-0.167361,-0.509219,-0.049206,-0.009608,-0.024815,-0.086975,-0.0592,-0.108697,-0.112545,-0.218353,-0.173479,-0.0269
2,1.121785,1.121785,0.399183,-0.651209,1.444945,1.493158,0.574542,-0.428721,-0.515394,-0.457082,-0.38761,-0.304339,-0.284213,2.194763,-0.01301,0.793563,-0.547277,-0.296025,-0.112755,-0.257064,-0.023866,-0.008771,0.986719,-0.985505,-0.024815,-0.005547,-0.012405,-0.003922,-0.021844,-0.051057,-0.023211,-0.0171,-0.003922,-0.003922,-0.046956,-0.007845,0.0,-0.029885,-0.039645,-0.025428,-0.013589,-0.02076,-0.031394,-0.063861,-0.009608,-0.010378,-0.006794,-0.003922,-0.047772,-0.05357,-0.04579,-0.042646,-0.01301,-0.011095,-0.023541,-0.041546,-0.005547,-0.007845,-0.005547,-0.01301,-0.006794,-0.007845,-0.009608,-0.024186,-0.015193,-0.010378,-0.010378,-0.017544,-0.029365,-0.012405,-0.015691,-0.007845,-0.030899,-0.032599,-0.051958,-0.026612,-0.019616,-0.003922,-0.021127,-0.067176,-0.038258,-0.023866,-0.033301,-0.062882,-0.040225,-0.025123,-0.013589,-0.043365,-0.015193,-0.034662,-0.024502,-0.032361,-0.016174,-0.039839,-0.040225,-0.034662,-0.058144,-0.035757,-0.072192,-0.027467,-0.054572,-0.047284,-0.026027,-0.020004,-0.024502,-0.034662,-0.033988,-0.022877,-0.056248,-0.029365,-0.018814,-0.019219,-0.029626,-0.032122,-0.030649,-0.005547,-0.01301,-0.022877,-0.008771,-0.003922,-0.038459,-0.134087,6.447206,-0.090842,-0.067061,-0.019616,-0.038258,-0.020004,-0.012405,-0.057608,-0.028296,-0.012405,-0.022877,-0.030142,-0.009608,-0.018401,-0.05443,-0.066712,-0.044246,-0.015691,-0.003922,-0.031394,-0.040225,-0.076074,-0.080452,-0.015691,-0.030899,-0.049363,-0.045281,-0.088134,-0.090064,-0.003922,-0.014678,-0.123018,-0.044766,-0.081797,-0.053426,-0.132112,-0.026027,-0.072835,-0.009608,-0.01301,-0.348463,-0.049206,-0.058277,-0.038258,-0.026027,-0.031882,-0.057068,-0.014678,-0.043365,-0.034884,-0.007845,-0.034662,-0.051659,-0.021844,-0.058806,-0.039057,-0.042465,-0.034439,-0.020004,-0.055695,-0.058674,-0.027467,-0.056386,-0.023866,-0.061887,-0.040985,-0.073579,-0.021844,-0.076278,-0.051659,-0.035972,-0.07379,-0.084054,-0.027467,-0.037236,-0.008771,-0.016643,-0.017977,-0.005547,-0.02076,-0.041731,-0.059461,-0.032599,-0.022538,-0.009608,-0.008771,-0.017544,-0.017544,-0.016174,-0.016643,-0.036186,-0.026612,-0.045958,-0.003922,-0.021844,-0.018814,-0.015193,-0.003922,-0.033068,-0.032361,-0.0421,-0.029885,-0.0171,-0.057608,-0.092717,-0.054145,-0.012405,-0.043895,-0.035541,-0.026612,-0.05666,-0.024815,-0.015691,-0.041916,-0.035757,-0.029365,-0.014144,-0.007845,-0.17116,-0.058542,-0.061636,-0.015691,-0.02076,-0.020004,-0.026321,-0.036398,-0.029101,-0.109419,-0.055834,-0.062882,-0.048094,-0.043186,-0.052843,-0.050601,-0.051809,-0.080549,-0.123981,-0.176282,-0.103583,-0.064944,-0.077493,-0.044594,-0.065183,-0.027467,-0.090238,-0.011768,-0.003922,-0.167361,-0.509219,-0.049206,-0.009608,-0.024815,-0.086975,-0.0592,-0.108697,-0.112545,-0.218353,-0.173479,-0.0269
3,0.872427,0.872427,-0.373172,-0.651209,0.131418,0.295385,-0.374223,-0.428721,-0.515394,-0.457082,-0.38761,-0.304339,3.518492,-0.455630,-0.01301,0.793563,-0.547277,-0.296025,-0.112755,-0.257064,-0.023866,-0.008771,-1.013460,1.014708,-0.024815,-0.005547,-0.012405,-0.003922,-0.021844,-0.051057,-0.023211,-0.0171,-0.003922,-0.003922,-0.046956,-0.007845,0.0,-0.029885,-0.039645,-0.025428,-0.013589,-0.02076,-0.031394,-0.063861,-0.009608,-0.010378,-0.006794,-0.003922,-0.047772,-0.05357,-0.04579,-0.042646,-0.01301,-0.011095,-0.023541,-0.041546,-0.005547,-0.007845,-0.005547,-0.01301,-0.006794,-0.007845,-0.009608,-0.024186,-0.015193,-0.010378,-0.010378,-0.017544,-0.029365,-0.012405,-0.015691,-0.007845,-0.030899,-0.032599,-0.051958,-0.026612,-0.019616,-0.003922,-0.021127,-0.067176,-0.038258,-0.023866,-0.033301,-0.062882,-0.040225,-0.025123,-0.013589,-0.043365,-0.015193,-0.034662,-0.024502,-0.032361,-0.016174,-0.039839,-0.040225,-0.034662,-0.058144,-0.035757,-0.072192,-0.027467,-0.054572,-0.047284,-0.026027,-0.020004,-0.024502,-0.034662,-0.033988,-0.022877,-0.056248,-0.029365,-0.018814,-0.019219,-0.029626,-0.032122,-0.030649,-0.005547,-0.01301,-0.022877,-0.008771,-0.003922,-0.038459,-0.134087,-0.155106,-0.090842,-0.067061,-0.019616,-0.038258,-0.020004,-0.012405,-0.057608,-0.028296,-0.012405,-0.022877,-0.030142,-0.009608,-0.018401,-0.05443,-0.066712,-0.044246,-0.015691,-0.003922,-0.031394,-0.040225,-0.076074,-0.080452,-0.015691,-0.030899,-0.049363,-0.045281,-0.088134,-0.090064,-0.003922,-0.014678,-0.123018,-0.044766,-0.081797,-0.053426,-0.132112,-0.026027,-0.072835,-0.009608,-0.01301,2.869745,-0.049206,-0.058277,-0.038258,-0.026027,-0.031882,-0.057068,-0.014678,-0.043365,-0.034884,-0.007845,-0.034662,-0.051659,-0.021844,-0.058806,-0.039057,-0.042465,-0.034439,-0.020004,-0.055695,-0.058674,-0.027467,-0.056386,-0.023866,-0.061887,-0.040985,-0.073579,-0.021844,-0.076278,-0.051659,-0.035972,-0.07379,-0.084054,-0.027467,-0.037236,-0.008771,-0.016643,-0.017977,-0.005547,-0.02076,-0.041731,-0.059461,-0.032599,-0.022538,-0.009608,-0.008771,-0.017544,-0.017544,-0.016174,-0.016643,-0.036186,-0.026612,-0.045958,-0.003922,-0.021844,-0.018814,-0.015193,-0.003922,-0.033068,-0.032361,-0.0421,-0.029885,-0.0171,-0.057608,-0.092717,-0.054145,-0.012405,-0.043895,-0.035541,-0.026612,-0.05666,-0.024815,-0.015691,-0.041916,-0.035757,-0.029365,-0.014144,-0.007845,-0.17116,-0.058542,-0.061636,-0.015691,-0.02076,-0.020004,-0.026321,-0.036398,-0.029101,-0.109419,-0.055834,-0.062882,-0.048094,-0.043186,-0.052843,-0.050601,-0.051809,-0.080549,-0.123981,-0.176282,-0.103583,-0.064944,-0.077493,-0.044594,-0.065183,-0.027467,-0.090238,-0.011768,-0.003922,-0.167361,-0.509219,-0.049206,-0.009608,-0.024815,-0.086975,-0.0592,-0.108697,-0.112545,-0.218353,-0.173479,-0.0269
4,1.121785,1.121785,-1.145528,1.535604,-1.057011,-1.501275,-0.057968,-0.428721,-0.515394,-0.457082,-0.38761,3.285811,-0.284213,-0.455630,-0.01301,-1.260140,1.827228,-0.296025,-0.112755,-0.257064,-0.023866,-0.008771,0.986719,-0.985505,-0.024815,-0.005547,-0.012405,-0.003922,-0.021844,-0.051057,-0.023211,-0.0171,-0.003922,-0.003922,-0.046956,-0.007845,0.0,-0.029885,-0.039645,-0.025428,-0.013589,-0.02076,-0.031394,-0.063861,-0.009608,-0.010378,-0.006794,-0.003922,-0.047772,-0.05357,-0.04579,-0.042646,-0.01301,-0.011095,-0.023541,-0.041546,-0.005547,-0.007845,-0.005547,-0.01301,-0.006794,-0.007845,-0.009608,-0.024186,-0.015193,-0.010378,-0.010378,-0.017544,-0.029365,-0.012405,-0.015691,-0.007845,-0.030899,-0.032599,-0.051958,-0.026612,-0.019616,-0.003922,-0.021127,-0.067176,-0.038258,-0.023866,-0.033301,-0.062882,-0.040225,-0.025123,-0.013589,-0.043365,-0.015193,-0.034662,-0.024502,-0.032361,-0.016174,-0.039839,-0.040225,-0.034662,-0.058144,-0.035757,-0.072192,-0.027467,-0.054572,-0.047284,-0.026027,-0.020004,-0.024502,-0.034662,-0.033988,-0.022877,-0.056248,-0.029365,-0.018814,-0.019219,-0.029626,-0.032122,-0.030649,-0.005547,-0.01301,-0.022877,-0.008771,-0.003922,-0.038459,-0.134087,-0.155106,-0.090842,-0.067061,-0.019616,-0.038258,-0.020004,-0.012405,-0.057608,-0.028296,-0.012405,-0.022877,-0.030142,-0.009608,-0.018401,-0.05443,-0.066712,-0.044246,-0.015691,-0.003922,-0.031394,-0.040225,-0.076074,-0.080452,-0.015691,-0.030899,-0.049363,-0.045281,-0.088134,-0.090064,-0.003922,-0.014678,-0.123018,-0.044766,-0.081797,-0.053426,-0.132112,-0.026027,-0.072835,-0.009608,-0.01301,-0.348463,-0.049206,-0.058277,-0.038258,-0.026027,-0.031882,-0.057068,-0.014678,-0.043365,-0.034884,-0.007845,-0.034662,-0.051659,-0.021844,-0.058806,-0.039057,-0.042465,-0.034439,-0.020004,-0.055695,-0.058674,-0.027467,-0.056386,-0.023866,-0.061887,-0.040985,-0.073579,-0.021844,-0.076278,-0.051659,-0.035972,-0.07379,-0.084054,-0.027467,-0.037236,-0.008771,-0.016643,-0.017977,-0.005547,-0.02076,-0.041731,-0.059461,-0.032599,-0.022538,-0.009608,-0.008771,-0.017544,-0.017544,-0.016174,-0.016643,-0.036186,-0.026612,-0.045958,-0.003922,-0.021844,-0.018814,-0.015193,-0.003922,-0.033068,-0.032361,-0.0421,-0.029885,-0.0171,-0.057608,-0.092717,-0.054145,-0.012405,-0.043895,-0.035541,-0.026612,-0.05666,-0.024815,-0.015691,-0.041916,-0.035757,-0.029365,-0.014144,-0.007845,-0.17116,-0.058542,-0.061636,-0.015691,-0.02076,-0.020004,-0.026321,-0.036398,-0.029101,-0.109419,-0.055834,-0.062882,-0.048094,-0.043186,-0.052843,-0.050601,-0.051809,-0.080549,-0.123981,5.672714,-0.103583,-0.064944,-0.077493,-0.044594,-0.065183,-0.027467,-0.090238,-0.011768,-0.003922,-0.167361,-0.509219,-0.049206,-0.009608,-0.024815,-0.086975,-0.0592,-0.108697,-0.112545,-0.218353,-0.173479,-0.0269
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64994,-0.623718,-0.623718,-0.373172,-0.651209,-1.182109,-1.501275,-0.690478,-0.428721,1.940264,-0.457082,-0.38761,-0.304339,-0.284213,-0.455630,-0.01301,0.793563,-0.547277,-0.296025,-0.112755,-0.257064,-0.023866,-0.008771,0.986719,-0.985505,-0.024815,-0.005547,-0.012405,-0.003922,-0.021844,-0.051057,-0.023211,-0.0171,-0.003922,-0.003922,-0.046956,-0.007845,0.0,-0.029885,-0.039645,-0.025428,-0.013589,-0.02076,-0.031394,-0.063861,-0.009608,-0.010378,-0.006794,-0.003922,-0.047772,-0.05357,-0.04579,-0.042646,-0.01301,-0.011095,-0.023541,-0.041546,-0.005547,-0.007845,-0.005547,-0.01301,-0.006794,-0.007845,-0.009608,-0.024186,-0.015193,-0.010378,-0.010378,-0.017544,-0.029365,-0.012405,-0.015691,-0.007845,-0.030899,-0.032599,-0.051958,-0.026612,-0.019616,-0.003922,-0.021127,-0.067176,-0.038258,-0.023866,-0.033301,-0.062882,-0.040225,-0.025123,-0.013589,-0.043365,-0.015193,-0.034662,-0.024502,-0.032361,-0.016174,-0.039839,-0.040225,-0.034662,-0.058144,-0.035757,-0.072192,-0.027467,-0.054572,-0.047284,-0.026027,-0.020004,-0.024502,-0.034662,-0.033988,-0.022877,-0.056248,-0.029365,-0.018814,-0.019219,-0.029626,-0.032122,-0.030649,-0.005547,-0.01301,-0.022877,-0.008771,-0.003922,-0.038459,-0.134087,-0.155106,-0.090842,-0.067061,-0.019616,-0.038258,-0.020004,-0.012405,-0.057608,-0.028296,-0.012405,-0.022877,-0.030142,-0.009608,-0.018401,-0.05443,-0.066712,-0.044246,-0.015691,-0.003922,-0.031394,-0.040225,-0.076074,-0.080452,-0.015691,-0.030899,-0.049363,-0.045281,-0.088134,-0.090064,-0.003922,-0.014678,-0.123018,-0.044766,-0.081797,-0.053426,-0.132112,-0.026027,-0.072835,-0.009608,-0.01301,-0.348463,-0.049206,-0.058277,-0.038258,-0.026027,-0.031882,-0.057068,-0.014678,-0.043365,-0.034884,-0.007845,-0.034662,-0.051659,-0.021844,-0.058806,-0.039057,-0.042465,-0.034439,-0.020004,-0.055695,-0.058674,-0.027467,-0.056386,-0.023866,-0.061887,-0.040985,-0.073579,-0.021844,-0.076278,-0.051659,-0.035972,-0.07379,-0.084054,-0.027467,-0.037236,-0.008771,-0.016643,-0.017977,-0.005547,-0.02076,-0.041731,-0.059461,-0.032599,-0.022538,-0.009608,-0.008771,-0.017544,-0.017544,-0.016174,-0.016643,-0.036186,-0.026612,-0.045958,-0.003922,-0.021844,-0.018814,-0.015193,-0.003922,-0.033068,-0.032361,-0.0421,-0.029885,-0.0171,-0.057608,-0.092717,-0.054145,-0.012405,-0.043895,-0.035541,-0.026612,-0.05666,-0.024815,-0.015691,-0.041916,-0.035757,-0.029365,-0.014144,-0.007845,-0.17116,-0.058542,-0.061636,-0.015691,-0.02076,-0.020004,-0.026321,-0.036398,-0.029101,-0.109419,-0.055834,-0.062882,-0.048094,-0.043186,-0.052843,-0.050601,-0.051809,-0.080549,-0.123981,-0.176282,-0.103583,-0.064944,-0.077493,-0.044594,-0.065183,-0.027467,-0.090238,-0.011768,-0.003922,-0.167361,-0.509219,-0.049206,-0.009608,-0.024815,-0.086975,-0.0592,-0.108697,-0.112545,4.579735,-0.173479,-0.0269
64995,-0.623718,-0.623718,1.171539,-0.651209,1.007103,2.092044,2.472072,-0.428721,-0.515394,2.187792,-0.38761,-0.304339,-0.284213,-0.455630,-0.01301,0.793563,-0.547277,-0.296025,-0.112755,-0.257064,-0.023866,-0.008771,-1.013460,1.014708,-0.024815,-0.005547,-0.012405,-0.003922,-0.021844,-0.051057,-0.023211,-0.0171,-0.003922,-0.003922,-0.046956,-0.007845,0.0,-0.029885,-0.039645,-0.025428,-0.013589,-0.02076,-0.031394,-0.063861,-0.009608,-0.010378,-0.006794,-0.003922,-0.047772,-0.05357,-0.04579,-0.042646,-0.01301,-0.011095,-0.023541,-0.041546,-0.005547,-0.007845,-0.005547,-0.01301,-0.006794,-0.007845,-0.009608,-0.024186,-0.015193,-0.010378,-0.010378,-0.017544,-0.029365,-0.012405,-0.015691,-0.007845,-0.030899,-0.032599,-0.051958,-0.026612,-0.019616,-0.003922,-0.021127,-0.067176,-0.038258,-0.023866,-0.033301,-0.062882,-0.040225,-0.025123,-0.013589,-0.043365,-0.015193,-0.034662,-0.024502,-0.032361,-0.016174,-0.039839,-0.040225,-0.034662,-0.058144,-0.035757,-0.072192,-0.027467,-0.054572,-0.047284,-0.026027,-0.020004,-0.024502,-0.034662,-0.033988,-0.022877,-0.056248,-0.029365,-0.018814,-0.019219,-0.029626,-0.032122,-0.030649,-0.005547,-0.01301,-0.022877,-0.008771,-0.003922,-0.038459,-0.134087,6.447206,-0.090842,-0.067061,-0.019616,-0.038258,-0.020004,-0.012405,-0.057608,-0.028296,-0.012405,-0.022877,-0.030142,-0.009608,-0.018401,-0.05443,-0.066712,-0.044246,-0.015691,-0.003922,-0.031394,-0.040225,-0.076074,-0.080452,-0.015691,-0.030899,-0.049363,-0.045281,-0.088134,-0.090064,-0.003922,-0.014678,-0.123018,-0.044766,-0.081797,-0.053426,-0.132112,-0.026027,-0.072835,-0.009608,-0.01301,-0.348463,-0.049206,-0.058277,-0.038258,-0.026027,-0.031882,-0.057068,-0.014678,-0.043365,-0.034884,-0.007845,-0.034662,-0.051659,-0.021844,-0.058806,-0.039057,-0.042465,-0.034439,-0.020004,-0.055695,-0.058674,-0.027467,-0.056386,-0.023866,-0.061887,-0.040985,-0.073579,-0.021844,-0.076278,-0.051659,-0.035972,-0.07379,-0.084054,-0.027467,-0.037236,-0.008771,-0.016643,-0.017977,-0.005547,-0.02076,-0.041731,-0.059461,-0.032599,-0.022538,-0.009608,-0.008771,-0.017544,-0.017544,-0.016174,-0.016643,-0.036186,-0.026612,-0.045958,-0.003922,-0.021844,-0.018814,-0.015193,-0.003922,-0.033068,-0.032361,-0.0421,-0.029885,-0.0171,-0.057608,-0.092717,-0.054145,-0.012405,-0.043895,-0.035541,-0.026612,-0.05666,-0.024815,-0.015691,-0.041916,-0.035757,-0.029365,-0.014144,-0.007845,-0.17116,-0.058542,-0.061636,-0.015691,-0.02076,-0.020004,-0.026321,-0.036398,-0.029101,-0.109419,-0.055834,-0.062882,-0.048094,-0.043186,-0.052843,-0.050601,-0.051809,-0.080549,-0.123981,-0.176282,-0.103583,-0.064944,-0.077493,-0.044594,-0.065183,-0.027467,-0.090238,-0.011768,-0.003922,-0.167361,-0.509219,-0.049206,-0.009608,-0.024815,-0.086975,-0.0592,-0.108697,-0.112545,-0.218353,-0.173479,-0.0269
64996,-1.122433,-1.122433,-0.373172,-0.651209,-1.307207,0.295385,-0.374223,-0.428721,-0.515394,-0.457082,-0.38761,-0.304339,3.518492,-0.455630,-0.01301,0.793563,-0.547277,-0.296025,-0.112755,-0.257064,-0.023866,-0.008771,-1.013460,1.014708,-0.024815,-0.005547,-0.012405,-0.003922,-0.021844,-0.051057,-0.023211,-0.0171,-0.003922,-0.003922,-0.046956,-0.007845,0.0,-0.029885,-0.039645,-0.025428,-0.013589,-0.02076,-0.031394,-0.063861,-0.009608,-0.010378,-0.006794,-0.003922,-0.047772,-0.05357,-0.04579,-0.042646,-0.01301,-0.011095,-0.023541,-0.041546,-0.005547,-0.007845,-0.005547,-0.01301,-0.006794,-0.007845,-0.009608,-0.024186,-0.015193,-0.010378,-0.010378,-0.017544,-0.029365,-0.012405,-0.015691,-0.007845,-0.030899,-0.032599,-0.051958,-0.026612,-0.019616,-0.003922,-0.021127,-0.067176,-0.038258,-0.023866,-0.033301,-0.062882,-0.040225,-0.025123,-0.013589,-0.043365,-0.015193,-0.034662,-0.024502,-0.032361,-0.016174,-0.039839,-0.040225,-0.034662,-0.058144,-0.035757,-0.072192,-0.027467,-0.054572,-0.047284,-0.026027,-0.020004,-0.024502,-0.034662,-0.033988,-0.022877,-0.056248,-0.029365,-0.018814,-0.019219,-0.029626,-0.032122,-0.030649,-0.005547,-0.01301,-0.022877,-0.008771,-0.003922,-0.038459,7.457837,-0.155106,-0.090842,-0.067061,-0.019616,-0.038258,-0.020004,-0.012405,-0.057608,-0.028296,-0.012405,-0.022877,-0.030142,-0.009608,-0.018401,-0.05443,-0.066712,-0.044246,-0.015691,-0.003922,-0.031394,-0.040225,-0.076074,-0.080452,-0.015691,-0.030899,-0.049363,-0.045281,-0.088134,-0.090064,-0.003922,-0.014678,-0.123018,-0.044766,-0.081797,-0.053426,-0.132112,-0.026027,-0.072835,-0.009608,-0.01301,-0.348463,-0.049206,-0.058277,-0.038258,-0.026027,-0.031882,-0.057068,-0.014678,-0.043365,-0.034884,-0.007845,-0.034662,-0.051659,-0.021844,-0.058806,-0.039057,-0.042465,-0.034439,-0.020004,-0.055695,-0.058674,-0.027467,-0.056386,-0.023866,-0.061887,-0.040985,-0.073579,-0.021844,-0.076278,-0.051659,-0.035972,-0.07379,-0.084054,-0.027467,-0.037236,-0.008771,-0.016643,-0.017977,-0.005547,-0.02076,-0.041731,-0.059461,-0.032599,-0.022538,-0.009608,-0.008771,-0.017544,-0.017544,-0.016174,-0.016643,-0.036186,-0.026612,-0.045958,-0.003922,-0.021844,-0.018814,-0.015193,-0.003922,-0.033068,-0.032361,-0.0421,-0.029885,-0.0171,-0.057608,-0.092717,-0.054145,-0.012405,-0.043895,-0.035541,-0.026612,-0.05666,-0.024815,-0.015691,-0.041916,-0.035757,-0.029365,-0.014144,-0.007845,-0.17116,-0.058542,-0.061636,-0.015691,-0.02076,-0.020004,-0.026321,-0.036398,-0.029101,-0.109419,-0.055834,-0.062882,-0.048094,-0.043186,-0.052843,-0.050601,-0.051809,-0.080549,-0.123981,-0.176282,-0.103583,-0.064944,-0.077493,-0.044594,-0.065183,-0.027467,-0.090238,-0.011768,-0.003922,-0.167361,-0.509219,-0.049206,-0.009608,-0.024815,-0.086975,-0.0592,-0.108697,-0.112545,-0.218353,-0.173479,-0.0269
64997,1.620500,1.620500,-1.145528,1.535604,-1.432305,1.493158,-0.690478,2.332519,-0.515394,-0.457082,-0.38761,-0.304339,-0.284213,-0.455630,-0.01301,-1.260140,1.827228,-0.296025,-0.112755,-0.257064,-0.023866,-0.008771,0.986719,-0.985505,-0.024815,-0.005547,-0.012405,-0.003922,-0.021844,-0.051057,-0.023211,-0.0171,-0.003922,-0.003922,-0.046956,-0.007845,0.0,-0.029885,-0.039645,-0.025428,-0.013589,-0.02076,-0.031394,-0.063861,-0.009608,-0.010378,-0.006794,-0.003922,-0.047772,-0.05357,-0.04579,-0.042646,-0.01301,-0.011095,-0.023541,-0.041546,-0.005547,-0.007845,-0.005547,-0.01301,-0.006794,-0.007845,-0.009608,-0.024186,-0.015193,-0.010378,-0.010378,-0.017544,-0.029365,-0.012405,-0.015691,-0.007845,-0.030899,-0.032599,-0.051958,-0.026612,-0.019616,-0.003922,-0.021127,-0.067176,-0.038258,-0.023866,-0.033301,-0.062882,-0.040225,-0.025123,-0.013589,-0.043365,-0.015193,-0.034662,-0.024502,-0.032361,-0.016174,-0.039839,-0.040225,-0.034662,-0.058144,-0.035757,-0.072192,-0.027467,-0.054572,-0.047284,-0.026027,-0.020004,-0.024502,-0.034662,-0.033988,-0.022877,-0.056248,-0.029365,-0.018814,-0.019219,-0.029626,-0.032122,-0.030649,-0.005547,-0.01301,-0.022877,-0.008771,-0.003922,-0.038459,-0.134087,-0.155106,-0.090842,-0.067061,-0.019616,-0.038258,-0.020004,-0.012405,-0.057608,-0.028296,-0.012405,-0.022877,-0.030142,-0.009608,-0.018401,-0.05443,-0.066712,-0.044246,-0.015691,-0.003922,-0.031394,-0.040225,-0.076074,-0.080452,-0.015691,-0.030899,-0.049363,-0.045281,-0.088134,-0.090064,-0.003922,-0.014678,-0.123018,-0.044766,-0.081797,-0.053426,-0.132112,-0.026027,-0.072835,-0.009608,-0.01301,-0.348463,-0.049206,-0.058277,-0.038258,-0.026027,-0.031882,-0.057068,-0.014678,-0.043365,-0.034884,-0.007845,-0.034662,-0.051659,-0.021844,-0.058806,-0.039057,-0.042465,-0.034439,-0.020004,-0.055695,-0.058674,-0.027467,-0.056386,-0.023866,-0.061887,-0.040985,-0.073579,-0.021844,-0.076278,-0.051659,-0.035972,-0.07379,-0.084054,-0.027467,-0.037236,-0.008771,-0.016643,-0.017977,-0.005547,-0.02076,-0.041731,-0.059461,-0.032599,-0.022538,-0.009608,-0.008771,-0.017544,-0.017544,-0.016174,-0.016643,-0.036186,-0.026612,-0.045958,-0.003922,-0.021844,-0.018814,-0.015193,-0.003922,-0.033068,-0.032361,-0.0421,-0.029885,-0.0171,-0.057608,-0.092717,-0.054145,-0.012405,-0.043895,-0.035541,-0.026612,-0.05666,-0.024815,-0.015691,-0.041916,-0.035757,-0.029365,-0.014144,-0.007845,-0.17116,-0.058542,-0.061636,-0.015691,-0.02076,-0.020004,-0.026321,-0.036398,-0.029101,-0.109419,-0.055834,-0.062882,-0.048094,-0.043186,-0.052843,-0.050601,-0.051809,-0.080549,-0.123981,-0.176282,-0.103583,-0.064944,-0.077493,-0.044594,-0.065183,-0.027467,-0.090238,-0.011768,-0.003922,-0.167361,1.963790,-0.049206,-0.009608,-0.024815,-0.086975,-0.0592,-0.108697,-0.112545,-0.218353,-0.173479,-0.0269


In [8]:
X_train.var()

year             1.000015
wave             1.000015
family_member    1.000015
gender           1.000015
year_born        1.000015
                   ...   
rsn_6            1.000015
rsn_7            1.000015
rsn_8            1.000015
rsn_9            1.000015
rsn_99           1.000015
Length: 281, dtype: float64

In [9]:
y_train

38023    2958.0
36465    7447.0
29761    8113.0
52414    3434.0
55985     564.0
          ...  
21440    2420.0
73349    6132.0
50057    2830.0
5192      540.0
77708    2469.0
Name: income, Length: 64999, dtype: float64

### Training 

In [10]:
models = {
    "                   Linear Regression": LinearRegression(),
    "    L2-Regularized Linear Regression": Ridge(),
    "    L1-Regularized Linear Regression": Lasso(),
    "           Huber (Robust) Regression": HuberRegressor(),
    "Linear Kernel Support Vector Machine": LinearSVR(),
    "                       Decision Tree": DecisionTreeRegressor()
}

for name, model in models.items():
    model.fit(X_train, y_train)
    print(name + " trained.")

                   Linear Regression trained.
    L2-Regularized Linear Regression trained.
    L1-Regularized Linear Regression trained.
           Huber (Robust) Regression trained.
Linear Kernel Support Vector Machine trained.
                       Decision Tree trained.


### Results

In [11]:
print("Model R^2 Scores (Test Set):")
for name, model in models.items():
    print(name + ": {:.4f}".format(model.score(X_test, y_test)))

Model R^2 Scores (Test Set):
                   Linear Regression: -9491315600640880640.0000
    L2-Regularized Linear Regression: 0.2986
    L1-Regularized Linear Regression: 0.2985
           Huber (Robust) Regression: 0.2841
Linear Kernel Support Vector Machine: 0.2694
                       Decision Tree: 0.1291


#### Optimizing regularization stength of L2 and L1 Regression Models

In [21]:
l2_reg_strength = 1.0

l2_model = Ridge(alpha=l2_reg_strength)
l2_model.fit(X_train, y_train)

print("Ridge Regression Test R^2 (alpha={}): {:.5f}".format(l2_reg_strength, l2_model.score(X_test, y_test)))

Ridge Regression Test R^2 (alpha=1.0): 0.29861


In [26]:
l1_reg_strength = 0.0001

l1_model = Lasso(alpha=l1_reg_strength)
l1_model.fit(X_train, y_train)

print("Lasso Regression Test R^2 (alpha={}): {:.5f}".format(l1_reg_strength, l1_model.score(X_test, y_test)))

Lasso Regression Test R^2 (alpha=0.0001): 0.29861


  model = cd_fast.enet_coordinate_descent(


In [None]:
1.0     0.29848
0.1     0.29859
0.01    0.29860
0.001   0.29860
0.0001  0.29861