In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import RANSACRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

from utils import *

In [2]:
# Load the feature matrix and regression target.
# NOTE(review): gather_data is not defined in this notebook; presumably it comes
# from the wildcard `from utils import *` -- confirm. The column Index printed
# below this cell appears to be emitted by gather_data itself.
data, label = gather_data()

Index(['Log GDP per capita', 'Social support',
       'Healthy life expectancy at birth', 'Freedom to make life choices',
       'Generosity', 'Perceptions of corruption', 'Positive affect',
       'Negative affect', 'Confidence in national government'],
      dtype='object')


In [3]:
# Hold out 20% of the samples for final testing and build a 10-fold splitter
# for model selection on the remaining 80%.
# Both splitters shuffle, so they are seeded: without a fixed random_state the
# split (and every score computed from it) changes on each Restart & Run All.
SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    data, label, test_size=0.2, random_state=SEED
)
kfold = KFold(n_splits=10, shuffle=True, random_state=SEED)

In [4]:
# Sanity check: display the training features (NumPy abbreviates the repr of
# large arrays, so only the corner rows/columns are shown).
x_train

array([[0.75150714, 0.87731832, 0.88067426, ..., 0.64660633, 0.30672413,
        0.32789594],
       [0.59635415, 0.87781447, 0.88540588, ..., 0.72172338, 0.19528414,
        0.44285917],
       [0.77983007, 0.86603862, 0.93242644, ..., 0.6314798 , 0.37016997,
        0.23062515],
       ...,
       [0.7263043 , 0.8069393 , 0.84696143, ..., 0.5369854 , 0.24219675,
        0.09994441],
       [0.48791538, 0.87494594, 0.77155113, ..., 0.79759634, 0.19389972,
        0.29383239],
       [0.41894537, 0.72826999, 0.67751001, ..., 0.63727075, 0.19760665,
        0.64116842]])

In [5]:
# Preview the order-1 basis expansion; the result is only displayed, not stored.
# In the output shown, every row ends with 1.0 after the original features --
# looks like an appended bias column; confirm against basis_expansion in utils.
basis_expansion(x_train,1)

array([[0.75150714, 0.87731832, 0.88067426, ..., 0.30672413, 0.32789594,
        1.        ],
       [0.59635415, 0.87781447, 0.88540588, ..., 0.19528414, 0.44285917,
        1.        ],
       [0.77983007, 0.86603862, 0.93242644, ..., 0.37016997, 0.23062515,
        1.        ],
       ...,
       [0.7263043 , 0.8069393 , 0.84696143, ..., 0.24219675, 0.09994441,
        1.        ],
       [0.48791538, 0.87494594, 0.77155113, ..., 0.19389972, 0.29383239,
        1.        ],
       [0.41894537, 0.72826999, 0.67751001, ..., 0.19760665, 0.64116842,
        1.        ]])

In [6]:
# Preview the order-2 basis expansion; the result is only displayed, not stored.
# In the output shown, the columns just before the trailing 1.0 equal the squares
# of the last original features (e.g. 0.30672413**2 ~= 0.09407969), so the
# expansion presumably appends per-feature powers -- confirm in utils.
basis_expansion(x_train,2)

array([[0.75150714, 0.87731832, 0.88067426, ..., 0.09407969, 0.10751575,
        1.        ],
       [0.59635415, 0.87781447, 0.88540588, ..., 0.0381359 , 0.19612425,
        1.        ],
       [0.77983007, 0.86603862, 0.93242644, ..., 0.1370258 , 0.05318796,
        1.        ],
       ...,
       [0.7263043 , 0.8069393 , 0.84696143, ..., 0.05865927, 0.00998889,
        1.        ],
       [0.48791538, 0.87494594, 0.77155113, ..., 0.0375971 , 0.08633747,
        1.        ],
       [0.41894537, 0.72826999, 0.67751001, ..., 0.03904839, 0.41109694,
        1.        ]])

In [10]:
# Model selection: for each basis-expansion order n = 1..8, run cross-validation
# over the training set with a RANSAC regressor and record the fold-averaged
# MSE, R2 and MSPE. `stats` ends up as a list of dicts, one per order, with
# keys "n", "MSE", "R2", "MSPE".
# NOTE(review): basis_expansion and mean_squared_percentage_error are not defined
# in this notebook; presumably they come from `from utils import *` -- confirm.
stats = []

for i in range(1, 9):
    x_expansion = basis_expansion(x_train, i)
    # Per-fold scores for this order; averaged once all folds are done.
    fold_scores = {"MSE": [], "R2": [], "MSPE": []}

    for train_index, val_index in kfold.split(x_expansion, y_train):
        # A fresh estimator per fold; random_state pins RANSAC's subsampling.
        model = RANSACRegressor(min_samples=10, max_trials=200,
                                loss='absolute_error', random_state=12,
                                residual_threshold=10)
        model.fit(x_expansion[train_index], y_train[train_index])

        # Predict once per fold and reuse the result for all three metrics
        # instead of re-running model.predict for each of them.
        y_val = y_train[val_index]
        y_pred = model.predict(x_expansion[val_index])

        fold_scores["MSE"].append(mean_squared_error(y_val, y_pred))
        fold_scores["R2"].append(r2_score(y_val, y_pred))
        fold_scores["MSPE"].append(mean_squared_percentage_error(y_val, y_pred))

    stats.append({
        "n": i,
        "MSE": np.mean(fold_scores["MSE"]),
        "R2": np.mean(fold_scores["R2"]),
        "MSPE": np.mean(fold_scores["MSPE"]),
    })
stats

[{'n': 1,
  'MSE': 0.27427205717936187,
  'R2': 0.7800287854798442,
  'MSPE': 1.2873433920816841},
 {'n': 2,
  'MSE': 0.24829537931000994,
  'R2': 0.804840384031938,
  'MSPE': 1.1544262139581418},
 {'n': 3,
  'MSE': 0.24498755904552505,
  'R2': 0.8055660081429897,
  'MSPE': 1.1137325507294542},
 {'n': 4,
  'MSE': 0.2490602268886802,
  'R2': 0.8021698381986834,
  'MSPE': 1.1560434175043863},
 {'n': 5,
  'MSE': 0.2434038004850101,
  'R2': 0.8066775776822832,
  'MSPE': 1.1166305305533646},
 {'n': 6,
  'MSE': 0.4878793811843388,
  'R2': 0.6058048479579938,
  'MSPE': 2.8458200114040246},
 {'n': 7,
  'MSE': 1.3796742141552287,
  'R2': -0.15434838141698073,
  'MSPE': 9.092581555074048},
 {'n': 8,
  'MSE': 1055.8611804423685,
  'R2': -765.2131153698901,
  'MSPE': 7487.567349885617}]