In [18]:
import numpy as np
from sklearn.model_selection import train_test_split
import utils
from utils import DataSelector, DataFromSelector, conn, DataFromSelectorW

years = utils.Groups(5)

Groups = utils.CombinedDataSelector(years).ConstructNoFilter()

ds = utils.DataSelector(5,Groups)

dfs = utils.DataFromSelector(ds,1970,35,utils.conn)

data = dfs.constructAll()

X_train, X_test, y_train, y_test = train_test_split(np.array(data[0]),
     np.array(data[1]), test_size=0.1, random_state=42)

In [2]:
from models import LinRegWithPoly, RidgeRegCVWithPoly, LassoCVWithPoly

In [3]:
lr_model = LinRegWithPoly(degree_range=[1,2,3])
lr_model.fit(X_train, y_train)
lr_model.evaluate(X_test, y_test)

(0.08011885176934885,
 {'val_neg_mean_squared_error': -0.5045287419425678,
  'val_neg_root_mean_squared_error': -0.7103018667739568,
  'val_r2': 0.08011885176934885})

In [7]:
lr_model.best_estimator_.get_params()

{'memory': None,
 'steps': [('poly', PolynomialFeatures(degree=1)),
  ('model', LinearRegression())],
 'verbose': False,
 'poly': PolynomialFeatures(degree=1),
 'model': LinearRegression(),
 'poly__degree': 1,
 'poly__include_bias': True,
 'poly__interaction_only': False,
 'poly__order': 'C',
 'model__copy_X': True,
 'model__fit_intercept': True,
 'model__n_jobs': None,
 'model__normalize': False}

In [None]:
lr_model.best_estimator_.named_steps['poly'].degree

In [None]:
ridge = RidgeRegCVWithPoly(degree_range=[1, 2, 3], alphas=np.logspace(-2, 1, 10))
ridge.fit(X_train, y_train)
ridge.evaluate(X_test, y_test)

In [None]:
ridge.best_estimator_.named_steps['poly'].degree

In [None]:
ridge.best_estimator_.named_steps['model'].alpha_

In [None]:
lasso = LassoCVWithPoly(degree=3, alphas=np.logspace(-10, 1, 10))
lasso.fit(X_train, y_train)
lasso.evaluate(X_test, y_test)

In [None]:
lasso.named_steps['model'].alpha_

In [None]:
from models import KNNRegressor

In [None]:
knn = KNNRegressor()
knn.fit(X_train, y_train)
knn.evaluate(X_test, y_test)

In [None]:
knn.cv_results_

In [None]:
from models import SVMRegression

In [None]:
svr = SVMRegression(C_range=np.logspace(0.5, 1.5, 6), epsilon_range=np.logspace(-2, 0, 6))
svr.fit(X_train, y_train)
svr.evaluate(X_test, y_test)

In [None]:
svr.cv_results_

In [None]:
svr.best_estimator_

In [29]:
from models import *

In [None]:
decTree = DecTreeReg()
decTree.fit(X_train, y_train)
decTree.evaluate(X_test, y_test)

In [None]:
randForest = RandomForestReg()
randForest.fit(X_train, y_train)
randForest.evaluate(X_test, y_test)

In [17]:
def TrainingPipeline(daArray, conn, model):
    cds = utils.CombinedDataSelector(daArray)
    for s in cds.Construct():
        (selected,itemi) = s
        ds = DataSelector(5,selected)
        dfs = DataFromSelector(ds,1980,25,conn)
        datas = dfs.constructAll()
        (X, y) = datas
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
        modelc = model.getNewConsolitedModel()
        modelc.fit(X_train,y_train)
        Score = modelc.evaluate(X_test,y_test)[0]
        print(itemi.note, Score, modelc.name())

In [21]:
class SingleModel:
    def __init__(self,model):
        self.model = model
    def getNewConsolitedModel(self):
        return self.model
    def name(self):
        return self.model.name()

In [8]:
TrainingPipeline(years,utils.conn,SingleModel(RandomForestReg()))

Size of the economy (GDP per capita) 0.3835140687153946
Poverty rates at international poverty lines 0.42107894419044367
Distribution of income or consumption 0.39485317428236366
Labor force structure 0.41664129732001876
Employment by sector 0.39901534922349235
Unemployment 0.4186386971620045
Health systems 0.38261203222016593
Energy dependency, efficiency and carbon dioxide emissions 0.3930315268876007
Structure of merchandise exports 0.3886573401560983
Structure of merchandise imports 0.4014362301457548
Structure of service exports 0.38137754863693796
Structure of service imports 0.40372571747725616
Business environment: Doing Business indicators 0.4067912367098915
Financial access, stability and efficiency 0.41042014716557607
Tax policies 0.39270977533619766


In [35]:
class ConsolitedModel:
    def __init__(self, models):
        self.models = models
    def fit(self,X,y):
        scores = 1000000
        for modeli in self.models:
            model = modeli()
            model.fit(X,y)
            scoresw = model.evaluate(X,y)[0]
            if scoresw < scores:
                scores = scoresw
                self.modelselected = model
    def evaluate(self,X,y):
        return self.modelselected.evaluate(X,y)

    def name(self):
        return self.modelselected.__class__.__name__

In [23]:
TrainingPipeline(years,utils.conn,SingleModel(ConsolitedModel([RandomForestReg(),DecTreeReg()])))

Size of the economy (GDP per capita) 0.38923355791273884
Poverty rates at international poverty lines 0.4026254822210066
Distribution of income or consumption 0.4024533426044339
Labor force structure 0.396352099909824
Employment by sector 0.4106308189845409
Unemployment 0.40988807661824855
Health systems 0.3864514577144953
Energy dependency, efficiency and carbon dioxide emissions 0.3940916250828389
Structure of merchandise exports 0.38288840682866354
Structure of merchandise imports 0.40918288826421123
Structure of service exports 0.3948536382499819
Structure of service imports 0.4184723882570526
Business environment: Doing Business indicators 0.4005093467853339
Financial access, stability and efficiency 0.4039201871795749
Tax policies 0.3987474614836719


In [None]:
["AUS",
"AUT",
"BEL",
"CAN",
"CHE",
"CYP",
"CZE",
"DEU",
"DNK",
"ESP",
"EST",
"FIN",
"FRA",
"GRC",
"HKG",
"IRL",
"ISL",
"ISR",
"ITA",
"JPN",
"KOR",
"LTU",
"LUX",
"LVA",
"MAC",
"MLT",
"NLD",
"NOR",
"NZL",
"PRI",
"PRT",
"SGP",
"SMR",
"SVK",
"SVN",
"SWE",
"USA"]

In [13]:
from utils import *
ds = DataSelector(5,["NY.GDP.PCAP.PP.KD","SI.POV.LMIC.GP","SI.DST.10TH.10","SI.DST.50MD"])

dfs = DataFromSelectorW(ds,2015,1,conn)

In [14]:
dfs.constructAll()[1]

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [15]:
returnCounter()

11003

In [16]:
import utils
counters = returnCounter()
for y in range(1990,2018):

    years = utils.Groups(5)

    Groups = utils.CombinedDataSelector(years).ConstructNoFilter()

    ds = utils.DataSelector(5,Groups)

    dfs = utils.DataFromSelector(ds,y,1,utils.conn)

    data = dfs.constructAll()

    print(y,returnCounter() - counters)

    counters = returnCounter()

1990 0
1991 0
1992 0
1993 0
1994 0
1995 7776
1996 6559
1997 6446
1998 6538
1999 6538
2000 6536
2001 6435
2002 6431
2003 6343
2004 6346
2005 5524
2006 5615
2007 5476
2008 4870
2009 4500
2010 4271
2011 4209
2012 4158
2013 4118
2014 3977
2015 3792
2016 3963
2017 3879


In [37]:
TrainingPipeline(years,utils.conn,SingleModel(ConsolitedModel([RandomForestReg,DecTreeReg,LinRegWithPoly, RidgeRegCVWithPoly, LassoCVWithPoly,KNNGaussianKernelReg, KNNRegressor])))


ValueError: Invalid parameter alpha for estimator Pipeline(steps=[('poly', PolynomialFeatures(degree=3)),
                ('model', LinearRegression())]). Check the list of available parameters with `estimator.get_params().keys()`.