In [5]:
import warnings
warnings.filterwarnings('ignore')

### t2.micro, AWS Marketplace -> Anaconda with Python 3

### 1) L1 Regularization

In [2]:
import numpy as np
from sklearn import metrics
from sklearn import datasets
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()

X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)

for Alpha in [10, 1, 0.1, 0.01, 0.001, 0.0001]:

  predictions = np.round(Lasso(alpha=Alpha).fit(X_train, y_train).predict(X_test))
  print('Accuracy: {:.{}f}'.format(metrics.accuracy_score(y_test, predictions), 5) + ', Alpha: ' + str(Alpha))

Accuracy: 0.35556, Alpha: 10
Accuracy: 0.35556, Alpha: 1
Accuracy: 0.95556, Alpha: 0.1
Accuracy: 0.97778, Alpha: 0.01
Accuracy: 0.97778, Alpha: 0.001
Accuracy: 0.97778, Alpha: 0.0001


### 2) L2 regularization

In [1]:
import numpy as np
from sklearn import metrics
from sklearn import datasets
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()

X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)

for Alpha in [10, 1, 0.1, 0.01, 0.001, 0.0001]:

  predictions = np.round(Ridge(alpha=Alpha).fit(X_train, y_train).predict(X_test))
  print('Accuracy: {:.{}f}'.format(metrics.accuracy_score(y_test, predictions), 5) + ', Alpha: ' + str(Alpha))

Accuracy: 0.95556, Alpha: 10
Accuracy: 0.93333, Alpha: 1
Accuracy: 0.93333, Alpha: 0.1
Accuracy: 0.93333, Alpha: 0.01
Accuracy: 0.93333, Alpha: 0.001
Accuracy: 0.93333, Alpha: 0.0001


### 3) Elastic Net

In [23]:
import numpy as np
from sklearn import metrics
from sklearn import datasets
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()

X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)

for L1Ratio in [0.0, 0.5, 1.0]:

  for Alpha in [10, 1, 0.1, 0.01, 0.001, 0.0001]:

    predictions = np.round(ElasticNet(alpha=Alpha, l1_ratio=L1Ratio).fit(X_train, y_train).predict(X_test))

    print('Accuracy: {:.{}f}'.format(metrics.accuracy_score(y_test, predictions), 5)  + ', L1 Ratio: ' + str(L1Ratio) + ', Alpha: ' + str(Alpha))    

Accuracy: 0.22222, L1 Ratio: 0.0, Alpha: 10
Accuracy: 0.86667, L1 Ratio: 0.0, Alpha: 1
Accuracy: 0.93333, L1 Ratio: 0.0, Alpha: 0.1
Accuracy: 0.93333, L1 Ratio: 0.0, Alpha: 0.01
Accuracy: 0.95556, L1 Ratio: 0.0, Alpha: 0.001
Accuracy: 0.95556, L1 Ratio: 0.0, Alpha: 0.0001
Accuracy: 0.22222, L1 Ratio: 0.5, Alpha: 10
Accuracy: 0.51111, L1 Ratio: 0.5, Alpha: 1
Accuracy: 0.91111, L1 Ratio: 0.5, Alpha: 0.1
Accuracy: 0.93333, L1 Ratio: 0.5, Alpha: 0.01
Accuracy: 0.95556, L1 Ratio: 0.5, Alpha: 0.001
Accuracy: 0.95556, L1 Ratio: 0.5, Alpha: 0.0001
Accuracy: 0.22222, L1 Ratio: 1.0, Alpha: 10
Accuracy: 0.22222, L1 Ratio: 1.0, Alpha: 1
Accuracy: 0.91111, L1 Ratio: 1.0, Alpha: 0.1
Accuracy: 0.95556, L1 Ratio: 1.0, Alpha: 0.01
Accuracy: 0.95556, L1 Ratio: 1.0, Alpha: 0.001
Accuracy: 0.95556, L1 Ratio: 1.0, Alpha: 0.0001


### 4) Ensemble Methods

In [5]:
from sklearn import metrics
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()

X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)

predictions = RandomForestClassifier(n_estimators=100).fit(X_train,y_train).predict(X_test)

print('Accuracy: {:.{}f}'.format(metrics.accuracy_score(y_test, predictions), 5))

Accuracy: 0.88889


### 4) Dropout

### 5) Backtesting

### 6) K-fold Cross Validation

### 7) Data Augmentation

### 9) Add more Data

### 10 Remove Features

### 12) Pruning / Reduce Complexity

### 10) Early Stopping

In [7]:
import numpy as np
import xgboost as xgb
from sklearn import metrics
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()

X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)

dtrain      = xgb.DMatrix(X_train, y_train)
bst         = xgb.train({}, dtrain, num_boost_round=100, evals=[(dtrain, 'train')], early_stopping_rounds=10)
predictions = np.round(bst.predict(xgb.DMatrix(X_test)))

print('Accuracy: {:.{}f}'.format(metrics.accuracy_score(y_test, predictions), 5))

[19:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 0 pruned nodes, max_depth=4
[0]	train-rmse:0.67273
Will train until train-rmse hasn't improved in 10 rounds.
[19:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=5
[1]	train-rmse:0.484766
[19:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 0 pruned nodes, max_depth=2
[2]	train-rmse:0.351972
[19:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=5
[3]	train-rmse:0.255113
[19:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=5
[4]	train-rmse:0.187909
[19:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 0 pruned nodes, max_depth=3
[5]	train-rmse:0.140947
[19:22:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=4
[6]	train-rmse:0.1