In [56]:
import warnings
from numpy import mean
from numpy import std
from itertools import combinations
from matplotlib import pyplot

# data engineering
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline

# classification models
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier

# regression models
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import Lars
from sklearn.linear_model import LassoLars
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import TheilSenRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor

To do:
* Implement some kind of pruning or other optimization for tree-based models
* Some models have non-obvious hyperparameters (e.g. `epsilon` for HuberRegressor); if evaluating these models as they are doesn't take too long, consider sweeping those hyperparameters as well
* Implement all(?) models from my list below (except deep learning). Feature selection and similar algorithms should not be evaluated themselves, but they can still be used and their results shown (or maybe they should be evaluated as part of different pipelines):

In [32]:
# Regression algorithms:
# Iteratively reduce the error rate while modelling the relationships between variables.
# Ordinary Least Squares Regression (OLSR)
# Linear Regression
# Logistic Regression
# Stepwise Regression
# Multivariate Adaptive Regression Splines (MARS)
# Locally Estimated Scatterplot Smoothing (LOESS)

# Regularization algorithms:
# Add an extra penalty term that favours simpler models.
# Ridge Regression
# Least Absolute Shrinkage and Selection Operator (LASSO)
# Elastic Net
# Least-Angle Regression (LARS)

# Instance-based algorithms:
# Keep the training data and compare each new sample against it.
# k-Nearest Neighbor (kNN)
# Learning Vector Quantization (LVQ)
# Self-Organizing Map (SOM)
# Locally Weighted Learning (LWL)
# Support Vector Machines (SVM)

# Decision Tree Algorithms:
# Classification and Regression Tree (CART)
# Iterative Dichotomiser 3 (ID3)
# C4.5 and C5.0 (different versions of a powerful approach)
# Chi-squared Automatic Interaction Detection (CHAID)
# Decision Stump
# M5
# Conditional Decision Trees

# Ensembling Algorithms:
# Algorithms that combine many weaker learners to obtain a good result:
# Boosting
# Bootstrapped Aggregation (Bagging)
# AdaBoost
# Weighted Average (Blending)
# Stacked Generalization (Stacking)
# Gradient Boosting Machines (GBM)
# Gradient Boosted Regression Trees (GBRT)
# Random Forest

# Bayesian Algorithms:
# Based on Bayes' theorem.
# Naive Bayes
# Gaussian Naive Bayes
# Multinomial Naive Bayes
# Averaged One-Dependence Estimators (AODE)
# Bayesian Belief Network (BBN)
# Bayesian Network (BN)

# Clustering Algorithms:
# k-Means
# k-Medians
# Expectation Maximisation (EM)
# Hierarchical Clustering

# Association Rule Learning Algorithms:
# Extract the rules that best describe the relationships between observations.
# Apriori algorithm
# Eclat algorithm

# Artificial Neural Network Algorithms:
# Perceptron
# Multilayer Perceptrons (MLP)
# Back-Propagation
# Stochastic Gradient Descent
# Hopfield Network
# Radial Basis Function Network (RBFN)

# Deep Learning Algorithms
# Convolutional Neural Network (CNN)
# Recurrent Neural Networks (RNNs)
# Long Short-Term Memory Networks (LSTMs)
# Stacked Auto-Encoders
# Deep Boltzmann Machine (DBM)
# Deep Belief Networks (DBN)

# Dimensionality reduction:
# Principal Component Analysis (PCA)
# Principal Component Regression (PCR)
# Partial Least Squares Regression (PLSR)
# Sammon Mapping
# Multidimensional Scaling (MDS)
# Projection Pursuit
# Linear Discriminant Analysis (LDA)
# Mixture Discriminant Analysis (MDA)
# Quadratic Discriminant Analysis (QDA)
# Flexible Discriminant Analysis (FDA)

# Other algorithms:
# Algorithms for other tasks, such as:
# Feature selection algorithms
# Algorithm accuracy evaluation
# Performance measures
# Optimization algorithms
# Algorithms for specialised subfields:
# Computational intelligence (evolutionary algorithms, etc.)
# Computer Vision (CV)
# Natural Language Processing (NLP)
# Recommender Systems
# Reinforcement Learning
# Graphical Models

In [57]:
# dummy datasets
def _load_classification_dataset():
    """Return the synthetic binary-classification dataset from make_classification."""
    return make_classification(n_samples=1000, n_classes=2, random_state=1)


def _load_regression_dataset():
    """Return the synthetic regression dataset from make_regression."""
    return make_regression(n_samples=1000, n_features=50, noise=0.1, random_state=1)


def load_dataset(task='regression'):
    """Return a synthetic dummy dataset for the requested kind of task.

    A single entry point is used instead of two functions sharing one name, because
    a second ``def load_dataset`` silently replaces the first and makes the
    classification data unreachable.

    Args:
        task: ``'regression'`` (default) or ``'classification'``. The default keeps
            the result of a bare ``load_dataset()`` call on the regression data.

    Returns:
        Whatever ``make_regression`` / ``make_classification`` returns for the
        fixed settings above (both are seeded with ``random_state=1``).

    Raises:
        ValueError: if ``task`` is not one of the supported names.
    """
    loaders = {
        'classification': _load_classification_dataset,
        'regression': _load_regression_dataset,
    }
    if task not in loaders:
        raise ValueError(
            f"Unknown task {task!r}; expected one of {sorted(loaders)}"
        )
    return loaders[task]()

In [58]:
# create a dictionary of models to test: {'ModelName': ModelObjectReference}
def get_classification_models():
    """Assemble the catalogue of classifiers to benchmark.

    Returns:
        dict: display name -> unfitted estimator, in insertion order. Estimators
        with a swept hyperparameter appear once per candidate value, with the
        value embedded in the name (e.g. ``'RidgeClassifier_(0.1)'``), so that
        every model family gets a comparable chance.
    """
    # Candidate values shared by the RidgeClassifier alpha sweep and the SVC C sweep.
    tenths = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
    neighbour_counts = range(1, 21)
    ensemble_size = 100
    online_settings = {'max_iter': 1000, 'tol': 1e-3}

    # --- Linear learners ---
    catalogue = {'LogisticRegression': LogisticRegression()}
    catalogue.update({
        f'RidgeClassifier_({strength})': RidgeClassifier(alpha=strength)
        for strength in tenths
    })
    catalogue.update({
        'SGDClassifier': SGDClassifier(**online_settings),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(**online_settings),
    })

    # --- Non-linear learners ---
    catalogue.update({
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'ExtraTreeClassifier': ExtraTreeClassifier(),
    })
    catalogue.update({
        f'KNeighborsClassifier_({n})': KNeighborsClassifier(n_neighbors=n)
        for n in neighbour_counts
    })
    catalogue.update({
        'LinearSVM': SVC(kernel='linear'),
        'PolynomialSVM': SVC(kernel='poly'),
    })
    catalogue.update({
        f'SupportVectorClassifier_({penalty})': SVC(C=penalty)
        for penalty in tenths
    })
    catalogue['GaussianNaiveBayes'] = GaussianNB()

    # --- Ensembles: every one is built with the same number of base estimators ---
    ensembles = (
        ('AdaBoostClassifier', AdaBoostClassifier),
        ('BaggingClassifier', BaggingClassifier),
        ('RandomForestClassifier', RandomForestClassifier),
        ('ExtraTreesClassifier', ExtraTreesClassifier),
        ('GradientBoostingClassifier', GradientBoostingClassifier),
    )
    for label, estimator_cls in ensembles:
        catalogue[label] = estimator_cls(n_estimators=ensemble_size)

    return catalogue

In [59]:
def get_regression_models():
    """Build the catalogue of regressors to benchmark.

    Returns:
        dict: display name -> unfitted estimator. Estimators with swept
        hyperparameters appear once per candidate value, with the value(s)
        embedded in the name. ElasticNet names follow
        ``'ElasticNetRegression_(<alpha>)_(<l1_ratio>)'``.
    """
    models = dict()

    # Linear Base Learning
    models['LinearRegression'] = LinearRegression()
    alpha = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    # NOTE(review): scikit-learn advises against alpha=0.0 for Lasso (it recommends
    # LinearRegression instead); the value is kept so existing result names stay available.
    for a in alpha:
        models[f'LassoRegression_({a})'] = Lasso(alpha=a)
    for a in alpha:
        models[f'RidgeRegression_({a})'] = Ridge(alpha=a)

    # Sweep the full alpha x l1_ratio grid. Pairing the values with
    # itertools.combinations only yields l1_ratio < alpha, which skips more than
    # half of the grid (the diagonal and l1_ratio=1.0 included). alpha=0.0 is left
    # out because it switches the penalty off, which LinearRegression already covers.
    for l1_ratio in alpha:
        for strength in alpha[1:]:
            models[f'ElasticNetRegression_({strength})_({l1_ratio})'] = ElasticNet(
                alpha=strength, l1_ratio=l1_ratio
            )

    models['HuberRegression'] = HuberRegressor()
    models['LarsRegression'] = Lars()
    models['LassoLarsRegression'] = LassoLars()
    models['PassiveAggressiveRegression'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    models['RANSACRegression'] = RANSACRegressor()
    models['SGDRegression'] = SGDRegressor(max_iter=1000, tol=1e-3)
    models['TheilSenRegression'] = TheilSenRegressor()

    # Non Linear Learning
    for k in range(1, 21):
        models[f'KNeighborsRegressor_({k})'] = KNeighborsRegressor(n_neighbors=k)
    models['TreeRegression'] = DecisionTreeRegressor()
    models['ExtraTreeRegression'] = ExtraTreeRegressor()
    models['LinearSupportVectorRegression'] = SVR(kernel='linear')
    models['PolynomialSupportVectorRegression'] = SVR(kernel='poly')
    c_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    for c in c_values:
        models[f'SupportVectorRegression_({c})'] = SVR(C=c)

    # Ensemble Learning: every ensemble is built with the same number of base estimators
    n_trees = 100
    models['AdaBoostRegression'] = AdaBoostRegressor(n_estimators=n_trees)
    models['BaggingRegression'] = BaggingRegressor(n_estimators=n_trees)
    models['RandomForestRegression'] = RandomForestRegressor(n_estimators=n_trees)
    models['ExtraTreesRegression'] = ExtraTreesRegressor(n_estimators=n_trees)
    models['GradientBoostingRegression'] = GradientBoostingRegressor(n_estimators=n_trees)

    return models

In [61]:
# Build the classifier catalogue to evaluate; swap in the commented line below to
# benchmark the regressors instead.
models = get_classification_models()
# models = get_regression_models()
# Bare expression so the notebook echoes the full name -> estimator mapping.
models

{'LogisticRegression': LogisticRegression(),
 'RidgeClassifier_(0.1)': RidgeClassifier(alpha=0.1),
 'RidgeClassifier_(0.2)': RidgeClassifier(alpha=0.2),
 'RidgeClassifier_(0.3)': RidgeClassifier(alpha=0.3),
 'RidgeClassifier_(0.4)': RidgeClassifier(alpha=0.4),
 'RidgeClassifier_(0.5)': RidgeClassifier(alpha=0.5),
 'RidgeClassifier_(0.6)': RidgeClassifier(alpha=0.6),
 'RidgeClassifier_(0.7)': RidgeClassifier(alpha=0.7),
 'RidgeClassifier_(0.8)': RidgeClassifier(alpha=0.8),
 'RidgeClassifier_(0.9)': RidgeClassifier(alpha=0.9),
 'RidgeClassifier_(1.0)': RidgeClassifier(),
 'SGDClassifier': SGDClassifier(),
 'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
 'DecisionTreeClassifier': DecisionTreeClassifier(),
 'ExtraTreeClassifier': ExtraTreeClassifier(),
 'KNeighborsClassifier_(1)': KNeighborsClassifier(n_neighbors=1),
 'KNeighborsClassifier_(2)': KNeighborsClassifier(n_neighbors=2),
 'KNeighborsClassifier_(3)': KNeighborsClassifier(n_neighbors=3),
 'KNeighborsClassifier_(4)':