# Imports

In [1]:
from pandas import read_csv
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import warnings
from sklearn.exceptions import DataConversionWarning

# Load dataset

In [2]:
def comparator(url):
    dataset = read_csv(url)
    split_data(dataset)

# Split validation dataset

In [3]:
def split_data(dataset):
    array = dataset.values
    X = array[:,0:len(dataset.columns)]
    y = array[:,len(dataset.columns)-1:]
    X_train, X_validation, Y_train, Y_validation = train_test_split(X, y, test_size=0.20, random_state=1, shuffle=True)
    test_models(X_train, X_validation, Y_train, Y_validation)

# Initialize algorithms

In [4]:
def all_algorithms():
    models = []
    models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    models.append(('KNN', KNeighborsClassifier()))
    models.append(('CART', DecisionTreeClassifier()))
    models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC(gamma='auto')))
    return models

# Evaluate each model

In [5]:
def test_models(X_train, X_validation, Y_train, Y_validation):
    warnings.filterwarnings("ignore")
    results = []
    names = []
    res = []
    models = all_algorithms()
    for name, model in models:
        kfold = StratifiedKFold(n_splits=6, random_state=1)
        cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
        results.append(cv_results)
        names.append(name)
        res.append('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))
    printer(res)

# Print the results for each model

In [6]:
def printer(res):
    for r in res:
        print(r)

# Indicate where to find the csv file and call the \"comparator\" function

In [7]:
my_dataset = "https://raw.githubusercontent.com/exequielmoneva/Boston-Housing-Prices/master/housing.csv"

In [8]:
comparator(my_dataset)

LR: 0.077623 (0.080514)
LDA: 0.071087 (0.084842)
KNN: 0.723394 (0.311798)
CART: 0.755227 (0.257136)
NB: 0.916667 (0.186339)
SVM: 0.438065 (0.302027)
