In [1]:
import pandas as pd
import numpy as np
import sklearn
import sklearn.model_selection

from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score

In [2]:
# Load the wine table once, then split it into the 'label' column (targets)
# and every remaining column (features). Both results are NumPy arrays.
wine_df = pd.read_csv("../resource/asnlib/publicdata/wine.data.csv")

target = wine_df['label'].to_numpy()
data = wine_df.drop(columns=['label']).to_numpy()

In [3]:
# Training 

def train(X_train, y_train):
    """Fit the two base classifiers on the same training split.

    Parameters
    ----------
    X_train : feature matrix handed to each estimator's ``fit``.
    y_train : labels handed to each estimator's ``fit``.

    Returns
    -------
    tuple
        ``(svm, mlp)`` -- the fitted ``LinearSVC`` followed by the fitted
        ``MLPClassifier``.
    """
    # Both estimators are seeded with random_state=0 (fixed seed).
    linear_svm = LinearSVC(random_state=0)
    neural_net = MLPClassifier(random_state=0)

    # Fit in the same order as the return value: SVM first, then MLP.
    linear_svm.fit(X_train, y_train)
    neural_net.fit(X_train, y_train)

    return linear_svm, neural_net

In [5]:
# Predicting

def predict(X_valid, svm, mlp):
    """Build the meta-feature matrix from the base models' predictions.

    Parameters
    ----------
    X_valid : samples passed to each model's ``predict``.
    svm, mlp : fitted models exposing ``predict``.

    Returns
    -------
    numpy.ndarray
        The two prediction arrays stacked as columns: the ``svm`` predictions
        first, the ``mlp`` predictions second.
    """
    # Keep the (svm, mlp) order: it fixes the column order of the result.
    per_model = [model.predict(X_valid) for model in (svm, mlp)]
    return np.column_stack(per_model)

In [7]:
# Meta-learning

def meta_learn(features, y_valid):
    """Fit the meta-learner on the base models' predictions.

    Parameters
    ----------
    features : matrix passed as the training inputs to ``fit``.
    y_valid : labels passed as the training targets to ``fit``.

    Returns
    -------
    LogisticRegression
        The fitted model (created with ``random_state=0``).
    """
    meta_model = LogisticRegression(random_state=0)
    meta_model.fit(features, y_valid)
    return meta_model

In [10]:
# Ensembling

def ensemble_predict(X_test, y_test, svm, mlp, logistic):
    """Score the base models and the stacked meta-model on a test split.

    Parameters
    ----------
    X_test : samples to predict on.
    y_test : true labels compared against each set of predictions.
    svm, mlp : fitted base models exposing ``predict``.
    logistic : fitted meta-model; its ``predict`` receives the matrix of
        base-model predictions.

    Returns
    -------
    tuple
        ``(svm_acc, mlp_acc, logistic_acc)`` as computed by
        ``accuracy_score``.
    """
    # Build the meta-features through predict() so the layout seen by the
    # meta-model here cannot drift from the layout it was fitted on.
    features = predict(X_test, svm, mlp)

    # predict() stacks the svm predictions in column 0 and mlp in column 1.
    svm_pred = features[:, 0]
    mlp_pred = features[:, 1]
    logistic_pred = logistic.predict(features)

    svm_acc = accuracy_score(y_test, svm_pred)
    mlp_acc = accuracy_score(y_test, mlp_pred)
    logistic_acc = accuracy_score(y_test, logistic_pred)

    return svm_acc, mlp_acc, logistic_acc  # return the accuracies

In [11]:
# All together
# One (svm_acc, mlp_acc, logistic_acc) tuple is collected per fold.
performances = []

# NOTE(review): shuffle=False keeps the CSV's row order, and the split below
# halves the non-test rows by position. If the file is ordered by label, the
# training half may be missing whole classes -- confirm the row order, and
# consider KFold(10, shuffle=True, random_state=0) if that is the case.
kf = sklearn.model_selection.KFold(10, shuffle=False)
for rest_index, test_index in kf.split(data):
    ## Train/valid/test split
    # The first half of the non-test rows fits the base models; the second
    # half is held out to fit the meta-learner on their predictions.
    midpoint = len(rest_index) // 2
    train_index = rest_index[:midpoint]
    valid_index = rest_index[midpoint:]

    X_train, y_train = data[train_index], target[train_index]
    X_valid, y_valid = data[valid_index], target[valid_index]
    X_test, y_test = data[test_index], target[test_index]

    ## Training
    svm, mlp = train(X_train, y_train)

    ## Predicting
    features = predict(X_valid, svm, mlp)

    ## Meta-learning
    logistic = meta_learn(features, y_valid)

    ## Ensembling
    accuracies = ensemble_predict(X_test, y_test, svm, mlp, logistic)

    performances.append(accuracies)

# Mean accuracy over the folds, in (svm, mlp, logistic) order.
avg_accs = np.array(performances).mean(axis=0)
avg_accs  # last expression of the cell, so the result is displayed


