# AutoSklearn 

Group 18 Members:

- Clara Pichler, 11917694
- Hannah Knapp, 11901857 
- Sibel Toprakkiran, 09426341

### Overview

1. Data Sets

2. Evaluation of AutoSklearn
- Iris Dataset
- Congressional Voting Dataset
- Airfoil Dataset
- Abalone Dataset


The evaluation of our implementation and TPOT will be done in the files `ML_A3_Group18.ipynb` and `tpot.ipynb`.

In [None]:
from autosklearn.classification import AutoSklearnClassifier
from autosklearn.regression import AutoSklearnRegressor

from sklearn.experimental import enable_iterative_imputer 
from sklearn.impute import IterativeImputer

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error

import numpy as np
import pandas as pd

## Data

We apply a few small pre-processing steps and then split each dataset into a training and a test set with a train:test ratio of 7:3, just as for TPOT.

In [None]:
# Shared split settings. A test fraction of 0.3 gives the 7:3 train:test ratio;
# the fixed seed keeps every split reproducible across runs.
TEST_SIZE = 0.3
RANDOM_STATE = 42

# ---------------------------------------------------------------- load data
iris = datasets.load_iris()
iris_data = pd.DataFrame(
    data=np.c_[iris['data'], iris['target']],
    columns=iris['feature_names'] + ['target'],
)
# Replace the numeric class codes with readable species labels.
iris_data['target'] = iris_data['target'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})

df_voting = pd.read_csv('/CongressionalVotingID.shuf.lrn.csv')

df_airfoil = pd.read_csv("/airfoil_noise_data.csv")

url = '/abalone.csv'
column_names = ["Sex", "Length", "Diameter", "Height", "Whole_weight", "Shucked_weight", "Viscera_weight", "Shell_weight", "Rings"]
# header=0 together with names= replaces the header row found in the file.
df_abalone = pd.read_csv(url, header=0, names=column_names)
# Drop rows whose recorded height is zero.
df_abalone = df_abalone[df_abalone.Height != 0]

# --------------------------------------------------------------------- iris
X_iris = iris_data.drop(['target'], axis=1)
y_iris = iris_data['target']

X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris, y_iris, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# ------------------------------------------------------ congressional voting
# Opt in to the future pandas behaviour so replace() does not silently downcast.
pd.set_option('future.no_silent_downcasting', True)
df_voting = df_voting.replace({"democrat": 0, "republican": 1, "n": 0, "y": 1, "unknown": np.nan})
# After the replacement every remaining column is numeric (or NaN), so cast once.
df_voting = df_voting.drop(columns=['ID']).astype(float)

# Impute the vote columns only. Fitting the imputer on a frame that still
# contains 'class' would let the target leak into the imputed feature values.
voting_feature_cols = df_voting.columns.drop('class')
imp = IterativeImputer(max_iter=10, random_state=0)
df_voting[voting_feature_cols] = imp.fit_transform(df_voting[voting_feature_cols])

X_voting = df_voting.drop(['class'], axis=1)
y_voting = df_voting['class']

X_train_voting, X_test_voting, y_train_voting, y_test_voting = train_test_split(
    X_voting, y_voting, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# ------------------------------------------------------------------ airfoil
X_airfoil = df_airfoil.drop(['y'], axis=1)
y_airfoil = df_airfoil['y']

X_train_airfoil, X_test_airfoil, y_train_airfoil, y_test_airfoil = train_test_split(
    X_airfoil, y_airfoil, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# ------------------------------------------------------------------ abalone
# 'Sex' holds string codes. auto-sklearn reads categorical features from the
# pandas 'category' dtype, so cast it here instead of passing raw object strings.
X_abalone_reg = df_abalone.drop(['Rings'], axis=1).astype({'Sex': 'category'})
y_abalone_reg = df_abalone['Rings']

X_train_abalone, X_test_abalone, y_train_abalone, y_test_abalone = train_test_split(
    X_abalone_reg, y_abalone_reg, test_size=TEST_SIZE, random_state=RANDOM_STATE
)


## Evaluation

In [None]:
# One estimator per task type, both created with default settings.
# The evaluation cells below call fit() on these same objects once per dataset.
cls, reg = AutoSklearnClassifier(), AutoSklearnRegressor()

### Iris

In [None]:
# Fit the shared classifier on the Iris training split and predict the hold-out rows.
cls.fit(X_train_iris, y_train_iris)
predictions = cls.predict(X_test_iris)

print(
    "Accuracy score",
    accuracy_score(y_true=y_test_iris, y_pred=predictions),
)

# Leaderboard with detailed columns, not restricted to ensemble members.
# NOTE(review): sort_order="descending" is applied to the default sort column;
# confirm this puts the intended models at the top.
iris_leadership = cls.leaderboard(
    detailed=True,
    ensemble_only=False,
    sort_order="descending",
)
iris_leadership
# Uncomment to print the fitted models:
#print(cls.show_models())

### Congressional Voting

In [None]:
# Re-fit the shared classifier on the voting training split and predict the hold-out rows.
cls.fit(X_train_voting, y_train_voting)
predictions = cls.predict(X_test_voting)

print(
    "Accuracy score",
    accuracy_score(y_true=y_test_voting, y_pred=predictions),
)

# Leaderboard with detailed columns, not restricted to ensemble members.
# NOTE(review): sort_order="descending" is applied to the default sort column;
# confirm this puts the intended models at the top.
voting_leadership = cls.leaderboard(
    detailed=True,
    ensemble_only=False,
    sort_order="descending",
)
voting_leadership
# Uncomment to print the fitted models:
#print(cls.show_models())

### Airfoil

In [None]:
# Fit the shared regressor on the airfoil training split and predict the hold-out rows.
reg.fit(X_train_airfoil, y_train_airfoil)
predictions = reg.predict(X_test_airfoil)

# Report both absolute and squared error on the test split.
print(
    "MAE test score:",
    mean_absolute_error(y_true=y_test_airfoil, y_pred=predictions),
)
print(
    "MSE test score:",
    mean_squared_error(y_true=y_test_airfoil, y_pred=predictions),
)

# Leaderboard with detailed columns, not restricted to ensemble members.
# NOTE(review): sort_order="descending" is applied to the default sort column;
# confirm this puts the intended models at the top.
airfoil_leadership = reg.leaderboard(
    detailed=True,
    ensemble_only=False,
    sort_order="descending",
)
airfoil_leadership
# Uncomment to print the fitted models:
#print(reg.show_models())

### Abalone

In [None]:
# Re-fit the shared regressor on the abalone training split and predict the hold-out rows.
reg.fit(X_train_abalone, y_train_abalone)
predictions = reg.predict(X_test_abalone)

# Report both absolute and squared error on the test split.
print(
    "MAE test score:",
    mean_absolute_error(y_true=y_test_abalone, y_pred=predictions),
)
print(
    "MSE test score:",
    mean_squared_error(y_true=y_test_abalone, y_pred=predictions),
)

# Leaderboard with detailed columns, not restricted to ensemble members.
# NOTE(review): sort_order="descending" is applied to the default sort column;
# confirm this puts the intended models at the top.
abalone_leadership = reg.leaderboard(
    detailed=True,
    ensemble_only=False,
    sort_order="descending",
)
abalone_leadership
# Uncomment to print the fitted models:
#print(reg.show_models())