## Loading dataset

In [1]:
from sklearn.datasets import load_iris
# Load the iris dataset; return_X_y=True is what lets the result be unpacked
# directly into the feature matrix X and the label vector y.
X, y = load_iris(return_X_y=True)

## Removing features with low variance


In [2]:
from sklearn.feature_selection import VarianceThreshold

# Filter out low-variance features, using 0.1 as the variance threshold.
# NOTE(review): the selector is fitted on the complete X, i.e. before the
# train/test split performed later in the notebook, so held-out rows take part
# in choosing the features - consider fitting on the training split only.
selection = VarianceThreshold(threshold=0.1)
selection.fit(X)
X = selection.transform(X)

## Data Splitting

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set. stratify=y requests a
# class-stratified split and random_state=42 makes the partition repeatable.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

## Building Classification models

In [4]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import f1_score

### K Nearest Neighbors

In [5]:
from sklearn.neighbors import KNeighborsClassifier

# K-nearest-neighbours classifier that votes over the 3 closest training samples.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predict on both splits so training and test performance can be compared.
y_train_pred = knn.predict(X_train)
y_test_pred = knn.predict(X_test)

# Training set performance (F1 is averaged across classes, weighted by support).
knn_train_accuracy = accuracy_score(y_train, y_train_pred)
knn_train_mcc = matthews_corrcoef(y_train, y_train_pred)
knn_train_f1 = f1_score(y_train, y_train_pred, average='weighted')

# Test set performance
knn_test_accuracy = accuracy_score(y_test, y_test_pred)
knn_test_mcc = matthews_corrcoef(y_test, y_test_pred)
knn_test_f1 = f1_score(y_test, y_test_pred, average='weighted')

# Every metric is reported with two decimals, the same format the other model
# cells use, so the numbers line up across classifiers.
print('Model performance for Training set')
print('- Accuracy: {:.2f}'.format(knn_train_accuracy))
print('- MCC: {:.2f}'.format(knn_train_mcc))
print('- F1 score: {:.2f}'.format(knn_train_f1))
print('----------------------------------')
print('Model performance for Test set')
print('- Accuracy: {:.2f}'.format(knn_test_accuracy))
print('- MCC: {:.2f}'.format(knn_test_mcc))
print('- F1 score: {:.2f}'.format(knn_test_f1))

Model performance for Training set
- Accuracy: 0.96
- MCC: 0.94
- F1 score: 0.96
----------------------------------
Model performance for Test set
- Accuracy: 1.0
- MCC: 1.0
- F1 score: 1.0


### Support vector machine (Radial basis function kernel)

In [6]:
from sklearn.svm import SVC

# Support vector classifier; SVC's default kernel is the radial basis function.
# fit() returns the estimator itself, so construction and training are chained.
svm_rbf = SVC(gamma=2, C=1).fit(X_train, y_train)

# Predictions for the training and the held-out split.
y_train_pred = svm_rbf.predict(X_train)
y_test_pred = svm_rbf.predict(X_test)

# Accuracy on each split.
svm_rbf_train_accuracy = accuracy_score(y_train, y_train_pred)
svm_rbf_test_accuracy = accuracy_score(y_test, y_test_pred)

# Matthews correlation coefficient on each split.
svm_rbf_train_mcc = matthews_corrcoef(y_train, y_train_pred)
svm_rbf_test_mcc = matthews_corrcoef(y_test, y_test_pred)

# Support-weighted F1 score on each split.
svm_rbf_train_f1 = f1_score(y_train, y_train_pred, average='weighted')
svm_rbf_test_f1 = f1_score(y_test, y_test_pred, average='weighted')

# Emit the whole report in one call; sep='\n' puts each entry on its own line.
print(
    'Model performance for Training set',
    '- Accuracy: {:.2f}'.format(svm_rbf_train_accuracy),
    '- MCC: {:.2f}'.format(svm_rbf_train_mcc),
    '- F1 score: {:.2f}'.format(svm_rbf_train_f1),
    '----------------------------------',
    'Model performance for Test set',
    '- Accuracy: {:.2f}'.format(svm_rbf_test_accuracy),
    '- MCC: {:.2f}'.format(svm_rbf_test_mcc),
    '- F1 score: {:.2f}'.format(svm_rbf_test_f1),
    sep='\n',
)

Model performance for Training set
- Accuracy: 0.99
- MCC: 0.99
- F1 score: 0.99
----------------------------------
Model performance for Test set
- Accuracy: 0.97
- MCC: 0.95
- F1 score: 0.97


### Decision tree

In [7]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree limited to depth 5. random_state is pinned (same seed as the
# train/test split) because scikit-learn permutes the candidate features at
# every split, so an unseeded tree can differ between runs and the reported
# metrics would not be reproducible under Restart & Run All.
dt = DecisionTreeClassifier(max_depth=5, random_state=42)
dt.fit(X_train, y_train)

# Predict on both splits so training and test performance can be compared.
y_train_pred = dt.predict(X_train)
y_test_pred = dt.predict(X_test)

# Training set performance (F1 is averaged across classes, weighted by support).
dt_train_accuracy = accuracy_score(y_train, y_train_pred)
dt_train_mcc = matthews_corrcoef(y_train, y_train_pred)
dt_train_f1 = f1_score(y_train, y_train_pred, average='weighted')

# Test set performance
dt_test_accuracy = accuracy_score(y_test, y_test_pred)
dt_test_mcc = matthews_corrcoef(y_test, y_test_pred)
dt_test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print('Model performance for Training set')
print('- Accuracy: {:.2f}'.format(dt_train_accuracy))
print('- MCC: {:.2f}'.format(dt_train_mcc))
print('- F1 score: {:.2f}'.format(dt_train_f1))
print('----------------------------------')
print('Model performance for Test set')
print('- Accuracy: {:.2f}'.format(dt_test_accuracy))
print('- MCC: {:.2f}'.format(dt_test_mcc))
print('- F1 score: {:.2f}'.format(dt_test_f1))

Model performance for Training set
- Accuracy: 1.00
- MCC: 1.00
- F1 score: 1.00
----------------------------------
Model performance for Test set
- Accuracy: 0.93
- MCC: 0.90
- F1 score: 0.93


### Random forest

In [8]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 10 trees. random_state is pinned (same seed as the
# train/test split) because the bootstrap samples and the per-split feature
# subsets are drawn at random; without a seed every run trains a different
# forest and the reported metrics are not reproducible.
rf = RandomForestClassifier(n_estimators=10, random_state=42)
rf.fit(X_train, y_train)

# Predict on both splits so training and test performance can be compared.
y_train_pred = rf.predict(X_train)
y_test_pred = rf.predict(X_test)

# Training set performance (F1 is averaged across classes, weighted by support).
rf_train_accuracy = accuracy_score(y_train, y_train_pred)
rf_train_mcc = matthews_corrcoef(y_train, y_train_pred)
rf_train_f1 = f1_score(y_train, y_train_pred, average='weighted')

# Test set performance
rf_test_accuracy = accuracy_score(y_test, y_test_pred)
rf_test_mcc = matthews_corrcoef(y_test, y_test_pred)
rf_test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print('Model performance for Training set')
print('- Accuracy: {:.2f}'.format(rf_train_accuracy))
print('- MCC: {:.2f}'.format(rf_train_mcc))
print('- F1 score: {:.2f}'.format(rf_train_f1))
print('----------------------------------')
print('Model performance for Test set')
print('- Accuracy: {:.2f}'.format(rf_test_accuracy))
print('- MCC: {:.2f}'.format(rf_test_mcc))
print('- F1 score: {:.2f}'.format(rf_test_f1))

Model performance for Training set
- Accuracy: 1.00
- MCC: 1.00
- F1 score: 1.00
----------------------------------
Model performance for Test set
- Accuracy: 0.97
- MCC: 0.95
- F1 score: 0.97


### Neural network

In [9]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with L2 penalty alpha=1 and at most 1000 iterations.
# random_state is pinned (same seed as the train/test split) because weight
# initialisation and mini-batch sampling are random; without a seed the
# trained network, and therefore the reported metrics, change between runs.
mlp = MLPClassifier(alpha=1, max_iter=1000, random_state=42)
mlp.fit(X_train, y_train)

# Predict on both splits so training and test performance can be compared.
y_train_pred = mlp.predict(X_train)
y_test_pred = mlp.predict(X_test)

# Training set performance (F1 is averaged across classes, weighted by support).
mlp_train_accuracy = accuracy_score(y_train, y_train_pred)
mlp_train_mcc = matthews_corrcoef(y_train, y_train_pred)
mlp_train_f1 = f1_score(y_train, y_train_pred, average='weighted')

# Test set performance
mlp_test_accuracy = accuracy_score(y_test, y_test_pred)
mlp_test_mcc = matthews_corrcoef(y_test, y_test_pred)
mlp_test_f1 = f1_score(y_test, y_test_pred, average='weighted')

print('Model performance for Training set')
print('- Accuracy: {:.2f}'.format(mlp_train_accuracy))
print('- MCC: {:.2f}'.format(mlp_train_mcc))
print('- F1 score: {:.2f}'.format(mlp_train_f1))
print('----------------------------------')
print('Model performance for Test set')
print('- Accuracy: {:.2f}'.format(mlp_test_accuracy))
print('- MCC: {:.2f}'.format(mlp_test_mcc))
print('- F1 score: {:.2f}'.format(mlp_test_f1))

Model performance for Training set
- Accuracy: 0.98
- MCC: 0.98
- F1 score: 0.98
----------------------------------
Model performance for Test set
- Accuracy: 1.00
- MCC: 1.00
- F1 score: 1.00
