# Task 2: Binary Classification with Seeds Dataset

In [46]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler, label_binarize
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


In [47]:

# Labels for the eight columns of the headerless seeds file: seven measurement
# columns followed by the `type` label column.
seeds_columns = [
    'area',
    'perimeter',
    'compactness',
    'length_kernel',
    'width_kernel',
    'asymmetry_coeff',
    'length_groove',
    'type',
]

# Fields are separated by one or more tab characters, so a regex separator is
# used, which requires the python parsing engine.
seeds_df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/00236/seeds_dataset.txt',
    sep='\t+',
    engine='python',
    header=None,
    names=seeds_columns,
)

In [48]:
# Reduce the problem to two classes and encode them as 0/1.
# Raw `type` codes: 1 = Kama, 2 = Rosa, 3 = Canadian. Only Kama and Rosa are kept.
#
# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not write into a slice (SettingWithCopyWarning).
seeds_df = seeds_df[seeds_df['type'].isin([1, 2])].copy()

# Map directly from the raw integer codes (Kama -> 0, Rosa -> 1). This avoids the
# int -> str -> int round-trip through `replace`, which leaves an object-dtype
# column and relies on pandas' deprecated silent downcasting to get integers back.
seeds_df['type'] = seeds_df['type'].map({1: 0, 2: 1}).astype(int)


In [49]:
# The last column holds the class label; every column before it is a feature.
target_col = seeds_df.columns[-1]
X = seeds_df.drop(columns=[target_col])
y = seeds_df[target_col]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [50]:
# Learn the scaling statistics from the training split only, then apply the same
# transformation to both splits so no test-set information leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [51]:
# Linear-kernel SVM; probability=True additionally enables predict_proba.
svm = SVC(probability=True, kernel='linear', random_state=42).fit(X_train, y_train)

# Hard class predictions for the held-out split.
y_pred = svm.predict(X_test)

In [52]:
# Evaluate the SVM on the held-out split.
# print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('F1-score:', f1_score(y_test, y_pred))

# ROC AUC measures how well the model *ranks* samples, so it needs continuous
# scores rather than thresholded 0/1 predictions. Hard labels collapse the ROC
# curve to a single operating point. decision_function returns the signed
# distance to the separating hyperplane, which is what predict() thresholds.
y_score = svm.decision_function(X_test)
print('ROC AUC:', roc_auc_score(y_test, y_score))


Precision: 1.0
Recall: 1.0
F1-score: 1.0
ROC AUC: 1.0


# Task 3: Train a third probabilistic classifier

In [53]:
# RandomForestClassifier was used below without ever being imported, which raises
# NameError on a fresh kernel (Restart & Run All).
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Seed the forest like the other models so its results are repeatable across runs.
rf_clf = RandomForestClassifier(random_state=42)
rf_clf.fit(X_train, y_train)

# Multi-layer perceptron with two hidden layers of 100 units each.
mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=500, random_state=42)
mlp.fit(X_train, y_train)

MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=500, random_state=42)

In [54]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for each classifier on the held-out split.

# NOTE(review): this rebinds `mlp` to a newly trained network with default hidden
# layers, replacing any model previously bound to that name — confirm this is the
# configuration that should be reported.
mlp = MLPClassifier(random_state=1, max_iter=300).fit(X_train, y_train)
y_pred_mlp = mlp.predict(X_test)
print('MLPClassifier:\n', classification_report(y_test, y_pred_mlp))

# The SVC built earlier in the notebook is bound to `svm`; the previous reference
# to `svm_clf` named an object that is never defined and raised NameError.
svm = svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)
print('SVM:\n', classification_report(y_test, y_pred_svm))

# rf_clf is already fitted in the previous cell, so it is only used for prediction.
y_pred_rf = rf_clf.predict(X_test)
print('Random Forest:\n', classification_report(y_test, y_pred_rf))

MLPClassifier:
               precision    recall  f1-score   support

           0       0.96      0.96      0.96        23
           1       0.95      0.95      0.95        19

    accuracy                           0.95        42
   macro avg       0.95      0.95      0.95        42
weighted avg       0.95      0.95      0.95        42

SVM:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        19

    accuracy                           1.00        42
   macro avg       1.00      1.00      1.00        42
weighted avg       1.00      1.00      1.00        42

Random Forest:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        19

    accuracy                           1.00        42
   macro avg       1.00      1.00      1.00        42
weighted avg       1.00      1.00   

