# **Classificazione con metodi di Machine Learning**
Una volta estratte le features delle immagini del nostro dataset, le utilizziamo per la classificazione, comparando le prestazioni di quattro diversi modelli

In [7]:
# Import generali
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

### **Caricamento dei dati da Google drive (dataset non aumentato)**

In [8]:
AM = ['', 'Crop', 'Rotated', 'Noisy', 'Flipped']
df = pd.DataFrame()

# Creazione delle variabili features e labels dall'estrazione delle caratteristiche dell'architettura Xception
df = pd.read_csv(f'/content/drive/MyDrive/progetto_deep/Features/XceptionFeaturesAugg{AM[1]}.csv')
df.drop(["Unnamed: 0"], axis=1, inplace=True)
labels_XC = df["diag"]
df.drop(["diag"], axis=1, inplace=True)
features_XC = df.copy()

# Creazione delle variabili features e labels dall'estrazione delle caratteristiche dell'architettura EfficientNet
df = pd.read_csv(f'/content/drive/MyDrive/progetto_deep/Features/EfficientNetFeaturesAugg{AM[1]}.csv')
df.drop(["Unnamed: 0"], axis=1, inplace=True)
labels_EN = df["diag"]
df.drop(["diag"], axis=1, inplace=True)
features_EN = df.copy()

### **1. Classificatore Naive Baies**

In [9]:
def naiveBayes(features, labels):
  # Sudivisione dei dati in set di addestramento e di test
  features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.20, random_state=42)

  # Inizializzazione ed addestramento del classificatore
  clf = GaussianNB()
  clf.fit(features_train, labels_train)

  # Predizioni sul set di test
  predictions = clf.predict(features_test)

  # Stampe delle prestazioni
  accuracy = accuracy_score(labels_test, predictions)
  print(f'Accuracy: {accuracy}')
  print(classification_report(labels_test, predictions))

print("Xception")
naiveBayes(features_XC, labels_XC)
print("\nEfficientNet")
naiveBayes(features_EN, labels_EN)

Xception
Accuracy: 0.652027027027027
              precision    recall  f1-score   support

           0       0.75      0.54      0.63       162
           1       0.59      0.78      0.67       134

    accuracy                           0.65       296
   macro avg       0.67      0.66      0.65       296
weighted avg       0.68      0.65      0.65       296


EfficientNet
Accuracy: 0.7263513513513513
              precision    recall  f1-score   support

           0       0.73      0.79      0.76       162
           1       0.72      0.65      0.68       134

    accuracy                           0.73       296
   macro avg       0.73      0.72      0.72       296
weighted avg       0.73      0.73      0.72       296



### **2. Classificatore K-Nearest Neighbours**

In [10]:
def knn(features, labels, k):
  # Sudivisione dei dati in set di addestramento e di test
  features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.20, random_state=42)

  # Standardizzazione delle features
  scaler = StandardScaler()
  features_train = scaler.fit_transform(features_train)
  features_test = scaler.transform(features_test)

  # Inizializzazione ed addestramento del classificatore
  clf = KNeighborsClassifier(n_neighbors=k)
  clf.fit(features_train, labels_train)

  # Predizioni sul set di test
  predictions = clf.predict(features_test)

  # Stampe delle prestazioni
  accuracy = accuracy_score(labels_test, predictions)
  print(f'Accuracy: {accuracy}')
  print(classification_report(labels_test, predictions))

print("Xception")
knn(features_XC, labels_XC, 5)
print("\nEfficientNet")
knn(features_EN, labels_EN, 5)

Xception
Accuracy: 0.7432432432432432
              precision    recall  f1-score   support

           0       0.73      0.83      0.78       162
           1       0.76      0.63      0.69       134

    accuracy                           0.74       296
   macro avg       0.75      0.73      0.74       296
weighted avg       0.75      0.74      0.74       296


EfficientNet
Accuracy: 0.8783783783783784
              precision    recall  f1-score   support

           0       0.88      0.91      0.89       162
           1       0.88      0.84      0.86       134

    accuracy                           0.88       296
   macro avg       0.88      0.88      0.88       296
weighted avg       0.88      0.88      0.88       296



### **3. Classificatore Random Forest**

In [None]:
# Calcolo dell'accuracy media per 100 esecuzioni
def randomForestMean(estimator, x, y):
  total_acc=0

  for i in range(100):
    # Sudivisione dei dati in set di addestramento e di test
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 20, random_state=42)

    # Inizializzazione ed addestramento del classificatore
    rnd_forest = RandomForestClassifier(n_estimators= estimator)
    rnd_forest.fit(X_train, y_train)

    # Calcolo delle prestazioni
    total_acc += rnd_forest.score(X_test,y_test)
  return total_acc/100

# Stampe delle prestazioni
print("Xception")
print("Test accuracy of random forest classifier with n_estimators = 10 : {:.2f}".format(randomForestMean(10, features_XC, labels_XC)))
print("Test accuracy of random forest classifier with n_estimators = 100 : {:.2f}".format(randomForestMean(100, features_XC, labels_XC)))
print("\nEfficientNet")
print("Test accuracy of random forest classifier with n_estimators = 10 : {:.2f}".format(randomForestMean(10, features_EN, labels_EN)))
print("Test accuracy of random forest classifier with n_estimators = 100 : {:.2f}".format(randomForestMean(100, features_EN, labels_EN)))


Xception
Test accuracy of random forest classifier with n_estimators = 10 : 0.75


### **4. Classificatore Boosting**

In [6]:
# Calcolo dell'accuracy media per 100 esecuzioni
def boostingMean(estimator, x, y):
  total_acc=0

  for i in range(100):
    # Sudivisione dei dati in set di addestramento e di test
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 20, random_state=42)

    # Inizializzazione ed addestramento del classificatore
    boost = AdaBoostClassifier(n_estimators=estimator, algorithm="SAMME", random_state=0)
    boost.fit(X_train, y_train)

    # Calcolo delle prestazioni
    total_acc += boost.score(X_test,y_test)
  return total_acc/100

# Stampe delle prestazioni
print("Xception")
print("Test accuracy of ada boosting classifier with n_estimators = 10 : {:.2f}".format(boostingMean(10, features_XC, labels_XC)))
print("Test accuracy of ada boosting classifier with n_estimators = 50 : {:.2f}".format(boostingMean(50, features_XC, labels_XC)))
print("Test accuracy of ada boosting classifier with n_estimators = 10 : {:.2f}".format(boostingMean(10, features_EN, labels_EN)))
print("Test accuracy of ada boosting classifier with n_estimators = 50 : {:.2f}".format(boostingMean(50, features_EN, labels_EN)))


Xception
Test accuracy of ada boosting classifier with n_estimators = 10 : 0.75
Test accuracy of ada boosting classifier with n_estimators = 50 : 0.80
Test accuracy of ada boosting classifier with n_estimators = 100 : 0.80

EfficientNet
Test accuracy of ada boosting classifier with n_estimators = 10 : 0.65
Test accuracy of ada boosting classifier with n_estimators = 50 : 0.75
Test accuracy of ada boosting classifier with n_estimators = 100 : 0.75
