In [51]:
# Dépendances 
import torch
from torch.autograd import Variable
from PIL import Image
import os
import pandas as pd
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, confusion_matrix, classification_report, make_scorer
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Fonction de récupération des features dans une image
def get_vector(image_name):

    # Charger une image avec la librarie Pillow
    img = Image.open(image_name)
    # Créer une variable Pytorch afin de 
    t_img = Variable(normalize(to_tensor(scaler(img))).unsqueeze(0))
    # Créer un vecteur de 0 qui contiendra les features 
    my_embedding = torch.zeros(512)
    # Définir une fonction qui copy les résultats dans les couches
    def copy_data(m, i, o):
        my_embedding.copy_(o.data.reshape(o.data.size(1)))
    h = layer.register_forward_hook(copy_data)
    # Lancer le modèle sur l'image 
    model(t_img)
    h.remove()
    # Renvoyer les features
    return my_embedding.numpy()

# Fonction qui boucle sur toutes les images d'un jeu de données
def features_train(directory):

    # On instancie les listes qui stockeront les informations
    features = []
    totaldir = []
    y = []
    
    # Liste avec tous les chemins des images
    damage_dir = str(directory + "/damage/")
    damage = os.listdir(damage_dir)
    for i in damage :
        totaldir.append(str(damage_dir + i))
        y.append("damage")
    
    no_damage_dir = str(directory + "/no_damage/")
    no_damage = os.listdir(no_damage_dir)
    for i in no_damage :
        totaldir.append(str(no_damage_dir + i))
        y.append("no_damage")

    # Récupération des features dans les images
    for dir in totaldir : 
        features.append(get_vector(dir))

    features = pd.DataFrame(features)

    return features, y

# Fonction d'évaluation
def evaluation(model, X_train, y_train, X_test, y_test, target_names):
  model.fit(X_train, y_train)
  ypred = model.predict(X_test)

  print(pd.DataFrame(confusion_matrix(y_test,ypred), 
                  columns=['pred_0','pred_1'],
                  index=['obs_0','obs_1']))
                  

  print(classification_report(y_test, ypred, target_names = target_names))

# Fonction de Gridsearch
def grid_search(model, parameters, scorer, cv, X_train, y_train):

    best_model = GridSearchCV(model,param_grid = parameters, scoring = scorer, verbose = 2, cv = cv)
    best_model.fit(X_train, y_train)

    return best_model.best_estimator_, best_model.scorer_, best_model.best_score_

In [3]:
# Chemin des différents datasets
train = "/Users/titouanhoude/Deep Learning/Post-hurricane/train_another"
validation = "/Users/titouanhoude/Deep Learning/Post-hurricane/validation_another"
balanced_test = "/Users/titouanhoude/Deep Learning/Post-hurricane/test"
unbalanced_test = "/Users/titouanhoude/Deep Learning/Post-hurricane/test_another"

# Charger un modèle pré-entraîné
model = models.resnet18(pretrained=True)

# Utiliser ce modèle pour obtenir la couche qui nous intérésse
layer = model._modules.get('avgpool')

# Evaluer ce modèle
model.eval()

# Transformer les images dans le bon format, normalisation et conversion en Tenseur
scaler = transforms.Resize((150, 150))
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
to_tensor = transforms.ToTensor()

X_train, y_train = features_train(train)
X_test, y_test = features_train(validation)

In [50]:
# Knn
knn = KNeighborsClassifier(n_neighbors=5)
target_names = ["damage", "no_damage"]

evaluation(knn, X_train, y_train, X_test, y_test, target_names)

       pred_0  pred_1
obs_0     942      58
obs_1     124     876
              precision    recall  f1-score   support

      damage       0.88      0.94      0.91      1000
   no_damage       0.94      0.88      0.91      1000

    accuracy                           0.91      2000
   macro avg       0.91      0.91      0.91      2000
weighted avg       0.91      0.91      0.91      2000



In [10]:
# Random Forest de base
clf = RandomForestClassifier(random_state=0)
target_names = ["damage", "no_damage"]

evaluation(clf, X_train, y_train, X_test, y_test, target_names)

       pred_0  pred_1
obs_0     873     127
obs_1      85     915
              precision    recall  f1-score   support

      damage       0.91      0.87      0.89      1000
   no_damage       0.88      0.92      0.90      1000

    accuracy                           0.89      2000
   macro avg       0.89      0.89      0.89      2000
weighted avg       0.89      0.89      0.89      2000



In [37]:
# Paramètres de la Grid Search
parameters = {'n_estimators' : np.arange(start = 10, stop = 210, step = 25), 
    'max_depth' : np.arange(start = 10, stop = 20, step = 2)}
model = RandomForestClassifier()
scorer = make_scorer(f1_score , average='macro')
cv = 3

# Grid Search du Random Forest
best_estimator, best_scorer, best_score_ = grid_search(model, parameters, scorer, cv, X_train, y_train)

print("Voici les paramètres du meilleure modèle : " + str(best_estimator))
print("Voici le "  + str(best_scorer) + " du meilleure modèle : " + str(best_score_))

In [40]:
# Evaluation du modèle Random Forest avec les meilleurs paramètres
clf = RandomForestClassifier(max_depth=16, n_estimators=110)
target_names = ["damage", "no_damage"]

evaluation(clf, X_train, y_train, X_test, y_test, target_names)

       pred_0  pred_1
obs_0     868     132
obs_1      78     922
              precision    recall  f1-score   support

      damage       0.92      0.87      0.89      1000
   no_damage       0.87      0.92      0.90      1000

    accuracy                           0.90      2000
   macro avg       0.90      0.90      0.89      2000
weighted avg       0.90      0.90      0.89      2000



In [52]:
# Evaluation du modèle XGB avec les meilleurs paramètres
xgb = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
target_names = ["damage", "no_damage"]

evaluation(clf, X_train, y_train, X_test, y_test, target_names)

       pred_0  pred_1
obs_0     866     134
obs_1      75     925
              precision    recall  f1-score   support

      damage       0.92      0.87      0.89      1000
   no_damage       0.87      0.93      0.90      1000

    accuracy                           0.90      2000
   macro avg       0.90      0.90      0.90      2000
weighted avg       0.90      0.90      0.90      2000

