In [23]:
import torch
from torch.autograd import Variable
from PIL import Image
import os
import pandas as pd
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, confusion_matrix, classification_report, make_scorer
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier

def get_vector(image_name):
    """Return the 'avgpool' feature vector of a single image.

    The function relies on objects defined at notebook level: the transforms
    ``scaler``, ``to_tensor`` and ``normalize``, the network ``model`` and the
    hooked sub-module ``layer``.

    Parameters
    ----------
    image_name : str
        Path of the image file to load.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of 512 values copied from the output of ``layer``.
    """
    # Open inside a context manager so the file handle is released, and force
    # three channels: `normalize` is configured with 3-channel mean/std, so a
    # grayscale or RGBA file would otherwise fail at normalisation time.
    with Image.open(image_name) as img:
        rgb_img = img.convert("RGB")
        # Add a leading batch dimension of size 1 for the network.
        t_img = normalize(to_tensor(scaler(rgb_img))).unsqueeze(0)

    # Buffer that will receive the captured layer output (512 values).
    my_embedding = torch.zeros(512)

    def copy_data(m, i, o):
        # Flatten the (1, C, 1, 1)-like output down to its channel dimension.
        my_embedding.copy_(o.detach().reshape(o.size(1)))

    # Attach the hook, run one forward pass, and always detach the hook again,
    # even if the forward pass raises, so hooks never pile up on `layer`.
    h = layer.register_forward_hook(copy_data)
    try:
        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            model(t_img)
    finally:
        h.remove()

    return my_embedding.numpy()

def features_train(directory):
    """Build the feature table and label list for one dataset split.

    ``directory`` must contain the two sub-folders ``damage`` and
    ``no_damage``. Every regular, non-hidden file found in them is passed to
    ``get_vector``; the label of each row is the name of its sub-folder.

    Parameters
    ----------
    directory : str
        Root folder of the split.

    Returns
    -------
    features : pandas.DataFrame
        One row per image, holding the vector returned by ``get_vector``.
    y : list of str
        ``"damage"`` / ``"no_damage"`` labels, aligned with the rows of
        ``features`` (all ``damage`` rows first).
    """
    image_paths = []
    y = []

    for label in ("damage", "no_damage"):
        class_dir = os.path.join(directory, label)
        # os.listdir returns entries in arbitrary order: sort them so the row
        # order (and anything trained on it) is reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, file_name)
            # Skip hidden files (e.g. macOS ".DS_Store") and sub-directories,
            # which are not images and would make the loader fail.
            if file_name.startswith(".") or not os.path.isfile(file_path):
                continue
            image_paths.append(file_path)
            y.append(label)

    features = pd.DataFrame([get_vector(path) for path in image_paths])

    return features, y

def evaluation(model, X_train, y_train, X_test, y_test, target_names):
    """Fit ``model``, then print its confusion matrix and classification report.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``
        Fitted in place on the training data.
    X_train, y_train
        Training features and labels.
    X_test, y_test
        Held-out features and labels used for the printed metrics.
    target_names : list of str
        Display names forwarded to ``classification_report``.

    Returns
    -------
    None
        Results are printed only.
    """
    model.fit(X_train, y_train)
    ypred = model.predict(X_test)

    # Derive the row/column labels from the matrix itself instead of assuming
    # exactly two classes; the binary case still prints pred_0/pred_1, obs_0/obs_1.
    matrix = confusion_matrix(y_test, ypred)
    n_classes = matrix.shape[0]
    print(pd.DataFrame(matrix,
                       columns=["pred_{}".format(k) for k in range(n_classes)],
                       index=["obs_{}".format(k) for k in range(n_classes)]))

    print(classification_report(y_test, ypred, target_names=target_names))

def grid_search(model, parameters, scorer, cv, X_train, y_train, verbose=2, n_jobs=None):
    """Run an exhaustive cross-validated grid search and return the winner.

    Parameters
    ----------
    model : estimator
        Base estimator handed to ``GridSearchCV``.
    parameters : dict
        Parameter grid (``param_grid``).
    scorer : callable or str
        Scoring passed as ``scoring``.
    cv : int or cross-validation splitter
        Passed as ``cv``.
    X_train, y_train
        Data the search is fitted on.
    verbose : int, default 2
        Verbosity forwarded to ``GridSearchCV``; the default keeps the
        original per-fit log lines, pass 0 to silence them.
    n_jobs : int or None, default None
        Parallelism forwarded to ``GridSearchCV``; the default keeps the
        original behaviour.

    Returns
    -------
    tuple
        ``(best_estimator_, scorer_, best_score_)`` of the fitted search.
    """
    search = GridSearchCV(
        model,
        param_grid=parameters,
        scoring=scorer,
        verbose=verbose,
        cv=cv,
        n_jobs=n_jobs,
    )
    search.fit(X_train, y_train)

    return search.best_estimator_, search.scorer_, search.best_score_

In [3]:
# Root folder of the dataset. Set the POST_HURRICANE_DIR environment variable
# to run the notebook on another machine; the default is the original location.
DATA_DIR = os.environ.get("POST_HURRICANE_DIR", "/Users/titouanhoude/Deep Learning/Post-hurricane")
train = os.path.join(DATA_DIR, "train_another")
validation = os.path.join(DATA_DIR, "validation_another")
balanced_test = os.path.join(DATA_DIR, "test")
unbalanced_test = os.path.join(DATA_DIR, "test_another")

# Load a pre-trained model
model = models.resnet18(pretrained=True)

# Grab the layer whose output is used as the feature vector
layer = model._modules.get('avgpool')

# Switch the network to evaluation (inference) mode; this does not score it
model.eval()

# Image preprocessing: resize, convert to tensor, normalise per channel
scaler = transforms.Resize((150, 150))
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
to_tensor = transforms.ToTensor()

# Extract features for the training split; the validation split serves as the
# held-out set (X_test / y_test) in the cells below.
X_train, y_train = features_train(train)
X_test, y_test = features_train(validation)

In [10]:
# Display names passed to the classification report
target_names = ["damage", "no_damage"]
# Baseline random forest with default hyper-parameters and a fixed seed
clf = RandomForestClassifier(random_state=0)

evaluation(
    model=clf,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    target_names=target_names,
)

       pred_0  pred_1
obs_0     873     127
obs_1      85     915
              precision    recall  f1-score   support

      damage       0.91      0.87      0.89      1000
   no_damage       0.88      0.92      0.90      1000

    accuracy                           0.89      2000
   macro avg       0.89      0.89      0.89      2000
weighted avg       0.89      0.89      0.89      2000



In [26]:
parameters = {'n_estimators' : np.arange(start = 10, stop = 500, step = 10), 'max_depth' : np.arange(start = 1, stop = 11, step = 1)}
model = RandomForestClassifier()
scorer = make_scorer(f1_score , average='macro')
cv = 5

In [27]:
best_estimator, best_scorer, best_score_ = grid_search(model, parameters, scorer, cv, X_train, y_train)

[CV] END ......................max_depth=1, n_estimators=240; total time=   2.4s
[CV] END ......................max_depth=1, n_estimators=240; total time=   2.4s
[CV] END ......................max_depth=1, n_estimators=240; total time=   2.4s
[CV] END ......................max_depth=1, n_estimators=240; total time=   2.4s
[CV] END ......................max_depth=1, n_estimators=250; total time=   2.5s
[CV] END ......................max_depth=1, n_estimators=250; total time=   2.5s
[CV] END ......................max_depth=1, n_estimators=250; total time=   2.5s
[CV] END ......................max_depth=1, n_estimators=250; total time=   2.5s
[CV] END ......................max_depth=1, n_estimators=250; total time=   2.5s
[CV] END ......................max_depth=1, n_estimators=260; total time=   2.7s
[CV] END ......................max_depth=1, n_estimators=260; total time=   2.6s
[CV] END ......................max_depth=1, n_estimators=260; total time=   2.6s
[CV] END ...................

In [None]:
# Report the selected estimator, then the scorer used and its best CV score
print(f"Voici les paramètres du meilleure modèle : {best_estimator!s}")
print(f"Voici le {best_scorer!s} du meilleure modèle : {best_score_!s}")