<a href="https://colab.research.google.com/github/domingues100/IEEE---Water_Level/blob/main/SN1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **IMPORTS**

In [None]:
# Mount Google Drive at /content/drive; create_csv below writes under
# /content/drive/MyDrive, so this must run first in a Colab session.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import ResNet50V2, ResNet101V2
#from tensorflow.keras.applications.resnet_v2 import preprocess_input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import tensorflow_hub as hub
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# **Side Functions**

In [None]:
# Generator used here only for its rescale step (pixel values times 1/255).
idg = ImageDataGenerator(rescale=1./255)

def load_and_preprocess_image(image_path):
    """Load one image file and return it as a rescaled batch of size one.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis and then passed through the module-level ``idg`` generator
    (configured with ``rescale=1/255``).

    Args:
        image_path: Path of the image file to load.

    Returns:
        The first batch yielded by ``idg.flow`` for the single image.
    """
    pil_image = image.load_img(image_path, target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(pil_image), axis=0)
    return idg.flow(batch, batch_size=1)[0]

def generate_feature_vector(img_path):
    """Return the flattened model output for the image at ``img_path``.

    Uses the module-level ``model`` (assigned by the test cells) and
    ``load_and_preprocess_image`` to prepare the input batch.

    Args:
        img_path: Path of the image file to describe.

    Returns:
        A 1-D NumPy array holding the flattened prediction of ``model``.
    """
    batch = load_and_preprocess_image(img_path)
    prediction = model.predict(batch)
    return np.array(prediction.flatten())

def list_features_classes(images_df, path):
    """Compute a feature vector for every row of ``images_df``.

    Args:
        images_df: DataFrame with an ``id`` column (image file name) and a
            ``label`` column (class of that image).
        path: Directory that the ``id`` file names are joined onto.

    Returns:
        A ``(class_name, feature_vectors)`` tuple of two lists in row order:
        the labels and the matching feature vectors.
    """
    class_name = []
    feature_vectors = []

    for _, record in images_df.iterrows():
        file_name = record['id']
        label = record['label']

        vector = generate_feature_vector(os.path.join(path, file_name))
        class_name.append(label)
        feature_vectors.append(vector)

    return class_name, feature_vectors

def create_csv(classe, feature, feature_csv_name, output_dir=None):
    """Write labels and their feature vectors to a CSV file.

    Each feature vector is stored as text in the form ``[v0 v1 ... vN]``,
    which is the format ``comparar_features`` parses back. The text is
    produced explicitly with the full vector, so the file does not depend on
    the global NumPy print options (by default NumPy abbreviates arrays with
    more than 1000 elements using ``...``, which would corrupt the CSV).

    Args:
        classe: Sequence of class labels, one per feature vector.
        feature: Sequence of feature vectors (array-likes).
        feature_csv_name: File name of the CSV to create.
        output_dir: Directory to write into. Defaults to the Drive folder
            this notebook has always used.

    Returns:
        The path of the CSV file that was written.
    """
    import sys

    if output_dir is None:
        output_dir = '/content/drive/MyDrive/Transaciones/SN1/Teste 2 - 5 imagem'
    vetores_path = f'{output_dir}/{feature_csv_name}'

    # threshold=sys.maxsize disables the "..." summarization for this call only.
    serialized = [
        np.array2string(np.asarray(vector), threshold=sys.maxsize)
        for vector in feature
    ]
    faces_df = pd.DataFrame({'Classe': classe, 'Feature_vector': serialized})
    faces_df.to_csv(vetores_path, index=False)

    return vetores_path

def distancia_euclidiana(vec1, vec2):
    """Return the Euclidean distance between two feature vectors.

    Args:
        vec1: First feature vector (NumPy array).
        vec2: Second feature vector (NumPy array).

    Returns:
        ``np.linalg.norm`` of the element-wise difference ``vec1 - vec2``.
    """
    difference = vec1 - vec2
    return np.linalg.norm(difference)

def comparar_features(new_feature_vector, features_csv):
    """Return the label of the stored vector closest to ``new_feature_vector``.

    Args:
        new_feature_vector: Feature vector to classify.
        features_csv: Path of a CSV with a ``Classe`` column and a
            ``Feature_vector`` column holding bracketed, whitespace-separated
            numbers (as written by ``create_csv``).

    Returns:
        The ``Classe`` value of the row with the smallest Euclidean distance;
        the first such row wins on ties. ``None`` if no row is closer than
        infinity (for example, when the CSV has no rows).
    """
    stored = pd.read_csv(features_csv)

    # Drop the surrounding brackets and parse the numbers back into an array.
    stored['Feature_vector'] = stored['Feature_vector'].apply(
        lambda text: np.fromstring(text[1:-1], sep=" ")
    )

    best_label = None
    best_distance = float('inf')

    for _, entry in stored.iterrows():
        candidate = distancia_euclidiana(new_feature_vector, np.array(entry['Feature_vector']))
        if not candidate < best_distance:
            continue
        best_distance = candidate
        best_label = entry['Classe']

    return best_label


def generate_metrics(results, true_label):
    """Compute accuracy and macro-averaged precision, recall and F1.

    Args:
        results: Predicted labels.
        true_label: Ground-truth labels, in the same order as ``results``.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)``.
    """
    macro_scorers = (precision_score, recall_score, f1_score)
    accuracy = accuracy_score(true_label, results)
    precision, recall, f1 = (
        scorer(true_label, results, average='macro') for scorer in macro_scorers
    )
    return accuracy, precision, recall, f1


def generate_results(teste_df, vetores, images_path=None):
    """Classify every image of ``teste_df`` and score the predictions.

    Each image is turned into a feature vector and labelled with the class of
    the nearest vector stored in the ``vetores`` CSV.

    Args:
        teste_df: DataFrame with an ``id`` column (image file name) and a
            ``label`` column (true class).
        vetores: Path of the reference feature-vector CSV.
        images_path: Directory containing the images. When omitted, the
            module-level ``path`` variable is used, as this function has
            always done; passing it explicitly avoids relying on that global.

    Returns:
        A tuple ``(results, true_label, accuracy, precision, recall, f1)``
        where ``results`` and ``true_label`` are lists in row order.
    """
    if images_path is None:
        # Backward-compatible fallback to the global set by the test cells.
        images_path = path

    results = []
    true_label = []
    for index, row in teste_df.iterrows():
        image_path = os.path.join(images_path, row['id'])
        classe = row['label']

        new_feature_vector = generate_feature_vector(image_path)
        label = comparar_features(new_feature_vector, vetores)

        results.append(label)
        true_label.append(classe)

    accuracy, precision, recall, f1 = generate_metrics(results, true_label)
    return results, true_label, accuracy, precision, recall, f1

def generate_random_samples(csv, n=5):
    """Draw ``n`` random rows per class from a CSV file.

    Args:
        csv: Path of a CSV file that has a ``label`` column.
        n: Number of rows to sample from each label group (without
            replacement). Defaults to 5, the value previously hard-coded.

    Returns:
        A DataFrame with all original columns (including ``label``), rows
        grouped by label, indexed by ``(label, original row index)``.

    Raises:
        ValueError: If any label group has fewer than ``n`` rows.
    """
    df = pd.read_csv(csv)
    # GroupBy.sample keeps the grouping column in the result; groupby.apply
    # on the grouping column is deprecated and drops it in newer pandas.
    insert_df = df.groupby('label').sample(n=n)
    # Rebuild the (label, row) MultiIndex that the apply-based version returned.
    insert_df.index = pd.MultiIndex.from_arrays(
        [insert_df['label'], insert_df.index], names=['label', None]
    )
    return insert_df

# **TESTS**

In [None]:
# Turn off NumPy's "..." summarization when arrays are converted to text.
# create_csv stores each feature vector as text in a CSV column and
# comparar_features parses that text back, so vectors must be printed in full.
np.set_printoptions(threshold=np.inf)

**TEST 4**

In [None]:
# Paths for the images and for the CSV files used in feature extraction.
# The weights are loaded from the classification models trained earlier, so this
# test reuses the same folders and the same weights.
dir = #path where the classification stuff is saved
path = #image path
new_save_path = #new path to save test results
# TEST 4: for each of the five saved splits, use the full training set as the
# reference feature vectors and classify the validation set by nearest vector.

# Metric lists live OUTSIDE the loop so the summary below averages all five
# splits (re-creating them per iteration kept only the last split's values).
acc1 = []
prec1 = []
rec1 = []
f11 = []

# Make sure the output folder exists before any expensive work is done.
os.makedirs(new_save_path, exist_ok=True)

for i in range(1, 6):
    # Load the classifier of split i and cut it at the second-to-last layer,
    # whose output is used as the feature vector. `model` is a global read by
    # generate_feature_vector.
    model = load_model(f'{dir}/model{i}.h5')
    model = Model(inputs=model.input, outputs=model.layers[-2].output)

    insert_df = pd.read_csv(f'{dir}/training_data{i}.csv')
    test_df = pd.read_csv(f'{dir}/validation_data{i}.csv')

    # Create the reference feature-vector CSV from the training rows.
    classe, feature = list_features_classes(insert_df, path)
    vetores_path = create_csv(classe, feature, feature_csv_name=f"vetores_teste{i}.csv")

    results, true_label, accuracy, precision, recall, f1 = generate_results(test_df, vetores_path)

    acc1.append(accuracy)
    prec1.append(precision)
    rec1.append(recall)
    f11.append(f1)

    # Per-split predictions next to the true labels.
    df = pd.DataFrame({'results': results, 'true_label': true_label})
    df.to_csv(f'{new_save_path}/df{i}.csv', index=False)

# Collapse the per-split metrics into their means (one-row summary).
acc1 = [np.array(acc1).mean()]
prec1 = [np.array(prec1).mean()]
rec1 = [np.array(rec1).mean()]
f11 = [np.array(f11).mean()]

df = pd.DataFrame({'acc_mean': acc1, 'prec_mean': prec1, 'rec_mean': rec1, 'f1_mean': f11})
df.to_csv(f'{new_save_path}/df_mean.csv', index=False)

**TEST 1, 2, 3**

In [None]:
# Paths for the images and for the CSV files used in feature extraction.
# The weights are loaded from the classification models trained earlier, so this
# test reuses the same folders and the same weights.
dir = #path where the classification stuff is saved
path = #image path
new_save_path = #new path to save test results
test_number = #number of the test, used to save its results in a separate folder


# Run this cell 3 times, once per test, changing test_number each time.
# Before each run, edit generate_random_samples() and change n in x.sample(n=5): test 1 uses n=1, test 2 uses n=3, test 3 uses n=5.
# TESTS 1-3: for each of the five saved splits, repeat 20 times: draw a random
# per-class subset of the training set as reference vectors and classify the
# validation set by nearest vector.

# Metric lists live OUTSIDE both loops so the summary below averages every
# repetition of every split (re-creating them per split kept only the last one).
acc1 = []
prec1 = []
rec1 = []
f11 = []

# Results of each test go into their own subfolder; create it up front so
# to_csv does not fail after the expensive feature extraction.
test_save_path = f'{new_save_path}/test{test_number}'
os.makedirs(test_save_path, exist_ok=True)

for i in range(1, 6):
    results1 = []
    true_label1 = []

    # The model and the validation set do not change between repetitions, so
    # they are loaded once per split. `model` is a global read by
    # generate_feature_vector; it is cut at the second-to-last layer, whose
    # output is used as the feature vector.
    model = load_model(f'{dir}/model{i}.h5')
    model = Model(inputs=model.input, outputs=model.layers[-2].output)
    test_df = pd.read_csv(f'{dir}/validation_data{i}.csv')

    for j in range(20):
        # Fresh random reference samples for this repetition.
        insert_df = generate_random_samples(f'{dir}/training_data{i}.csv')

        # Create the reference feature-vector CSV (overwritten each repetition).
        classe, feature = list_features_classes(insert_df, path)
        vetores_path = create_csv(classe, feature, feature_csv_name=f"vetores_teste{i}.csv")

        results, true_label, accuracy, precision, recall, f1 = generate_results(test_df, vetores_path)

        acc1.append(accuracy)
        prec1.append(precision)
        rec1.append(recall)
        f11.append(f1)
        results1.append(results)
        true_label1.append(true_label)

    # One row per repetition: the predicted and the true label lists.
    df = pd.DataFrame({'results': results1, 'true_label': true_label1})
    df.to_csv(f'{test_save_path}/df{i}.csv', index=False)

# Collapse all collected metrics into their means (one-row summary).
acc1 = [np.array(acc1).mean()]
prec1 = [np.array(prec1).mean()]
rec1 = [np.array(rec1).mean()]
f11 = [np.array(f11).mean()]

df = pd.DataFrame({'acc_mean': acc1, 'prec_mean': prec1, 'rec_mean': rec1, 'f1_mean': f11})
df.to_csv(f'{test_save_path}/df_mean.csv', index=False)