# DataFrames Formatting - Extended Validation

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Locations of the per-dataset metric CSV files read below.
PATH_EVALUATIONS = "./evaluations"
PATH_EV_METRICS = f"{PATH_EVALUATIONS}/extended-validation"
METRICS_BRACOL = f"{PATH_EV_METRICS}/bracol/metrics_bracol.csv"
METRICS_PLANT_PATOLOGIES = f"{PATH_EV_METRICS}/plant_patologies/metrics_plant_patologies.csv"
METRICS_ROCOLE = f"{PATH_EV_METRICS}/rocole/metrics_rocole.csv"

## 1. Opening dataframes

### 1.1. BRACOL

In [4]:
# Read the BRACOL metrics CSV and preview its first rows.
df_bracol = pd.read_csv(filepath_or_buffer=METRICS_BRACOL)
df_bracol.head(n=5)

Unnamed: 0,Early Stopping,Epochs,Test Accuracy,Test F1 Weightet,Test Precision Weighted,Test Recall Weighted
0,True,96,0.826866,0.824443,0.830976,0.826866
1,False,300,0.862687,0.860581,0.863572,0.862687


### 1.2. Plant Pathologies

In [5]:
# Read the Plant Patologies metrics CSV and preview its first rows.
df_plant_patologies = pd.read_csv(filepath_or_buffer=METRICS_PLANT_PATOLOGIES)
df_plant_patologies.head(n=5)

Unnamed: 0,Early Stopping,Epochs,Test Accuracy,Test F1 Weightet,Test Precision Weighted,Test Recall Weighted
0,True,31,0.522523,0.358655,0.27303,0.522523
1,False,300,0.936937,0.936957,0.93713,0.936937


### 1.3. RoCoLe

In [6]:
# Read the RoCoLe metrics CSV and preview its first rows.
df_rocole = pd.read_csv(filepath_or_buffer=METRICS_ROCOLE)
df_rocole.head(n=5)

Unnamed: 0,Early Stopping,Epochs,Test Accuracy,Test F1 Weightet,Test Precision Weighted,Test Recall Weighted
0,True,59,0.584127,0.548807,0.546922,0.584127
1,False,300,0.587302,0.580185,0.577721,0.587302


## 2. Selecting datasets

In [7]:
# Display labels of the datasets; these exact strings are later matched
# against the "Datasets" column of the summary table.
datasets = [
    "BRACOL",
    "Plant Patologies",
    "RoCoLe",
]
datasets

['BRACOL', 'Plant Patologies', 'RoCoLe']

## 3. Defining columns and index

In [8]:
# Header of the summary table: two descriptive columns followed by every
# column of the BRACOL metrics frame, in its original order.
columns = ["Datasets", "Samples", *df_bracol.columns]

columns

['Datasets',
 'Samples',
 'Early Stopping',
 'Epochs',
 'Test Accuracy',
 'Test F1 Weightet',
 'Test Precision Weighted',
 'Test Recall Weighted']

In [9]:
# Row labels 1..6 for the summary table.
index = list(range(1, 7))

## 4. Saving all metrics in a general dataframe

In [10]:
# Start from an empty table (all columns, rows labelled by `index`), fill in
# the dataset label and the "Samples" value twice per dataset, then group the
# rows of each dataset together by sorting on the label.
n_rows_per_dataset = 2
samples_per_dataset = [2209, 542, 1560]  # positionally aligned with `datasets`

df_extended_validation = (
    pd.DataFrame(index=index, columns=columns)
    .assign(
        Datasets=datasets * n_rows_per_dataset,
        Samples=samples_per_dataset * n_rows_per_dataset,
    )
    .sort_values(by="Datasets", ignore_index=True)
)
df_extended_validation

Unnamed: 0,Datasets,Samples,Early Stopping,Epochs,Test Accuracy,Test F1 Weightet,Test Precision Weighted,Test Recall Weighted
0,BRACOL,2209,,,,,,
1,BRACOL,2209,,,,,,
2,Plant Patologies,542,,,,,,
3,Plant Patologies,542,,,,,,
4,RoCoLe,1560,,,,,,
5,RoCoLe,1560,,,,,,


In [11]:
# Copy each dataset's metric values into the matching rows of the summary table.
#
# The write goes through a single `.loc[rows, column]` call on the frame itself.
# The chained form `frame[column].loc[rows] = ...` assigns into an intermediate
# Series, which pandas does not guarantee to propagate back to the frame (and
# which never propagates under copy-on-write).
for dataset in datasets:
    # Dispatch on the dataset label: exact match for BRACOL, substring match
    # for the "Plant ..." dataset, and the RoCoLe frame as the fallback.
    if dataset == "BRACOL":
        df_source = df_bracol
    elif "Plant" in dataset:
        df_source = df_plant_patologies
    else:
        df_source = df_rocole

    # The row selection only depends on the "Datasets" column, which is never
    # written in this loop, so it is computed once per dataset.
    dataset_rows = df_extended_validation["Datasets"] == dataset

    # columns[2:] skips "Datasets" and "Samples", leaving the metric columns.
    for column in columns[2:]:
        df_extended_validation.loc[dataset_rows, column] = df_source[column].values

In [12]:
# Write the combined table under PATH_EVALUATIONS, with a header row and
# without the row index.
output_csv = f"{PATH_EVALUATIONS}/metrics_extended_validation.csv"
df_extended_validation.to_csv(output_csv, index=False, header=True)

## 5. Showing the best results

In [13]:
# Order the rows from highest to lowest "Test F1 Weightet" (the column name is
# spelled exactly as in the source CSVs), renumber them, and show the top six.
df_extended_validation = df_extended_validation.sort_values(
    by="Test F1 Weightet",
    ascending=False,
    ignore_index=True,
)
df_extended_validation.head(n=6)

Unnamed: 0,Datasets,Samples,Early Stopping,Epochs,Test Accuracy,Test F1 Weightet,Test Precision Weighted,Test Recall Weighted
0,Plant Patologies,542,False,300,0.936937,0.936957,0.93713,0.936937
1,BRACOL,2209,False,300,0.862687,0.860581,0.863572,0.862687
2,BRACOL,2209,True,96,0.826866,0.824443,0.830976,0.826866
3,RoCoLe,1560,False,300,0.587302,0.580185,0.577721,0.587302
4,RoCoLe,1560,True,59,0.584127,0.548807,0.546922,0.584127
5,Plant Patologies,542,True,31,0.522523,0.358655,0.27303,0.522523
