In [259]:
import pandas as pd
import numpy as np
from utils import get_label_mappings
import os

DATASET_PATH = "/home/heitor/USP/IC/FAPESP/code_dataset/dataset/Plant_leave_diseases_dataset_without_augmentation"

In [260]:
label_mappings = get_label_mappings(DATASET_PATH)

In [261]:
def get_dataframe(path, identifier):
    df_list = []
    for file in os.listdir(path):
        if file.endswith(".csv"):
            df = pd.read_csv(os.path.join(path, file))
            df = df.drop(columns=["x", "y"])
            df_list.append(df)
            
    df = pd.concat(df_list, ignore_index=True)
    pattern = r'batch_\d{4}/(.*)'

    # Extract the desired part of the path into a new column
    df['image_path'] = df['image_path'].str.extract(pattern)
    # Remove '_marked' from the extracted path
    df['image_path'] = df['image_path'].str.replace('_marked', '')
    df = df.sort_values(by='image_path', ignore_index=True)
    df["identifier"] = identifier

    return df

In [262]:
regular_df = get_dataframe("../../t-SNE/results/regular/dataframes", "regular")
seg_df = get_dataframe("../../t-SNE/results/segmented/dataframes", "segmented")
seg_wb_df = get_dataframe("../../t-SNE/results/segmented_wb/dataframes", "seg_wb")

In [263]:
regular_df

Unnamed: 0,image_path,pred,label,identifier
0,Apple___Apple_scab/image (1).JPG,0,0,regular
1,Apple___Apple_scab/image (10).JPG,0,0,regular
2,Apple___Apple_scab/image (100).JPG,0,0,regular
3,Apple___Apple_scab/image (101).JPG,0,0,regular
4,Apple___Apple_scab/image (102).JPG,0,0,regular
...,...,...,...,...
55417,Tomato___healthy/image (995).JPG,38,38,regular
55418,Tomato___healthy/image (996).JPG,38,38,regular
55419,Tomato___healthy/image (997).JPG,38,38,regular
55420,Tomato___healthy/image (998).JPG,38,38,regular


In [264]:
seg_wb_df

Unnamed: 0,image_path,pred,label,identifier
0,Apple___Apple_scab/image (1).JPG,0,0,seg_wb
1,Apple___Apple_scab/image (10).JPG,3,0,seg_wb
2,Apple___Apple_scab/image (100).JPG,0,0,seg_wb
3,Apple___Apple_scab/image (101).JPG,0,0,seg_wb
4,Apple___Apple_scab/image (102).JPG,0,0,seg_wb
...,...,...,...,...
55339,Tomato___healthy/image (995).JPG,38,38,seg_wb
55340,Tomato___healthy/image (996).JPG,38,38,seg_wb
55341,Tomato___healthy/image (997).JPG,38,38,seg_wb
55342,Tomato___healthy/image (998).JPG,38,38,seg_wb


In [265]:
combined_df = pd.concat([regular_df, seg_wb_df])
duplicates = combined_df.duplicated(subset='image_path', keep=False)
df = combined_df[duplicates].sort_values(by="image_path", ignore_index=True)
df

Unnamed: 0,image_path,pred,label,identifier
0,Apple___Apple_scab/image (1).JPG,0,0,regular
1,Apple___Apple_scab/image (1).JPG,0,0,seg_wb
2,Apple___Apple_scab/image (10).JPG,0,0,regular
3,Apple___Apple_scab/image (10).JPG,3,0,seg_wb
4,Apple___Apple_scab/image (100).JPG,0,0,regular
...,...,...,...,...
105509,Tomato___healthy/image (997).JPG,38,38,regular
105510,Tomato___healthy/image (998).JPG,38,38,seg_wb
105511,Tomato___healthy/image (998).JPG,38,38,regular
105512,Tomato___healthy/image (999).JPG,38,38,regular


In [266]:
filtered_df = df[df.groupby('image_path')['pred'].transform('nunique') > 1]
filtered_df = filtered_df.sort_values(by=['image_path', 'identifier'])
filtered_df.reset_index(drop=True, inplace=True)
filtered_df

Unnamed: 0,image_path,pred,label,identifier
0,Apple___Apple_scab/image (10).JPG,0,0,regular
1,Apple___Apple_scab/image (10).JPG,3,0,seg_wb
2,Apple___Apple_scab/image (104).JPG,0,0,regular
3,Apple___Apple_scab/image (104).JPG,22,0,seg_wb
4,Apple___Apple_scab/image (110).JPG,10,0,regular
...,...,...,...,...
7883,Tomato___healthy/image (931).JPG,38,38,seg_wb
7884,Tomato___healthy/image (982).JPG,38,38,regular
7885,Tomato___healthy/image (982).JPG,25,38,seg_wb
7886,Tomato___healthy/image (986).JPG,38,38,regular


In [267]:
# Update 'image_path' for rows where 'identifier' is 'seg_wb' in filtered_df
filtered_df.loc[filtered_df['identifier'] == 'seg_wb', 'image_path'] = filtered_df.loc[filtered_df['identifier'] == 'seg_wb', 'image_path'].apply(
    lambda path: path.replace('.JPG', '_marked.JPG') if path.endswith('.JPG') else path
)

filtered_df

Unnamed: 0,image_path,pred,label,identifier
0,Apple___Apple_scab/image (10).JPG,0,0,regular
1,Apple___Apple_scab/image (10)_marked.JPG,3,0,seg_wb
2,Apple___Apple_scab/image (104).JPG,0,0,regular
3,Apple___Apple_scab/image (104)_marked.JPG,22,0,seg_wb
4,Apple___Apple_scab/image (110).JPG,10,0,regular
...,...,...,...,...
7883,Tomato___healthy/image (931)_marked.JPG,38,38,seg_wb
7884,Tomato___healthy/image (982).JPG,38,38,regular
7885,Tomato___healthy/image (982)_marked.JPG,25,38,seg_wb
7886,Tomato___healthy/image (986).JPG,38,38,regular


In [268]:
filtered_df.to_csv("transitioned_images.csv")