In [1]:
import os
import datetime
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

import torch as th
import torch.nn as nn
import torchvision as tv
from torchvision.transforms import v2
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

from ModelsHead import FinalHead

now = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
print(f"now: {now}")

now: 2025-03-27_12:29:15


In [2]:
def read_feature_files_recursively(directory):
    # Initialize an empty list to store DataFrames
    dfs = []
    
    # Walk through the directory recursively
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.ada'):  # Check the file extension
                file_path = os.path.join(root, file)
                try:
                    # Read the file into a DataFrame
                    df = pd.read_csv(file_path, header=None)
                    df = df.T  # Transpose the file before appending
                    
                    # Add a new column 'file_name' to the DataFrame with the name of the file
                    df['file_name'] = file  # Add the file name as a new column
                    
                    # Reorder columns to place 'file_name' at the beginning
                    cols = ['file_name'] + [col for col in df.columns if col != 'file_name']
                    df = df[cols]
                    
                    # Append the DataFrame to the list
                    dfs.append(df)
                    print(f"Successfully read: {file_path}")
                except Exception as e:
                    print(f"Error reading {file_path}: {e}")
    
    # Concatenate all DataFrames into a single DataFrame
    if dfs:
        full_df = pd.concat(dfs, ignore_index=True)
        return full_df
    else:
        return None

In [3]:
directory = '../project_github/adaface_features'
df = read_feature_files_recursively(directory)

# Check the result
if df is not None:
    print(df.head())  # Display the first few rows of the combined DataFrame
else:
    print("No CSV files found.")

Successfully read: ../project_github/adaface_features\subject_a.ada
Successfully read: ../project_github/adaface_features\subject_b.ada
       file_name         0         1         2         3         4         5  \
0  subject_a.ada -0.057427  0.046010  0.034182 -0.023349  0.016623 -0.047561   
1  subject_b.ada -0.034507  0.033995  0.046838 -0.040003 -0.011384 -0.046809   

          6         7         8  ...       502       503       504       505  \
0  0.030808  0.019102  0.041033  ...  0.032285 -0.040341 -0.049573  0.030847   
1  0.036195 -0.004844  0.071372  ... -0.019667  0.022785 -0.045279  0.015532   

        506       507       508       509       510       511  
0 -0.014491 -0.015417 -0.020143 -0.008159 -0.045846  0.015754  
1  0.011882 -0.002304 -0.032385  0.018751 -0.024523  0.032486  

[2 rows x 513 columns]


In [4]:
df_file_name = df[["file_name"]]
df_file_name["file_name"] = df_file_name["file_name"].str.split(".", expand=True).iloc[:,0]

df_features = df.iloc[:,1:]

data = {"labels": [0.0, 1.0]}
df_labels = pd.DataFrame(data)

df_features.columns = ["adaface_feat" + str(col) for col in df_features.columns]
df = pd.concat([df_file_name, df_features, df_labels], axis=1)

df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_file_name["file_name"] = df_file_name["file_name"].str.split(".", expand=True).iloc[:,0]


Unnamed: 0,file_name,adaface_feat0,adaface_feat1,adaface_feat2,adaface_feat3,adaface_feat4,adaface_feat5,adaface_feat6,adaface_feat7,adaface_feat8,...,adaface_feat503,adaface_feat504,adaface_feat505,adaface_feat506,adaface_feat507,adaface_feat508,adaface_feat509,adaface_feat510,adaface_feat511,labels
0,subject_a,-0.057427,0.04601,0.034182,-0.023349,0.016623,-0.047561,0.030808,0.019102,0.041033,...,-0.040341,-0.049573,0.030847,-0.014491,-0.015417,-0.020143,-0.008159,-0.045846,0.015754,0.0
1,subject_b,-0.034507,0.033995,0.046838,-0.040003,-0.011384,-0.046809,0.036195,-0.004844,0.071372,...,0.022785,-0.045279,0.015532,0.011882,-0.002304,-0.032385,0.018751,-0.024523,0.032486,1.0


In [5]:
tensor_features = th.tensor(df_features.values, dtype=th.float32)
tensor_features

tensor([[-0.0574,  0.0460,  0.0342,  ..., -0.0082, -0.0458,  0.0158],
        [-0.0345,  0.0340,  0.0468,  ...,  0.0188, -0.0245,  0.0325]])

In [6]:
if th.cuda.is_available():
  device = "cuda:0"
else:
  device = "cpu"
print("device:",device)

device: cpu


In [7]:
model_frs = FinalHead(512, 1,
                  True)

# model = model.to(th.float64)

In [8]:
model_frs(tensor_features[0])

tensor([-0.1292], grad_fn=<ViewBackward0>)

In [9]:
num_epochs = 1


startTime = datetime.datetime.now()
startTime_str = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
print(f"[INFO] testing iris FFD estimation...{startTime_str}")

model_frs.eval()
scores = []
file_names = []
lab=[]
label_output = []

with th.no_grad():
    for epoch in range(num_epochs):

        batch = tensor_features.to(device)
        # labels = labels.to(device)

        transformed = model_frs.forward(batch)
        label_preds = (transformed.detach().cpu() >= .5).float().tolist()
        # label_preds = 1 if th.sigmoid(transformed).detach().cpu() > 0.5 else 0 #th.argmax(transformed, dim=1)
        scores.append(th.sigmoid(transformed).detach().cpu().tolist())
        # lab.append(labels.detach().cpu().tolist())
        label_output.append(label_preds)
        # file_names.append(df_file_name.values) # cambio de objeto: file to file[0] otherwise the result of the file name is a list of one element (the name of the file lol...)
    

endTime = datetime.datetime.now()
endTime_str = endTime.strftime('%Y-%m-%d_%H:%M:%S')
print("[INFO] end time...", str(endTime_str))
finish_time = endTime-startTime
print("[INFO] running time...", str(finish_time))

[INFO] testing iris FFD estimation...2025-03-27_12:29:15
[INFO] end time... 2025-03-27_12:29:15
[INFO] running time... 0:00:00.001878


In [10]:
scores = [item[0] for sublist in scores for item in sublist]
scores

[0.46775510907173157, 0.4683072566986084]

In [11]:
label_output = [item[0] for sublist in label_output for item in sublist]
label_output

[0.0, 0.0]

In [12]:
df_scores = pd.DataFrame(scores, columns=["frs-scores"])
df_scores

Unnamed: 0,frs-scores
0,0.467755
1,0.468307


In [13]:
df_pred_lab = pd.DataFrame(label_output, columns=["pred-lab"])
df_pred_lab


Unnamed: 0,pred-lab
0,0.0
1,0.0


In [14]:
pred_df = pd.concat([df_file_name, df_labels, df_scores], axis=1)
pred_df

Unnamed: 0,file_name,labels,frs-scores
0,subject_a,0.0,0.467755
1,subject_b,1.0,0.468307


#### **EffB2 FFD estimation**
_____

In [15]:
model_eff =  tv.models.efficientnet_b2()

model_eff.classifier = nn.Sequential(
                                                nn.Linear(in_features=1408, out_features=512, bias=True),
                                                nn.Linear(in_features=512, out_features=256, bias=True),
                                                nn.ReLU(),
                                                nn.Linear(in_features=256, out_features=128, bias=True),
                                                nn.ReLU(),
                                                nn.Linear(in_features=128, out_features=64, bias=True),
                                                nn.ReLU(),
                                                nn.Linear(in_features=64, out_features=1, bias=True),
                                                )

In [16]:
data_path = "../project_github/img_examples"

transf_ = v2.Compose([
    v2.ToTensor(),  # Ensure that images are converted to tensors
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize using ImageNet values])
])

data_ = ImageFolder(root=data_path, transform=transf_) # Load images from the folder without transformations
train_loader = DataLoader(data_, batch_size=1)



In [17]:
# Example of iterating through the train_loader
for inputs, labels in train_loader:
    print(inputs.shape)  # Shape of the input batch (e.g., [32, 3, 224, 224])
    print(labels.shape)  # Shape of the labels batch (e.g., [32])
    break  # Just to check the first batch

torch.Size([1, 3, 112, 112])
torch.Size([1])


In [18]:
num_epochs = 1


startTime = datetime.datetime.now()
startTime_str = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
print(f"[INFO] testing iris FFD estimation...{startTime_str}")

model_eff.eval()
scores = []
file_names = []
lab=[]
label_output = []

with th.no_grad():
    for batch, labels in (pbar:=tqdm(train_loader, desc="Batch", position=0)):

        batch = batch.to(device)
        # batch = batch.view(1, 2, 512,1)
        # labels = labels.to(device)

        transformed = model_eff.forward(batch)
        label_preds = (transformed.detach().cpu() >= .5).float().tolist()
        # label_preds = 1 if th.sigmoid(transformed).detach().cpu() > 0.5 else 0 #th.argmax(transformed, dim=1)
        scores.append(th.sigmoid(transformed).detach().cpu().tolist())
        # lab.append(labels.detach().cpu().tolist())
        label_output.append(label_preds)
        # file_names.append(df_file_name.values) # cambio de objeto: file to file[0] otherwise the result of the file name is a list of one element (the name of the file lol...)
    

endTime = datetime.datetime.now()
endTime_str = endTime.strftime('%Y-%m-%d_%H:%M:%S')
print("[INFO] end time...", str(endTime_str))
finish_time = endTime-startTime
print("[INFO] running time...", str(finish_time))

[INFO] testing iris FFD estimation...2025-03-27_12:29:15


Batch:   0%|          | 0/2 [00:00<?, ?it/s]

[INFO] end time... 2025-03-27_12:29:15
[INFO] running time... 0:00:00.087352


In [19]:
eff_scores = [item[0] for sublist in scores for item in sublist]
eff_scores

[0.4885897934436798, 0.4885897934436798]

In [20]:
eff_label_output = [item[0] for sublist in label_output for item in sublist]
eff_label_output

[0.0, 0.0]

In [21]:
effb2_pred_df = pd.DataFrame(data_.imgs, columns=["file_name", "labels"])

effb2_pred_df["file_name"] = effb2_pred_df["file_name"].str.replace("\\", "/", regex=False)
effb2_pred_df["file_name"] = effb2_pred_df["file_name"].str.split("/", expand=True).iloc[:,4]
effb2_pred_df["file_name"] = effb2_pred_df["file_name"].str.split(".", expand=True).iloc[:,0]

eff_scores_df = pd.DataFrame(eff_scores, columns=["effb2-scores"])
# eff_label_output_df = pd.DataFrame(eff_label_output, columns=["eff-scores"])

effb2_pred_df = pd.concat([effb2_pred_df, eff_scores_df], axis=1)

effb2_pred_df

Unnamed: 0,file_name,labels,effb2-scores
0,subject_a,0,0.48859
1,subject_b,1,0.48859


### **Mean and Max FFD estimation per subject**
___

In [22]:
final_pred_df = pd.merge(effb2_pred_df, pred_df[["file_name","frs-scores"]], 
         how="left", on="file_name")

final_pred_df["max-scores"] = final_pred_df[["effb2-scores", "frs-scores"]].max(axis=1)
final_pred_df["mean-scores"] = final_pred_df[["effb2-scores", "frs-scores"]].mean(axis=1)

final_pred_df["max-pred-lab"] = (final_pred_df["max-scores"] >= .5).astype(int)
final_pred_df["mean-pred-lab"] = (final_pred_df["mean-scores"] >= .5).astype(int)

final_pred_df

Unnamed: 0,file_name,labels,effb2-scores,frs-scores,max-scores,mean-scores,max-pred-lab,mean-pred-lab
0,subject_a,0,0.48859,0.467755,0.48859,0.478172,0,0
1,subject_b,1,0.48859,0.468307,0.48859,0.478449,0,0
