In [None]:
import os
import torch
import torch.nn.functional as F
import torch.nn as nn
import pandas as pd
import numpy as np
from torch_geometric.nn import GATConv
from torch_geometric.data import Data, DataLoader, Batch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
from sklearn.neighbors import KDTree
from scipy.spatial.distance import pdist
from torch.utils.data.dataset import random_split

In [None]:
# Load the data.
# header=None makes pandas treat every line (including the first) as data; the
# first row is then dropped below and the column names are assigned by hand.
# NOTE(review): the path is absolute and machine-specific; presumably it should
# come from a configurable data directory.
df = pd.read_csv("/Users/mac/Desktop/gnn/archive/consolidated_train.csv", header=None )
df = df.iloc[1:, :]
df.columns = ['frame_num', 'identity_num', 'bbox_top', 'bbox_left', 'bbox_width',
              'bbox_height', 'object_category', 'occlusion', 'truncation', 'object_pose']

# Data Preprocessing
# Encode the pose labels as consecutive integer class ids.
le = LabelEncoder()
df['object_pose'] = le.fit_transform(df['object_pose'])
# Coerce the bounding-box columns to numbers; unparsable entries become NaN and
# the affected rows are dropped on the next line.
df[['bbox_top', 'bbox_left', 'bbox_width', 'bbox_height']] = df[['bbox_top', 'bbox_left', 'bbox_width', 'bbox_height']].apply(pd.to_numeric, errors='coerce')
df = df.dropna(subset=['bbox_top', 'bbox_left', 'bbox_width', 'bbox_height'])
# Ensure bounding box dimensions are positive
df = df[(df['bbox_width'] > 0) & (df['bbox_height'] > 0)]
# Checking class distribution
print(df['object_pose'].value_counts())
# NOTE(review): the labels were already integer-encoded by LabelEncoder above,
# so this conversion looks redundant - confirm before removing.
df['object_pose'] = pd.to_numeric(df['object_pose'], errors='coerce', downcast='integer')

# Stratified 60/20/20 train/validation/test split on the pose label.
# NOTE(review): train_df, val_df and test_df are not used by any later cell
# visible in this notebook.
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df['object_pose'], random_state=42)
train_df, val_df = train_test_split(train_df, test_size=0.25, stratify=train_df['object_pose'], random_state=42)  # 0.25 x 0.8 = 0.2
# Statistical summary
print(df.describe())

In [None]:
def construct_graph_for_frame(df_frame):
    """Build a spatial graph whose nodes are the bounding boxes in ``df_frame``.

    Node features are ``[bbox_top, bbox_left, bbox_width, bbox_height]``. Two
    boxes are connected (in both directions) when the distance between their
    centres is at most the 90th percentile of all pairwise centre distances.

    Args:
        df_frame: DataFrame with numeric ``bbox_*`` columns and an
            ``object_pose`` column.

    Returns:
        A ``Data`` object with ``x`` of shape ``(num_nodes, 4)``, ``edge_index``
        of shape ``(2, num_edges)`` and ``y`` of shape ``(num_nodes,)``.
        Frames with fewer than two boxes yield a graph without edges.
    """
    # Work on a copy so the caller's frame is never modified.
    df_frame = df_frame.copy()

    nodes = df_frame[['bbox_top', 'bbox_left', 'bbox_width', 'bbox_height']].values
    centers = np.column_stack([
        df_frame['bbox_left'] + df_frame['bbox_width'] / 2,
        df_frame['bbox_top'] + df_frame['bbox_height'] / 2
    ])

    spatial_edges = set()
    # With 0 or 1 boxes there are no pairwise distances: the percentile is
    # undefined and KDTree cannot be built on an empty array, so skip the search.
    if len(centers) > 1:
        distances = pdist(centers, metric='euclidean')
        threshold = np.percentile(distances, q=90)
        kdtree = KDTree(centers)
        indices_list = kdtree.query_radius(centers, r=threshold)
        for i, indices in enumerate(indices_list):
            spatial_edges.update((i, int(j)) for j in indices if i != j)

    if spatial_edges:
        # Sorting makes the edge order reproducible between runs.
        edge_index = torch.tensor(sorted(spatial_edges), dtype=torch.long).t().contiguous()
    else:
        # Keep the (2, num_edges) layout even when there are no edges.
        edge_index = torch.empty((2, 0), dtype=torch.long)
    x = torch.tensor(nodes, dtype=torch.float)

    # Chain the fill instead of `fillna(inplace=True)` on a `.loc` selection,
    # which may act on a temporary and leave NaNs behind for `astype(int)`.
    # NOTE(review): -1 marks unparsable labels; CrossEntropyLoss rejects it
    # unless ignore_index=-1 is set - confirm how such rows should be handled.
    labels = pd.to_numeric(df_frame['object_pose'], errors='coerce').fillna(-1).astype(int)
    y = torch.tensor(labels.values, dtype=torch.long)

    return Data(x=x, edge_index=edge_index, y=y)

def construct_temporal_graph_for_frame(df_frame, df_next_frame):
    """Build a graph linking each object in a frame to itself in the next frame.

    Nodes are the bounding boxes of ``df_frame`` followed by those of
    ``df_next_frame``. An edge goes from a current-frame node to the
    next-frame node carrying the same ``identity_num``.

    Args:
        df_frame: Rows of the current frame (``bbox_*``, ``identity_num``,
            ``object_pose`` columns).
        df_next_frame: Rows of the following frame, same columns.

    Returns:
        A ``Data`` object with ``x`` of shape
        ``(len(df_frame) + len(df_next_frame), 4)``, ``edge_index`` of shape
        ``(2, num_shared_identities)`` and one label per node in ``y``.
    """
    feature_cols = ['bbox_top', 'bbox_left', 'bbox_width', 'bbox_height']
    offset = len(df_frame)

    # Edge targets are offset by len(df_frame), so the next-frame nodes must be
    # part of x (and y); otherwise edge_index points past the last node.
    nodes = np.concatenate(
        [df_frame[feature_cols].values, df_next_frame[feature_cols].values], axis=0
    )
    labels = np.concatenate(
        [df_frame['object_pose'].values, df_next_frame['object_pose'].values], axis=0
    )

    # Row position of each identity in the next frame (first occurrence wins).
    next_position = {}
    for position, identity in enumerate(df_next_frame['identity_num'].values):
        next_position.setdefault(identity, position)

    # Match by identity, not by row order: the two frames may list the shared
    # objects in different orders.
    temporal_edges = [
        (position, next_position[identity] + offset)
        for position, identity in enumerate(df_frame['identity_num'].values)
        if identity in next_position
    ]

    if temporal_edges:
        edge_index = torch.tensor(temporal_edges, dtype=torch.long).t().contiguous()
    else:
        # Keep the (2, num_edges) layout when no identity is shared.
        edge_index = torch.empty((2, 0), dtype=torch.long)
    x = torch.tensor(nodes, dtype=torch.float)
    y = torch.tensor(labels, dtype=torch.long)

    return Data(x=x, edge_index=edge_index, y=y)





In [None]:
class GNN(nn.Module):
    """Two-layer graph attention network for per-node classification."""

    def __init__(self, in_channels, hidden_channels, num_classes):
        """Create the two attention layers.

        Args:
            in_channels: Number of input features per node.
            hidden_channels: Width of the hidden representation.
            num_classes: Number of output classes per node.
        """
        super().__init__()
        self.conv1 = GATConv(in_channels, hidden_channels)
        self.conv2 = GATConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        """Return per-node log-probabilities over the classes."""
        hidden = self.conv1(x, edge_index)
        hidden = F.relu(hidden)
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.4, training=self.training)
        scores = self.conv2(hidden, edge_index)
        return F.log_softmax(scores, dim=1)

    # 3. Loss Function
def detection_loss(pred_class_scores, true_class_labels):
    """Return the cross-entropy between predicted class scores and true labels."""
    return F.cross_entropy(pred_class_scores, true_class_labels)


In [None]:
def generate_graphs(folder='results'):
    """Lazily yield every object stored in a ``result_batch_*`` file of ``folder``."""
    for entry in os.listdir(folder):
        # Only files written by the batch-saving step are of interest.
        if not entry.startswith('result_batch_'):
            continue
        yield torch.load(os.path.join(folder, entry))
# Training and Validation Functions
def train_epoch(loader, model, optimizer, criterion, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``y`` and
            ``to(device)``.
        model: Module called as ``model(x, edge_index)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, target)``.
        device: Device the batches are moved to.

    Returns:
        ``(average_loss, train_accuracy)``: mean loss per batch and the fraction
        of correctly classified nodes. Both are ``0.0`` for an empty loader.
    """
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    for data in loader:
        data = data.to(device)
        target = data.y.to(device)
        optimizer.zero_grad()
        outputs = model(data.x, data.edge_index)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()
        num_batches += 1

    # Validation is deliberately not run here: the previous version called
    # validate() on a global `val_loader` and threw the result away, which hid
    # a global dependency and cost an extra validation pass every epoch.
    average_loss = total_loss / num_batches if num_batches else 0.0
    train_accuracy = correct / total if total else 0.0

    return average_loss, train_accuracy

def validate(loader, model, device, criterion):
    """Evaluate ``model`` on ``loader``.

    ``criterion`` is accepted for signature compatibility and is not used.

    Returns:
        ``(accuracy, weighted_f1)`` over all nodes seen in ``loader``.
    """
    model.eval()
    hits, seen = 0, 0
    predictions, references = [], []

    for batch in loader:
        batch = batch.to(device)
        # Gradients are not needed for evaluation.
        with torch.no_grad():
            scores = model(batch.x, batch.edge_index)
        predicted = torch.max(scores, 1)[1]
        seen += batch.y.size(0)
        hits += (predicted == batch.y).sum().item()
        predictions.extend(predicted.cpu().numpy())
        references.extend(batch.y.cpu().numpy())

    weighted_f1 = f1_score(references, predictions, average='weighted')
    return hits / seen, weighted_f1



def validation_loss(val_loader, model, criterion, device):
    """Return the mean per-batch loss of ``model`` over ``val_loader``.

    The model is switched to evaluation mode and no gradients are tracked.
    """
    model.eval()
    batch_losses = []
    for batch in val_loader:
        moved = batch.to(device)
        labels = batch.y.to(device)
        batch = moved
        with torch.no_grad():
            scores = model(batch.x, batch.edge_index)
            batch_losses.append(criterion(scores, labels).item())
    return sum(batch_losses, 0.0) / len(val_loader)



In [None]:
import os
import pandas as pd
import torch

def generate_graphs(folder='results'):
    """Load every saved graph found in ``folder`` and return them as a list.

    ``.pt`` files are loaded with ``torch.load``; a file holding a list or
    tuple contributes all of its items, any other object is added as one
    graph. ``.csv`` files are only read, not converted into graphs, because
    this function has no rule for building graphs from them. Other extensions
    are reported and skipped. Load failures are printed and the file skipped.

    Args:
        folder: Directory to scan (non-recursive).

    Returns:
        List of loaded graph objects, in sorted file-name order.
    """
    graphs = []

    # Sorted so the resulting order does not depend on the file system.
    for file in sorted(os.listdir(folder)):
        data_path = os.path.join(folder, file)

        # Check file extension and handle accordingly
        file_ext = os.path.splitext(file)[1]

        if file_ext == ".csv":
            try:
                df = pd.read_csv(data_path)
                # TODO: no CSV -> graph conversion is defined, so CSV files
                # currently contribute nothing to the returned list.
            except UnicodeDecodeError:
                # Retry with a permissive single-byte encoding.
                try:
                    df = pd.read_csv(data_path, encoding='ISO-8859-1')
                except Exception as e:
                    print(f"Could not read {data_path} due to {str(e)}")
            except pd.errors.ParserError:
                print(f"{data_path} is not a valid CSV or is empty.")
            except Exception as e:
                print(f"An unexpected error occurred: {str(e)}")

        elif file_ext == ".pt":
            try:
                data = torch.load(data_path)
            except Exception as e:
                print(f"Could not load PyTorch file {data_path} due to {str(e)}")
            else:
                # Previously the loaded object was dropped, so the function
                # always returned an empty list.
                if isinstance(data, (list, tuple)):
                    graphs.extend(data)
                else:
                    graphs.append(data)

        else:
            print(f"Unsupported file type {file_ext} for file {data_path}")
    return graphs

# Usage: load the saved graphs into the global `graphs` list used by the
# training cell.
# NOTE(review): absolute, machine-specific path - presumably should be configurable.
graphs = generate_graphs('/Users/mac/Desktop/gnn/PFE_Feryal/results')


In [None]:
# Report the sizes of the train/validation graph splits.
# NOTE(review): train_graphs and val_graphs are only assigned by the training
# cell further down, so on a fresh kernel this cell fails unless that cell has
# been run first. The "Batch Size: 100" text is hard-coded in the message.
print(f"Batch Size: 100, Train Graphs Length: {len(train_graphs)}, Validation Graphs Length: {len(val_graphs)}")



In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Derive the input width from the loaded graphs so the first layer always
# matches the node features (the hard-coded 64 did not match the 4 bbox columns
# produced by construct_graph_for_frame).
# NOTE(review): num_classes=10 is kept from the original - confirm it covers
# every label id present in the graphs.
in_channels = graphs[0].x.size(1)
model = GNN(in_channels=in_channels, hidden_channels=32, num_classes=10).to(device)  # Adjust parameters as needed

# 80/20 random split of the loaded graphs.
train_size = int(0.8 * len(graphs))
val_size = len(graphs) - train_size
train_graphs, val_graphs = random_split(graphs, [train_size, val_size])

train_loader = DataLoader(train_graphs, batch_size=100, shuffle=True, collate_fn=Batch.from_data_list)
val_loader = DataLoader(val_graphs, batch_size=100, shuffle=False, collate_fn=Batch.from_data_list)

# `optim` was never imported; use the fully qualified name. The earlier
# lr=0.01 optimizer was immediately overwritten, so only this one is kept.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
num_epochs = 100
train_loss_history, val_loss_history = [], []

for epoch in range(num_epochs):
    # train_epoch returns (average_loss, accuracy); only the loss is logged here.
    train_loss, _ = train_epoch(train_loader, model, optimizer, criterion, device)
    # validate() returns (accuracy, F1); the loss comes from validation_loss().
    val_loss = validation_loss(val_loader, model, criterion, device)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

    train_loss_history.append(train_loss)
    val_loss_history.append(val_loss)


In [None]:
import os
import torch
from torch_geometric.data import Batch

def generate_graphs(folder='results'):
    """Generate graphs one by one from saved chunks.

    Files named ``result_batch_<n>...`` are yielded in ascending numeric order
    of ``<n>``; names without a numeric index follow, sorted alphabetically.
    ``os.listdir`` gives no ordering guarantee, so without the sort the
    iteration order could differ between runs and machines.

    Args:
        folder: Directory containing the saved ``result_batch_*`` files.

    Yields:
        The object stored in each file, as returned by ``torch.load``.
    """
    prefix = 'result_batch_'

    def batch_order(file_name):
        # Sort key: numeric batch index first, non-numeric names afterwards.
        index_text = os.path.splitext(file_name)[0][len(prefix):]
        if index_text.isdigit():
            return (0, int(index_text), file_name)
        return (1, 0, file_name)

    saved_files = sorted(
        (f for f in os.listdir(folder) if f.startswith(prefix)),
        key=batch_order,
    )
    for file_name in saved_files:
        yield torch.load(os.path.join(folder, file_name))

# Hyperparameters & Optimizers
learning_rate = 0.01
# NOTE(review): weight_decay is defined but never passed to the optimizer below.
weight_decay = 1e-5
hidden_channels = 16
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Four node features: bbox_top, bbox_left, bbox_width, bbox_height.
input_dim = 4
num_classes = df['object_pose'].nunique()  # Ensure df is defined

# Model (freshly initialised here; this cell does not train it)
model = GNN(input_dim, hidden_channels, num_classes).to(device)

# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Criterion/Loss
criterion = torch.nn.CrossEntropyLoss()

# Ensure model is in evaluation mode
model.eval()

# Loop through all graphs saved under the default 'results' folder.
# NOTE(review): `F` is not imported in this cell; it relies on the import cell
# at the top of the notebook.

for graph in generate_graphs():
    # Wrap the single graph in a Batch and move it to the target device.
    data = Batch.from_data_list([graph]).to(device)

    # Perform a forward pass through the model
    with torch.no_grad():
        outputs = model(data.x, data.edge_index)

        # GNN.forward already returns log-probabilities; softmax over them
        # yields the class probabilities, and argmax picks the predicted class.
        probabilities = F.softmax(outputs, dim=1)
        _, predicted_classes = torch.max(probabilities, 1)

    # Displaying results
    print("Outputs:", outputs)
    print("Probabilities:", probabilities)
    print("Predicted classes:", predicted_classes)

    # Drop references and release cached GPU memory between graphs.
    del graph, data, outputs, probabilities, predicted_classes
    torch.cuda.empty_cache()


In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.data import DataLoader
from sklearn.metrics import f1_score

# Assuming you have df, training_data, val_data
# NOTE(review): training_data and val_data are not defined in any visible cell;
# presumably they are lists of graph objects - confirm where they come from.
num_classes = df['object_pose'].nunique()  # Ensure df is defined
# Four node features: bbox_top, bbox_left, bbox_width, bbox_height.
input_dim = 4
hidden_channels = 16
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model initialization, Criterion, and Optimizer definition remains unchanged.
# (model, optimizer and criterion are taken from earlier cells.)

# Loaders (DataLoader here is the torch_geometric one imported in this cell)
train_loader = DataLoader(training_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

# Training and Validation Functions
def train_epoch(loader, model, optimizer, criterion, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``y`` and
            ``to(device)``.
        model: Module called as ``model(x, edge_index)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, target)``.
        device: Device the batches are moved to.

    Returns:
        ``(average_loss, train_accuracy)``: mean loss per batch and the fraction
        of correctly classified nodes. Both are ``0.0`` for an empty loader.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    batches_seen = 0
    for data in loader:
        data = data.to(device)
        target = data.y.to(device)
        optimizer.zero_grad()
        outputs = model(data.x, data.edge_index)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()
        batches_seen += 1

    # No validation pass here: the earlier version evaluated a global
    # `val_loader` and discarded the result, which tied this function to hidden
    # notebook state and doubled the validation cost per epoch.
    average_loss = running_loss / batches_seen if batches_seen else 0.0
    train_accuracy = correct / total if total else 0.0

    return average_loss, train_accuracy

def validate(loader, model, device, criterion):
    """Compute accuracy and weighted F1 of ``model`` over ``loader``.

    The ``criterion`` argument is unused; it is kept so existing call sites
    continue to work.

    Returns:
        Tuple ``(accuracy, f1)``.
    """
    model.eval()
    n_correct = 0
    n_total = 0
    y_pred = []
    y_true = []

    for graph_batch in loader:
        graph_batch = graph_batch.to(device)
        with torch.no_grad():
            logits = model(graph_batch.x, graph_batch.edge_index)
            best = torch.max(logits, 1)
            predicted = best[1]
            truth = graph_batch.y
            n_total += truth.size(0)
            n_correct += (predicted == truth).sum().item()
            y_pred.extend(predicted.cpu().numpy())
            y_true.extend(truth.cpu().numpy())

    accuracy = n_correct / n_total
    return accuracy, f1_score(y_true, y_pred, average='weighted')


# Epoch loop: one training pass followed by one validation pass per epoch.
# NOTE(review): the return values of train_epoch (loss, accuracy) and validate
# (accuracy, F1) are discarded, so this loop reports no metrics.
num_epochs = 20 
for epoch in range(num_epochs):
    train_epoch(train_loader, model, optimizer, criterion, device)
    validate(val_loader, model, device, criterion)


In [None]:
from torch.utils.data import DataLoader

# torch.utils.data.DataLoader does not know how to merge graph objects, while
# train_epoch/validate need batches exposing .x, .edge_index, .y and .to().
# Batch.from_data_list builds exactly such a batch from a list of graphs.
# NOTE(review): assumes every dataset below yields torch_geometric Data objects
# - confirm against the cells that create them.
train_loader = DataLoader(training_data, batch_size=32, shuffle=True, collate_fn=Batch.from_data_list)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False, collate_fn=Batch.from_data_list)
train_temporal_loader = DataLoader(train_temporal_data, batch_size=32, shuffle=True, collate_fn=Batch.from_data_list)
val_temporal_loader = DataLoader(val_temporal_data, batch_size=32, shuffle=False, collate_fn=Batch.from_data_list)


def run_epoch(epoch, loader, model, optimizer, criterion, device, mode="Spatial", eval_loader=None):
    """Train for one epoch, evaluate, and print a one-line summary.

    Args:
        epoch: Zero-based epoch index (printed as ``epoch + 1``).
        loader: Training loader for this epoch.
        model, optimizer, criterion, device: Passed to ``train_epoch`` and
            ``validate``.
        mode: Label printed in front of the log line.
        eval_loader: Loader to evaluate on. When omitted, the notebook-level
            ``val_loader`` is used, which matches the previous behaviour.

    Returns:
        ``(train_accuracy, val_accuracy, val_f1)``.
    """
    if eval_loader is None:
        # Backward-compatible default: fall back to the global spatial loader.
        eval_loader = val_loader
    train_loss, train_accuracy = train_epoch(loader, model, optimizer, criterion, device)
    # The separate validation_loss() pass was removed: its result was never used.
    val_accuracy, val_f1 = validate(eval_loader, model, device, criterion)

    print(f"[{mode}] Epoch {epoch + 1}, Train Loss: {train_loss:.9f}, Train Accuracy: {train_accuracy:.9f}, Val Accuracy: {val_accuracy:.9f}, Validation F1 Score: {val_f1:.9f}")

    return train_accuracy, val_accuracy, val_f1

num_epochs = 20

spatial_accuracies = []
train_accuracies = []
spatial_f1_scores = []

temp_train_accuracies = []
temporal_accuracies = []
temporal_f1_scores = []

# NOTE(review): the same model and optimizer are trained alternately on the
# spatial and temporal loaders within each epoch - confirm this is intended.
for epoch in range(num_epochs):
    # `criterion` is used instead of `criterion_class`, which is only defined
    # in a later cell; both are a plain CrossEntropyLoss.
    train_acc, val_acc, val_f1 = run_epoch(epoch, train_loader, model, optimizer, criterion, device, "Spatial", eval_loader=val_loader)
    spatial_accuracies.append(val_acc)
    train_accuracies.append(train_acc)
    spatial_f1_scores.append(val_f1)

    # Temporal metrics must come from the temporal validation loader, not the
    # spatial one.
    temp_train_acc, temp_val_acc, temp_val_f1 = run_epoch(epoch, train_temporal_loader, model, optimizer, criterion, device, "Temporal", eval_loader=val_temporal_loader)
    temporal_accuracies.append(temp_val_acc)
    temp_train_accuracies.append(temp_train_acc)
    temporal_f1_scores.append(temp_val_f1)


In [None]:
# One spatial graph per frame.
# NOTE(review): `graphs` is rebuilt here, but the spatial loop below still uses
# train_loader / val_loader from earlier cells - confirm they wrap these graphs.
unique_frames = df['frame_num'].unique()
graphs = [construct_graph_for_frame(df[df['frame_num'] == frame]) for frame in unique_frames]

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model, optimizer and criterion are defined in earlier cells.
model = model.to(device)

criterion_class = nn.CrossEntropyLoss()  # For classification
criterion_bbox = nn.MSELoss()  # For bounding box regression (not used in this cell)

# For spatial graphs:
spatial_accuracies = []
train_accuracies = []  # List to hold training accuracies
spatial_f1_scores = []

val_loss_history = []

best_val_loss = float('inf')  # Set to a very high value for initialization
counter = 0
patience = 5  # Read by the scheduler / early-stopping cell further down

num_epochs = 20

for epoch in range(num_epochs):
    train_loss, train_accuracy = train_epoch(train_loader, model, optimizer, criterion, device)
    val_loss = validation_loss(val_loader, model, criterion, device)
    val_loss_history.append(val_loss)
    # validate() returns (accuracy, F1): unpack once so that a scalar accuracy,
    # not the whole tuple, is stored for plotting.
    val_accuracy, val_f1 = validate(val_loader, model, device, criterion)
    spatial_accuracies.append(val_accuracy)
    train_accuracies.append(train_accuracy)
    spatial_f1_scores.append(val_f1)

    print(f"[Spatial] Epoch {epoch + 1}, Train Loss: {train_loss:.9f}, Train Accuracy: {train_accuracy:.9f}, Val Accuracy: {val_accuracy:.9f} , Validation F1 Score: {val_f1:.9f}")

# For temporal graphs:
temp_train_accuracies = []
temporal_graphs = []
temporal_f1_scores = []
# Link each frame to the one that follows it in `unique_frames`
# (the previous code referenced an undefined `unique_frame_numbers`).
# NOTE(review): this pairs frames in order of first appearance in df -
# presumably that is chronological order; verify.
for i in range(len(unique_frames) - 1):
    current_frame = df[df['frame_num'] == unique_frames[i]]
    next_frame = df[df['frame_num'] == unique_frames[i + 1]]
    graph = construct_temporal_graph_for_frame(current_frame, next_frame)
    temporal_graphs.append(graph)

# The temporal graphs were never split before; hold out 20% for validation.
train_temporal_graphs, val_temporal_graphs = train_test_split(
    temporal_graphs, test_size=0.2, random_state=42
)

# collate_fn makes batching work whichever DataLoader class is currently bound.
train_temporal_loader = DataLoader(train_temporal_graphs, batch_size=32, shuffle=True, collate_fn=Batch.from_data_list)
val_temporal_loader = DataLoader(val_temporal_graphs, batch_size=32, shuffle=False, collate_fn=Batch.from_data_list)
temporal_accuracies = []
for epoch in range(num_epochs):
    # Train and evaluate on the temporal loaders (the spatial ones were used
    # here by mistake), and compute the metrics before recording them.
    train_loss, train_accuracy = train_epoch(train_temporal_loader, model, optimizer, criterion_class, device)
    val_accuracy, val_f1 = validate(val_temporal_loader, model, device, criterion_class)

    temporal_accuracies.append(val_accuracy)
    temp_train_accuracies.append(train_accuracy)
    temporal_f1_scores.append(val_f1)

    print(f"[Temporal] Epoch {epoch + 1}, Train Loss: {train_loss:.9f}, Train Accuracy: {train_accuracy:.9f}, Val Accuracy: {val_accuracy:.9f},Validation F1 Score: {val_f1:.9f}")

# Visualization
plt.figure(figsize=(10, 6))
plt.plot(train_accuracies, '--', label="Spatial Training Accuracy")
plt.plot(spatial_accuracies, label="Spatial Validation Accuracy")
plt.plot(temp_train_accuracies, '--', label="Temporal Training Accuracy")
plt.plot(temporal_accuracies, label="Temporal Validation Accuracy")
plt.plot(spatial_f1_scores, '-.', label="Spatial Validation F1 Score")
plt.plot(temporal_f1_scores, ':', label="Temporal Validation F1 Score")
plt.xlabel('Epoch')
plt.ylabel('Metric Value')
plt.legend()
plt.title('Performance Comparison')
plt.savefig('Performance Comparison spa vs temp ALLdatasetgat.png')
plt.show()




In [None]:
# Saving the model: only the state dict (parameters/buffers) is written.
# NOTE(review): absolute, machine-specific path - presumably should be configurable.
model_save_path = "/Users/mac/Desktop/gnn/archive/Performance Comparison.pth"
torch.save(model.state_dict(), model_save_path)


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader


# Reduce the learning rate by 10x when the validation loss stops improving.
# NOTE(review): `patience`, `num_epochs`, `model`, `optimizer` and `device` come
# from earlier cells. One scheduler is shared by both training phases, so a
# learning-rate reduction in the spatial phase carries over to the temporal one.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=patience, factor=0.1, verbose=True)

# Criterion/Loss
criterion = nn.CrossEntropyLoss()

# Visualization Lists
train_accuracies, spatial_accuracies, spatial_f1_scores = [], [], []
temp_train_accuracies, temporal_accuracies, temporal_f1_scores = [], [], []

# DataLoader Definitions
# DataLoader is torch.utils.data.DataLoader in this cell; it needs an explicit
# collate function to turn a list of graphs into a batch with .x/.edge_index/.y.
# NOTE(review): the *_dataset names are not defined in any visible cell and are
# assumed to yield torch_geometric Data objects - confirm.
train_spatial_loader = DataLoader(train_spatial_dataset, batch_size=32, shuffle=True, collate_fn=Batch.from_data_list)
val_spatial_loader = DataLoader(val_spatial_dataset, batch_size=32, shuffle=False, collate_fn=Batch.from_data_list)

train_temporal_loader = DataLoader(train_temporal_dataset, batch_size=32, shuffle=True, collate_fn=Batch.from_data_list)
val_temporal_loader = DataLoader(val_temporal_dataset, batch_size=32, shuffle=False, collate_fn=Batch.from_data_list)

# Early stopping initialization
best_val_loss = float('inf')
counter = 0

# Training loop for Spatial Graphs
for epoch in range(num_epochs):
    # Training (on the spatial loaders built above, not the stale global ones)
    train_loss, train_accuracy = train_epoch(train_spatial_loader, model, optimizer, criterion, device)
    # Validation
    val_loss = validation_loss(val_spatial_loader, model, criterion, device)
    val_accuracy, val_f1 = validate(val_spatial_loader, model, device, criterion)

    # Visualization Data
    train_accuracies.append(train_accuracy)
    spatial_accuracies.append(val_accuracy)
    spatial_f1_scores.append(val_f1)

    # Logging
    print(f"[Spatial] Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Accuracy: {val_accuracy:.4f}, Validation F1 Score: {val_f1:.4f}")

    # Scheduler and Early stopping
    scheduler.step(val_loss)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # Patience counts consecutive epochs without improvement, so reset it.
        counter = 0
        torch.save(model.state_dict(), "best_spatial_model_weights.pth")
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered for spatial training.")
            break

# Resetting early stopping and best_val_loss
best_val_loss = float('inf')
counter = 0

# Training loop for Temporal Graphs
for epoch in range(num_epochs):
    # Training
    train_loss, train_accuracy = train_epoch(train_temporal_loader, model, optimizer, criterion, device)
    # Validation
    val_loss = validation_loss(val_temporal_loader, model, criterion, device)
    val_accuracy, val_f1 = validate(val_temporal_loader, model, device, criterion)

    # Visualization Data
    temp_train_accuracies.append(train_accuracy)
    temporal_accuracies.append(val_accuracy)
    temporal_f1_scores.append(val_f1)

    # Logging
    print(f"[Temporal] Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Accuracy: {val_accuracy:.4f}, Validation F1 Score: {val_f1:.4f}")

    # Scheduler and Early stopping
    scheduler.step(val_loss)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
        torch.save(model.state_dict(), "best_temporal_model_weights.pth")
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered for temporal training.")
            break

# Visualization
plt.figure(figsize=(10, 6))
plt.plot(train_accuracies, '--', label="Spatial Training Accuracy")
plt.plot(spatial_accuracies, label="Spatial Validation Accuracy")
plt.plot(temp_train_accuracies, '--', label="Temporal Training Accuracy")
plt.plot(temporal_accuracies, label="Temporal Validation Accuracy")
plt.plot(spatial_f1_scores, '-.', label="Spatial Validation F1 Score")
plt.plot(temporal_f1_scores, ':', label="Temporal Validation F1 Score")
plt.xlabel('Epoch')
plt.ylabel('Metric Value')
plt.legend()
plt.title('Performance Comparison: Spatial vs Temporal')
plt.savefig('Performance_Comparison_spa_vs_temp_ALLdatasetgat.png')
plt.show()

In [None]:
import os
import torch
from joblib import Parallel, delayed
import pandas as pd

# Assume 'construct_graph_for_frame' and 'process_frame' functions are defined as per your previous codes

def save_result_to_disk(result, batch_index, folder='results'):
    """Save the result object to disk using torch.save.

    Args:
        result: Object to serialise.
        batch_index: Index used in the file name ``result_batch_<index>.pt``.
        folder: Target directory; created (with parents) if it is missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)
    file_path = os.path.join(folder, f'result_batch_{batch_index}.pt')
    torch.save(result, file_path)

def process_frame(frame_num, df_frame):
    """Build the spatial graph for ``df_frame`` and return it tagged with ``frame_num``."""
    frame_graph = construct_graph_for_frame(df_frame)
    return frame_num, frame_graph

def process_and_save(df, chunk_size):
    """Turn consecutive row chunks of ``df`` into graphs and save each one to disk.

    NOTE(review): chunks are fixed-size row ranges, not frames, so a single
    graph may mix rows from several frames - confirm this is intended.
    """
    # Ceiling division: a trailing partial chunk still gets processed.
    chunk_count = -(-len(df) // chunk_size)

    for chunk_index in range(chunk_count):
        start = chunk_index * chunk_size
        rows = df.iloc[start:start + chunk_size]

        # The chunk index doubles as the batch id in the output file name.
        batch_id, graph = process_frame(chunk_index, rows)
        save_result_to_disk(graph, batch_id)

# Example usage:
# `df` must already exist in the kernel (it is created by the data-loading cell
# near the top of the notebook).
# Each group of 1000 rows becomes one saved graph file in the default
# 'results' folder.
chunk_size = 1000
process_and_save(df, chunk_size)
