In [None]:
!pip install -q pandas numpy matplotlib seaborn

# Analysis of Evaluation Results

This notebook analyzes the results of a model evaluation run, with a special focus on converting tensor-formatted label strings (e.g. `tensor(3, dtype=...)`) to numerical values.

In [2]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import ast
import re

# Set style for better visualizations.
# Order matters: set_theme() applies seaborn's theme first, then set_palette()
# switches the active color palette to 'husl'.
sns.set_theme()
sns.set_palette('husl')

In [None]:
# Location of the results CSV to analyze. The directory name appears to encode
# the model and a run timestamp — TODO confirm.
# NOTE(review): the path is relative, so it resolves against the kernel's
# current working directory.
results_path = "llama_3.2_11B_20250422_052647/results.csv"
# Load the whole CSV into the DataFrame used by every later cell.
df = pd.read_csv(results_path)


# Decimal literal with an optional exponent, e.g. "3", "-2.5", ".5", "1.5000e-03".
_NUMBER_PATTERN = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
# First numeric value inside a tensor repr; leading list brackets are allowed,
# e.g. "tensor(3, dtype=...)", "tensor(3)", "tensor([0.5], device=...)".
_TENSOR_VALUE_RE = re.compile(r"tensor\(\s*\[*\s*(" + _NUMBER_PATTERN + r")")
# Any number anywhere in a string (fallback for strings that are not tensor reprs).
_ANY_NUMBER_RE = re.compile(_NUMBER_PATTERN)


def convert_tensor_to_number(tensor_str):
    """Convert a tensor-formatted string such as ``"tensor(3, dtype=...)"`` to a float.

    Args:
        tensor_str: Usually a string containing a tensor repr. Real numbers
            (Python or NumPy ints/floats, including NaN) are passed through as
            floats; any other non-string value (``None``, bool, bytes, ...)
            yields NaN.

    Returns:
        float: The first numeric value of the tensor repr. If the string is not
        a tensor repr, the first number found anywhere in it. ``np.nan`` when no
        number can be extracted.

    Notes:
        * Scientific notation is supported ("tensor(1.5000e-03)" -> 0.0015).
        * When the string contains "tensor(" but the tensor value itself is not
          a plain number (e.g. "tensor(nan, dtype=torch.float64)" or
          "tensor([], dtype=torch.int64)"), NaN is returned instead of digits
          picked up from the dtype/device suffix.
        * The function never raises for unexpected input; it returns NaN.
    """
    # Already numeric (bool excluded on purpose): nothing to parse.
    if isinstance(tensor_str, (int, float, np.integer, np.floating)) and not isinstance(
        tensor_str, bool
    ):
        try:
            return float(tensor_str)
        except OverflowError:
            # Python ints too large for a float cannot be represented.
            return np.nan

    if not isinstance(tensor_str, str):
        return np.nan

    # Preferred: the value directly inside "tensor(".
    tensor_match = _TENSOR_VALUE_RE.search(tensor_str)
    if tensor_match:
        return float(tensor_match.group(1))

    # A tensor repr without a parsable value: do not mistake the "64" of
    # "torch.float64" or the "0" of "cuda:0" for the label.
    if "tensor(" in tensor_str:
        return np.nan

    # Not a tensor repr: fall back to the first number anywhere in the string.
    number_match = _ANY_NUMBER_RE.search(tensor_str)
    if number_match:
        return float(number_match.group(0))

    return np.nan


# Derive a numeric column from the raw extracted_label values
df["extracted_label_numeric"] = df["extracted_label"].apply(convert_tensor_to_number)

# Overview of the dataset; the column count includes the column added above
row_count, column_count = df.shape
overview_lines = [
    "Dataset Info:",
    "-" * 50,
    f"Number of rows: {row_count}",
    f"Number of columns: {column_count}",
    "\nColumns in the dataset:",
    str(df.columns.tolist()),
    "\nFirst few rows:",
]
print("\n".join(overview_lines))

# Last expression of the cell: rendered as the cell's rich output
df.head()

In [None]:
# Summary statistics for the converted numeric labels
numeric_labels = df['extracted_label_numeric']

print("Statistics of Extracted Labels:")
print("-" * 50)
print(numeric_labels.describe())

# NaN entries mark values from which no number could be extracted
failed_conversions = numeric_labels.isna().sum()
print(f"\nNumber of failed conversions:\n{failed_conversions}")

In [None]:
# 2x2 grid of diagnostic plots for the extracted labels
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(hist_ax, box_ax), (time_ax, scatter_ax) = axes

# Top-left: distribution of the extracted labels
sns.histplot(data=df, x='extracted_label_numeric', bins=30, ax=hist_ax)
hist_ax.set_title('Distribution of Extracted Labels')

# Top-right: box plot of the extracted labels
sns.boxplot(data=df, y='extracted_label_numeric', ax=box_ax)
box_ax.set_title('Box Plot of Extracted Labels')

# Bottom-left: extracted labels against time.
# The timestamp column is parsed to datetime in place (df keeps the parsed column).
df['timestamp'] = pd.to_datetime(df['timestamp'])
labels_by_time = df.set_index('timestamp')['extracted_label_numeric']
labels_by_time.plot(ax=time_ax)
time_ax.set_title('Extracted Labels Over Time')

# Bottom-right: time between frames against the extracted label
sns.scatterplot(
    data=df,
    x='time_between_frames',
    y='extracted_label_numeric',
    ax=scatter_ax,
)
scatter_ax.set_title('Time Between Frames vs Extracted Label')

fig.tight_layout()
plt.show()

In [None]:
# Additional analysis: compare target vs extracted label.
# Skipped entirely when the results file has no 'target' column.
if 'target' in df.columns:
    # Convert target to numeric if it's not already; unparsable targets become NaN
    df['target_numeric'] = pd.to_numeric(df['target'], errors='coerce')

    # End points of the y = x reference line, spanning the observed target range
    target_range = [df['target_numeric'].min(), df['target_numeric'].max()]

    # Explicit Axes instead of the implicit pyplot "current axes" state
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(df['target_numeric'], df['extracted_label_numeric'], alpha=0.5)
    ax.plot(target_range, target_range, 'r--', label='Perfect Prediction')
    ax.set_xlabel('Target Value')
    ax.set_ylabel('Extracted Label')
    ax.set_title('Target vs Extracted Label')
    ax.legend()
    plt.show()

    # Calculate error metrics.
    # The error vector is computed once. Rows where either value is NaN
    # (non-numeric target or failed label conversion) are dropped explicitly,
    # and the number of rows actually compared is reported so the metrics
    # cannot silently describe only a subset of the data.
    errors = (df['target_numeric'] - df['extracted_label_numeric']).dropna()
    mae = errors.abs().mean()              # NaN when no row is comparable
    rmse = np.sqrt((errors ** 2).mean())   # NaN when no row is comparable

    print(f"Rows compared: {len(errors)} of {len(df)}")
    print(f"Mean Absolute Error: {mae:.4f}")
    print(f"Root Mean Square Error: {rmse:.4f}")