In [71]:
import pandas as pd
import numpy as np
import os

input_file_paths = [
    r'F:\VERO UTENTE\Desktop\Uni\dissertation\main\K-fold\large-cross-entropy--5e-5\data_all\FIRST\test_results_alldata.csv',
    r'F:\VERO UTENTE\Desktop\Uni\dissertation\main\NO-fold\DeepSeek\thinking\test\deepseek-reasoner_calculators_ev_gemini_1.1_results-temp=0.5.csv',
    r'F:\VERO UTENTE\Desktop\Uni\dissertation\main\NO-fold\Google\non-thinking\test\gemini-2.5-flash-preview-05-20_calculators_results.csv',
    #r'F:\VERO UTENTE\Desktop\Uni\dissertation\main\NO-fold\Grok\non-thinking\test\grok-3-mini-latest_calculators_results.csv',
   # r'F:\VERO UTENTE\Desktop\Uni\dissertation\main\NO-fold\Grok\thinking\test\grok-3-mini-latest_calculators_results-temp=0.4.csv',
]

filter_file_path = r'F:\VERO UTENTE\Desktop\Uni\dissertation\main\data\test_set_30reduced.csv'

data_frames = [pd.read_csv(file_path) for file_path in input_file_paths]
data_filter = pd.read_csv(filter_file_path)

# Get the indices from the filter file
filter_indices = set(data_filter['idx'])

# Check which column is used for indexing across the dataframes
# Find common indices across all dataframes that are also in the filter
common_indices = set(data_frames[0]['index'])
for df in data_frames[1:]:
    common_indices.intersection_update(df['index'])

# Filter using only indices that appear in data_filter
common_indices = common_indices.intersection(filter_indices)

print(f"Number of common indices: {len(common_indices)}")
common_indices_list = sorted(list(common_indices))
print(f"First few common indices: {common_indices_list[:5]}")

CLASS_LABELS = ["indicator", "ideation", "behavior", "attempt"]
# Adjust weights to use all models (example weights)
#weights = [0.738, 0.750, 0.745, 0.774, 0.743]
weights = [0.744, 0.750, 0.745]

# Create new dataframe with common samples
new_df = pd.DataFrame()
new_df['index'] = common_indices_list

# Get the original post and labels from the first dataframe
for idx in common_indices_list:
    mask = data_frames[0]['index'] == idx
    if any(mask):
        row_idx = mask.idxmax()
        if 'post' not in new_df.columns:
            new_df['post'] = ""
            new_df['labels'] = ""
        new_df.loc[new_df['index'] == idx, 'post'] = data_frames[0].loc[row_idx, 'post']
        new_df.loc[new_df['index'] == idx, 'post_risk'] = data_frames[0].loc[row_idx, 'post_risk']

# Create a weighted average of predictions
weighted_sum = np.zeros((len(common_indices_list), len(CLASS_LABELS)))

for i, df in enumerate(data_frames):
    for j, idx in enumerate(common_indices_list):
        mask = df['index'] == idx
        if any(mask):
            row_idx = mask.idxmax()
            pred = df.loc[row_idx, 'predicted_label']
            class_index = CLASS_LABELS.index(pred)
            weighted_sum[j, class_index] += weights[i]

# Determine final predictions
final_predictions = []
for i in range(len(common_indices_list)):
    max_index = np.argmax(weighted_sum[i])
    final_predictions.append(CLASS_LABELS[max_index])

# Add final predictions to the dataframe
new_df['predicted_label'] = final_predictions

# Save the combined predictions
output_path = r'F:\VERO UTENTE\Desktop\Uni\dissertation\main\K-fold\large-cross-entropy--5e-5\ensamble_explore\TEST_alldata_ensemble_gemini+deep+FIRST.csv'
new_df.to_csv(output_path, index=False)

print(f"Combined predictions saved to {output_path}")
print(f"Number of filtered common samples: {len(common_indices_list)}")

# Print a few example predictions
print("\nSample predictions:")
for i in range(min(5, len(new_df))):
    print(f"Index {new_df.iloc[i]['index']}: True={new_df.iloc[i]['labels']}, Predicted={new_df.iloc[i]['predicted_label']}")

Number of common indices: 30
First few common indices: [3, 4, 14, 15, 16]
Combined predictions saved to F:\VERO UTENTE\Desktop\Uni\dissertation\main\K-fold\large-cross-entropy--5e-5\ensamble_explore\TEST_alldata_ensemble_gemini+deep+FIRST.csv
Number of filtered common samples: 30

Sample predictions:
Index 3: True=, Predicted=behavior
Index 4: True=, Predicted=indicator
Index 14: True=, Predicted=indicator
Index 15: True=, Predicted=indicator
Index 16: True=, Predicted=ideation


In [72]:
# First, fix the true labels extraction (add this after creating new_df but before the weighted average calculation)
# Get the original post and labels from the first dataframe
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
import numpy as np


new_df = pd.read_csv(r'F:\VERO UTENTE\Desktop\Uni\dissertation\main\K-fold\large-cross-entropy--5e-5\ensamble_explore\TEST_alldata_ensemble_gemini+deep+FIRST.csv')
#print new_def columns
print(new_df.columns)

for idx in common_indices_list:
    mask = data_frames[0]['index'] == idx
    if any(mask):
        row_idx = mask.idxmax()
        if 'post' not in new_df.columns:
            new_df['post'] = ""
            new_df['true_label_str'] = ""  # Initialize true_label_str column
        new_df.loc[new_df['index'] == idx, 'post'] = data_frames[0].loc[row_idx, 'post']
        if 'post_risk' in data_frames[0].columns:
            new_df.loc[new_df['index'] == idx, 'post_risk'] = data_frames[0].loc[row_idx, 'post_risk']
        if 'true_label_str' in data_frames[0].columns and 'post_risk' in data_frames[0].columns:
            new_df.loc[new_df['index'] == idx, 'post_risk'] = data_frames[0].loc[row_idx, 'post_risk']
        elif 'post_risk' in df.columns:
            new_df.loc[new_df['index'] == idx, 'post_risk'] = df.loc[df['index'] == idx, 'post_risk'].values[0]
        else:
            new_df.loc[new_df['index'] == idx, 'post_risk'] = np.nan
            new_df.loc[new_df['index'] == idx, 'post_risk'] = data_frames[0].loc[row_idx, 'post_risk']

# Add this function and metrics calculation after the ensemble predictions
def calculate_metrics_with_bootstrap(df, n_bootstrap=1000, random_state=42):
    np.random.seed(random_state)
    metrics = {'weighted_f1': [], 'macro_f1': [], 'accuracy': []}
    for _ in range(n_bootstrap):
        sample = df.sample(frac=1, replace=True)
        metrics['weighted_f1'].append(f1_score(sample['post_risk'], sample['predicted_label'], average='weighted'))
        metrics['macro_f1'].append(f1_score(sample['post_risk'], sample['predicted_label'], average='macro'))
        metrics['accuracy'].append(accuracy_score(sample['post_risk'], sample['predicted_label']))
    results = {
        'weighted_f1': (np.mean(metrics['weighted_f1'])*100, np.std(metrics['weighted_f1'])*100),
        'macro_f1': (np.mean(metrics['macro_f1'])*100, np.std(metrics['macro_f1'])*100),
        'accuracy': (np.mean(metrics['accuracy'])*100, np.std(metrics['accuracy'])*100)
    }
    return results

# Calculate metrics for the ensemble
bootstrap_results = calculate_metrics_with_bootstrap(new_df)

# Print the results
print("\nEnsemble Model Performance (with standard deviation via bootstrapping):")
for metric, (mean, std) in bootstrap_results.items():
    print(f"{metric}: {mean:.1f} \pm {std:.1f}")

  print(f"{metric}: {mean:.1f} \pm {std:.1f}")


Index(['index', 'post', 'labels', 'post_risk', 'predicted_label'], dtype='object')

Ensemble Model Performance (with standard deviation via bootstrapping):
weighted_f1: 76.9 \pm 7.7
macro_f1: 80.1 \pm 6.9
accuracy: 77.0 \pm 7.6
