In [1]:
import os
import pandas as pd
import numpy as np

# Directory containing the user-study CSV files
results_dir = '../Results/UserStudy'

# One DataFrame per CSV file found in results_dir
all_responses = []

# os.listdir returns entries in arbitrary (filesystem-dependent) order. Sorting
# keeps the merged row order -- and everything derived from it, such as the
# order in which models first appear in the score tables -- reproducible.
for filename in sorted(os.listdir(results_dir)):
    if filename.endswith('.csv'):
        file_path = os.path.join(results_dir, filename)
        # Files are decoded as ISO-8859-1 rather than pandas' default UTF-8
        df = pd.read_csv(file_path, encoding='ISO-8859-1')
        all_responses.append(df)

# pd.concat raises a bare "No objects to concatenate" ValueError on an empty
# list; raise the same exception type with a message that names the directory.
if not all_responses:
    raise ValueError(f"No .csv files found in {results_dir!r}")

# Concatenate all dataframes into one, renumbering rows 0..n-1
merged_responses = pd.concat(all_responses, ignore_index=True)




In [2]:
# Lookup table used to decode the rankings: map_letters_to_models reads its
# 'text_prompt' and 'model_url_1' .. 'model_url_4' columns.
# NOTE(review): this path is relative to the working directory, while the
# results directory is given as '../Results/UserStudy' -- confirm both resolve
# from the same location.
mapping = pd.read_csv('Web/mapping.csv')

In [3]:
def map_letters_to_models(merged_responses, mapping):
    """Replace the letters A-D in the ranking columns with model names.

    Each response row is matched to a row of ``mapping`` through its
    ``text_prompt``. The letters 'A', 'B', 'C' and 'D' in the row's ranking
    columns are then replaced by the values of ``model_url_1`` ..
    ``model_url_4`` from that mapping row.

    Parameters
    ----------
    merged_responses : pandas.DataFrame
        Responses with a ``text_prompt`` column and the four ranking columns
        (``smoothness_ranking``, ``picture_quality_ranking``,
        ``accuracy_ranking``, ``overall_ranking``). Each ranking cell is a
        comma-separated string of letters, or missing.
    mapping : pandas.DataFrame
        Table with ``text_prompt`` and ``model_url_1`` .. ``model_url_4``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``merged_responses`` with the ranking cells translated.
        The input frame is left untouched, so the call can be repeated
        (for example when a notebook cell is re-run) without failing on
        values that were already translated.

    Raises
    ------
    IndexError
        If a response's ``text_prompt`` has no row in ``mapping``.
    KeyError
        If a ranking cell contains a token other than A, B, C or D.
    """
    ranking_columns = ['smoothness_ranking', 'picture_quality_ranking',
                       'accuracy_ranking', 'overall_ranking']
    letter_columns = {
        'A': 'model_url_1',
        'B': 'model_url_2',
        'C': 'model_url_3',
        'D': 'model_url_4',
    }

    # Build the prompt -> {letter: model} lookup once instead of filtering the
    # mapping frame for every response row. setdefault keeps the first row for
    # a duplicated prompt, matching a "first match wins" lookup.
    letters_by_prompt = {}
    for _, mapping_row in mapping.iterrows():
        prompt = mapping_row['text_prompt']
        if pd.isna(prompt):
            # A missing prompt can never match a response, so skip it.
            continue
        letters_by_prompt.setdefault(
            prompt,
            {letter: mapping_row[column] for letter, column in letter_columns.items()},
        )

    # Write into a copy so the caller's frame keeps its original letters.
    mapped_responses = merged_responses.copy()

    for index, row in merged_responses.iterrows():
        prompt = row['text_prompt']
        letter_to_model = letters_by_prompt.get(prompt)
        if letter_to_model is None:
            # IndexError is what a failed positional lookup raises; keep that
            # type but say which prompt is missing.
            raise IndexError(
                f"No mapping row found for text_prompt {prompt!r} (response row {index!r})"
            )

        for column in ranking_columns:
            if pd.notna(row[column]):
                # strip() tolerates cells written as "A, B, C, D"
                mapped_responses.at[index, column] = ','.join(
                    letter_to_model[letter.strip()] for letter in row[column].split(',')
                )
    return mapped_responses

def calculate_model_scores(merged_responses):
    """Compute the mean and standard deviation of each model's rank.

    Every non-missing ranking cell is a comma-separated list of model names,
    best first. A model at position ``i`` (0-based) receives rank ``i + 1``.

    Parameters
    ----------
    merged_responses : pandas.DataFrame
        Frame containing the columns ``smoothness_ranking``,
        ``picture_quality_ranking``, ``accuracy_ranking`` and
        ``overall_ranking``.

    Returns
    -------
    dict
        ``{'mean': {column: {model: float}}, 'std': {column: {model: float}}}``.
        Models appear in the order in which they are first encountered.
        The standard deviation is the population value (``np.std`` default,
        ``ddof=0``).

    Notes
    -----
    The mean is taken over the rankings in which the model actually appears,
    i.e. over the same list of ranks the standard deviation uses. Dividing by
    the number of non-missing rows in the column instead gives a mean that is
    too low for any model absent from some rankings.
    """
    ranking_columns = ['smoothness_ranking', 'picture_quality_ranking',
                       'accuracy_ranking', 'overall_ranking']

    # column -> model -> list of ranks (1 = best) the model received
    model_all_rankings = {column: {} for column in ranking_columns}

    for _, row in merged_responses.iterrows():
        for column in ranking_columns:
            if pd.notna(row[column]):
                for position, model in enumerate(row[column].split(',')):
                    # strip() so "A, B" and "A,B" refer to the same models
                    model_all_rankings[column].setdefault(model.strip(), []).append(position + 1)

    result = {
        'mean': {column: {} for column in ranking_columns},
        'std': {column: {} for column in ranking_columns},
    }

    for column, rankings_by_model in model_all_rankings.items():
        for model, ranks in rankings_by_model.items():
            # ranks is never empty: a model is only registered when it is ranked
            result['mean'][column][model] = sum(ranks) / len(ranks)
            result['std'][column][model] = np.std(ranks)

    return result


In [4]:
# Report how many responses carry a smoothness ranking (non-null cells in that column)
print(f"Total number of rankings: {len(merged_responses['smoothness_ranking'].dropna())}")
# Translate the letters in the ranking columns into the model names from the mapping table
merged_responses_mapped = map_letters_to_models(merged_responses, mapping)
# Aggregate the rank positions into a mean and std per model and ranking column
model_scores = calculate_model_scores(merged_responses_mapped)

Total number of rankings: 400


In [5]:
def print_scores_as_table(model_scores):
    """Print one table of per-model mean and std for every ranking column.

    ``model_scores`` is a dict with the keys ``'mean'`` and ``'std'``, each
    mapping a ranking column name to a ``{model: value}`` dict. For every
    ranking column a heading line is printed, followed by a DataFrame with the
    columns ``Model``, ``Mean`` (one decimal) and ``Std`` (two decimals), and
    then a blank line. Nothing is returned.
    """
    for ranking, mean_by_model in model_scores['mean'].items():
        print(f"Rankings for {ranking}:")
        # Values are pre-formatted as strings so the table shows fixed precision
        table_rows = [
            {
                'Model': model,
                'Mean': format(mean_value, '.1f'),
                'Std': format(model_scores['std'][ranking][model], '.2f'),
            }
            for model, mean_value in mean_by_model.items()
        ]
        print(pd.DataFrame(table_rows))
        print()

# Print one mean/std table per ranking column for the scores computed above
print_scores_as_table(model_scores)

Rankings for smoothness_ranking:
       Model Mean   Std
0  FreeBloom  2.5  1.05
1    T2VZero  2.4  1.10
2      EIDTV  1.9  0.93
3   DirecT2V  3.2  0.97

Rankings for picture_quality_ranking:
       Model Mean   Std
0  FreeBloom  2.0  0.93
1    T2VZero  2.5  1.00
2      EIDTV  2.0  0.97
3   DirecT2V  3.5  0.81

Rankings for accuracy_ranking:
       Model Mean   Std
0  FreeBloom  2.0  1.01
1   DirecT2V  3.1  0.99
2      EIDTV  2.1  1.04
3    T2VZero  2.7  1.09

Rankings for overall_ranking:
       Model Mean   Std
0  FreeBloom  2.3  1.00
1      EIDTV  1.9  0.97
2    T2VZero  2.5  1.04
3   DirecT2V  3.3  0.94

