In [48]:
#Evaluation of Objective 1
#bullet points

#Step 1
#Create synthetic sentences

import pandas as pd

# Load the attribution CSV into a DataFrame. The code below reads its
# 'Fund', 'Period', 'GICS Sector', weight, return and effect columns.
file_path = './Data/Attribution/attribution_by_gics_Obj1.csv'
df = pd.read_csv(file_path)

# Define a function to create sentences based on the values
def create_sentences(row, benchmark_total_return):
    """Build the synthetic allocation and selection sentences for one sector row.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing 'GICS Sector',
            'Portfolio Weight', 'Benchmark Weight', 'Portfolio Return',
            'Benchmark Return', 'Allocation Effect' and 'Selection Effect'.
        benchmark_total_return: Total benchmark return that the sector's
            benchmark return is compared against.

    Returns:
        A tuple ``(allocation_sentence, selection_sentence)``.
    """
    sector_name = row['GICS Sector']
    alloc_effect = row['Allocation Effect']
    select_effect = row['Selection Effect']

    # Fund positioning versus the benchmark weight (a tie is reported as underweight)
    if row['Portfolio Weight'] > row['Benchmark Weight']:
        positioning = 'overweight'
    else:
        positioning = 'underweight'

    # Sector benchmark return versus the total benchmark return
    if row['Benchmark Return'] > benchmark_total_return:
        sector_vs_market = 'outperformed'
    else:
        sector_vs_market = 'underperformed'

    # Portfolio return versus the sector's benchmark return
    if row['Portfolio Return'] > row['Benchmark Return']:
        fund_vs_sector = 'outperformed'
    else:
        fund_vs_sector = 'underperformed'

    # An effect of exactly zero is worded as 'negative'
    alloc_sign = 'positive' if alloc_effect > 0 else 'negative'
    select_sign = 'positive' if select_effect > 0 else 'negative'

    allocation_sentence = "".join([
        f"The {sector_name} sector had a {alloc_sign} allocation effect of {alloc_effect}. ",
        f"This was due to the fund being {positioning} compared to benchmark in a sector that {sector_vs_market} the benchmark total return.",
    ])

    selection_sentence = "".join([
        f"The {sector_name} sector had a {select_sign} selection effect of {select_effect}. ",
        f"Fund investments {fund_vs_sector} compared to the sector benchmark.",
    ])

    return allocation_sentence, selection_sentence

# Process each block of 12 rows and create a separate CSV file
# NOTE(review): presumably each fund/period occupies exactly 12 consecutive rows
# of the input CSV — TODO confirm; the block size is hard-coded here.
for start_row in range(0, df.shape[0], 12):
    block_df = df.iloc[start_row:start_row + 12]
    
    # Total benchmark return for the block: sum of weight * return over its rows
    benchmark_total_return = (block_df['Benchmark Weight'] * block_df['Benchmark Return']).sum()
    # One (allocation, selection) sentence pair per row of the block
    sentences = block_df.apply(lambda row: create_sentences(row, benchmark_total_return), axis=1)
    #sentences = block_df.apply(create_sentences, axis=1)
    sentences_df = pd.DataFrame(list(sentences), columns=['Allocation', 'Selection'])

    # Add 'Fund' and 'Period' from the original DataFrame to the sentences DataFrame
    # Reset the index of block_df so that label 0 is the first row of the block
    block_df = block_df.reset_index(drop=True)
    sentences_df['Fund'] = block_df.loc[0, 'Fund']  # Take the first 'Fund' value from the block
    sentences_df['Period'] = block_df.loc[0, 'Period']  # Take the first 'Period' value from the block
    
    # Construct the file name; spaces and slashes in the period become underscores
    fund_period_str = block_df.iloc[0]['Fund'] + '_' + block_df.iloc[0]['Period'].replace(' ', '_').replace('/', '_')
    file_name = f'sentences_summary_new_{fund_period_str}.csv'

    # Save the sentences to a new CSV file
    output_file_path = f'./Data/Attribution/{file_name}'
    sentences_df.to_csv(output_file_path, index=False)

    print(f"Sentences for {fund_period_str} have been generated and saved to", output_file_path)

        
    

Sentences for Defensive_1_31_2022_to_3_31_2022 have been generated and saved to ./Data/Attribution/sentences_summary_new_Defensive_1_31_2022_to_3_31_2022.csv
Sentences for Defensive_4_1_2022_to_6_30_2022 have been generated and saved to ./Data/Attribution/sentences_summary_new_Defensive_4_1_2022_to_6_30_2022.csv
Sentences for Defensive_7_1_2022_to_9_30_2022 have been generated and saved to ./Data/Attribution/sentences_summary_new_Defensive_7_1_2022_to_9_30_2022.csv
Sentences for Defensive_10_1_2022_to_12_31_2022 have been generated and saved to ./Data/Attribution/sentences_summary_new_Defensive_10_1_2022_to_12_31_2022.csv
Sentences for Growth_1_31_2022_to_3_31_2022 have been generated and saved to ./Data/Attribution/sentences_summary_new_Growth_1_31_2022_to_3_31_2022.csv
Sentences for Growth_4_1_2022_to_6_30_2022 have been generated and saved to ./Data/Attribution/sentences_summary_new_Growth_4_1_2022_to_6_30_2022.csv
Sentences for Growth_7_1_2022_to_9_30_2022 have been generated and s

In [50]:
# compare synthetic with bullet points
# calculates similarities using GPT and ST


import os
import pandas as pd
import numpy as np
import openai
from sentence_transformers import SentenceTransformer
# Read the OpenAI API key from the environment rather than hard-coding it in the
# notebook, so the secret is never saved with the file. Falls back to the previous
# empty-string value when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

# File path for the similarity scores CSV
similarity_scores_file = './Data/Attribution/Eval_Obj_1/similarity_scores_new_ppts_update_zero.csv'

# If a results file from an earlier run exists, truncate it to zero length
if os.path.exists(similarity_scores_file):
    with open(similarity_scores_file, 'w'):
        pass

def extract_period(filename):
    """Return the period portion of a generated-output filename.

    The filename is split on underscores; the tokens from index 5 up to (but not
    including) the last two are re-joined with underscores, and any spaces are
    replaced by underscores. The start index 5 is the setting used for the
    zero-shot prompt filenames.
    """
    period_tokens = filename.split('_')[5:-2]
    return '_'.join(period_tokens).replace(' ', '_')

# Function to extract the prefix type from the filename
def extract_prefix_type(filename, prefix):
    """Return the third and fourth underscore-separated tokens of `prefix`, joined by '_'.

    `filename` is accepted for call-site compatibility but is not used.
    """
    tokens = prefix.split('_')
    return f"{tokens[2]}_{tokens[3]}"

#def extract_sector_from_sentence(sentence):
#    start = sentence.index("for sector ") + len("for sector ")
#    end = sentence.index(" ", start)
#    return sentence[start:end]

def extract_sector_from_sentence(sentence):
    """Extract the sector name from a sentence shaped like "The <sector> sector had ...".

    The sector name is taken to be every word after the first "The " up to
    (not including) the first word that typically follows a sector name.

    Args:
        sentence: Sentence containing "The " followed by the sector name.

    Returns:
        The sector name as a single space-joined string (may be empty if a
        stop word immediately follows "The ").

    Raises:
        ValueError: If "The " does not occur in `sentence`.
    """
    # Words that might follow a sector name
    following_words = {"is", "had", "was", "sector", "with", "in", "and"}

    # Start right after the article. The previous offset used len("sector had "),
    # which skipped 11 characters and cut into (or past) the sector name.
    article = "The "
    start = sentence.index(article) + len(article)

    # Collect words until we hit a following word or run out of words
    sector_name = []
    for word in sentence[start:].split():
        if word in following_words:
            break
        sector_name.append(word)

    return " ".join(sector_name)



# Directory that is scanned for generated-output CSVs; the ground-truth
# "sentences_summary_new_<fund>_<period>.csv" files are looked up here as well
directory = './Data/Attribution/Eval_Obj_1'
# Filename prefixes of the generated outputs to score (alternative runs kept commented out)
#prefixes = ["output_bullet_few_1_new_", "output_bullet_few_2_new_", "output_bullet_few_3_new_"]
prefixes = ["output_bullet_zero_2_"]
#prefixes = ["output_bullet_few_1_new_"]
# Fund names expected directly after the prefix in a filename
fund_types = ['Defensive', 'Growth', 'Balanced']

# Function to get embeddings using OpenAI's API
def get_embeddings(text):
    """Return the embedding of `text` from the "text-embedding-ada-002" engine.

    Sends `text` as a single-item input list to ``openai.Embedding.create`` and
    returns the 'embedding' entry of the first element of the response's 'data'.
    """
    api_response = openai.Embedding.create(
        input=[text],
        engine="text-embedding-ada-002",
    )
    first_item = api_response['data'][0]
    return first_item['embedding']

# SentenceTransformer (ST) model; its encode() output feeds the
# 'Similarity_Allocation' / 'Similarity_Selection' columns below
model = SentenceTransformer('all-mpnet-base-v2') 
# Model reference: sentence-transformers/all-mpnet-base-v2

#def calculate_similarity(sentence1, sentence2, model):
#    embeds1 = model.encode(sentence1) / np.linalg.norm(model.encode(sentence1), axis=-1, keepdims=True)
#    embeds2 = model.encode(sentence2) / np.linalg.norm(model.encode(sentence2), axis=-1, keepdims=True)
#    return np.dot(embeds1, embeds2.T)

def calculate_similarity(embedding1, embedding2):
    """Return the cosine similarity of two embedding vectors.

    Computed as the dot product divided by the product of the two Euclidean
    norms. No guard is applied for zero-length vectors.
    """
    dot_product = np.dot(embedding1, embedding2)
    norm_product = np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
    return dot_product / norm_product

# One dict per scored row; converted to a DataFrame after the scan
similarity_results = []

# Scan the directory for generated-output files named <prefix><fund>...
for filename in os.listdir(directory):
    for prefix in prefixes:
        for fund in fund_types:
            if not filename.startswith(prefix + fund):
                continue

            print(f"Processing file: {filename}")
            period = extract_period(filename)
            file_path = os.path.join(directory, filename)
            df = pd.read_csv(file_path)
            prefix_type = extract_prefix_type(filename, prefix)

            # The ground truth depends only on fund and period, so it is loaded once
            # per generated file instead of being re-read for every row.
            ground_truth_file = os.path.join(directory, f"sentences_summary_new_{fund}_{period}.csv")
            if not os.path.exists(ground_truth_file):
                # Previously skipped silently; report it so missing scores are explainable
                print(f"Skipping {filename}: ground truth file not found ({ground_truth_file})")
                continue
            ground_truth_df = pd.read_csv(ground_truth_file)

            # Generated and ground-truth rows are paired by position
            # (assumes both files list the sectors in the same order)
            for idx, row in df.iterrows():
                sentence_1_col1 = row['Allocation']
                sentence_1_col2 = row['Selection']
                sentence_2_col1 = ground_truth_df.iloc[idx]['Allocation']
                sentence_2_col2 = ground_truth_df.iloc[idx]['Selection']

                # Sector label comes from the ground-truth allocation sentence
                sector = extract_sector_from_sentence(sentence_2_col1)

                # Similarities from SentenceTransformer embeddings
                embeddings1 = model.encode([sentence_1_col1, sentence_1_col2])
                embeddings2 = model.encode([sentence_2_col1, sentence_2_col2])
                similarity_st_col1 = calculate_similarity(embeddings1[0], embeddings2[0])
                similarity_st_col2 = calculate_similarity(embeddings1[1], embeddings2[1])

                # Similarities from OpenAI embeddings
                embedding_1_col1 = get_embeddings(sentence_1_col1)
                embedding_2_col1 = get_embeddings(sentence_2_col1)
                similarity_gpt3_col1 = calculate_similarity(embedding_1_col1, embedding_2_col1)

                embedding_1_col2 = get_embeddings(sentence_1_col2)
                embedding_2_col2 = get_embeddings(sentence_2_col2)
                similarity_gpt3_col2 = calculate_similarity(embedding_1_col2, embedding_2_col2)

                # Record the scores together with the identifying information
                similarity_results.append({
                    'File Name': filename,
                    'Fund': fund,
                    'Period': period,
                    'Prefix Type': prefix_type,
                    'Sector': sector,
                    'Similarity_Allocation': similarity_st_col1,
                    'Similarity_Selection': similarity_st_col2,

                    'GPT3_Similarity_Allocation': similarity_gpt3_col1,
                    'GPT3_Similarity_Selection': similarity_gpt3_col2
                })

# Convert the results to a DataFrame and save to a CSV file
similarity_results_df = pd.DataFrame(similarity_results)

similarity_results_df.to_csv(os.path.join(directory, 'similarity_scores_new_ppts_update_zero.csv'), index=False)
print("Similarity results saved.")






Processing file: output_bullet_zero_2_Growth_4_1_2022 to 6_30_2022_2023-11-05_18-47.csv
Processing file: output_bullet_zero_2_Defensive_7_1_2022 to 9_30_2022_2023-11-05_19-42.csv
Processing file: output_bullet_zero_2_Balanced_4_1_2022 to 6_30_2022_2023-11-05_19-05.csv
Processing file: output_bullet_zero_2_Defensive_4_1_2022 to 6_30_2022_2023-11-05_19-40.csv
Processing file: output_bullet_zero_2_Balanced_1_31_2022 to 3_31_2022_2023-11-05_19-00.csv
Processing file: output_bullet_zero_2_Growth_1_31_2022 to 3_31_2022_2023-11-05_18-42.csv
Processing file: output_bullet_zero_2_Balanced_10_1_2022 to 12_31_2022_2023-11-05_19-03.csv
Processing file: output_bullet_zero_2_Defensive_1_31_2022 to 3_31_2022_2023-11-05_19-34.csv
Processing file: output_bullet_zero_2_Growth_7_1_2022 to 9_30_2022_2023-11-05_18-50.csv
Processing file: output_bullet_zero_2_Defensive_10_1_2022 to 12_31_2022_2023-11-05_19-37.csv
Processing file: output_bullet_zero_2_Balanced_7_1_2022 to 9_30_2022_2023-11-05_19-08.csv
Proce

In [53]:
# compare synthetic with bullet points
# calculates similarities using ROUGE 

from rouge_score import rouge_scorer
import os
import pandas as pd
import numpy as np
import openai
from sentence_transformers import SentenceTransformer
#openai.api_key = ""

# Initialize the ROUGE scorer
#scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Function to compute ROUGE scores
def compute_rouge_scores(summary_text, reference_text):
    """Return the ROUGE-1, ROUGE-2 and ROUGE-L F-measures of a summary.

    A stemming ``RougeScorer`` is built on each call; ``reference_text`` is
    passed as the first argument of ``score`` and ``summary_text`` as the second.

    Returns:
        Dict mapping 'rouge1', 'rouge2' and 'rougeL' to the ``fmeasure`` of the
        corresponding score.
    """
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    all_scores = rouge.score(reference_text, summary_text)
    f_measures = {}
    for metric in all_scores:
        f_measures[metric] = all_scores[metric].fmeasure
    return f_measures

# File path for the ROUGE scores CSV
rouge_scores_file = './Data/Attribution/Eval_Obj_1/rouge_scores.csv'

# If a ROUGE results file from an earlier run exists, truncate it to zero length.
# This used to test and truncate `similarity_scores_file`, a variable from the
# similarity cell: that erased the similarity results and raised NameError on a
# fresh kernel.
if os.path.exists(rouge_scores_file):
    with open(rouge_scores_file, 'w'):
        pass

def extract_period(filename):
    """Return the period portion of a generated-output filename.

    The filename is split on underscores; the tokens from index 6 (the seventh
    token) up to, but not including, the last two are re-joined with
    underscores, and any spaces are replaced by underscores.
    """
    period_tokens = filename.split('_')[6:-2]
    return '_'.join(period_tokens).replace(' ', '_')

# Function to extract the prefix type from the filename
def extract_prefix_type(filename, prefix):
    """Return the third and fourth underscore-separated tokens of `prefix`, joined by '_'.

    `filename` is accepted for call-site compatibility but is not used.
    """
    tokens = prefix.split('_')
    return f"{tokens[2]}_{tokens[3]}"

#def extract_sector_from_sentence(sentence):
#    start = sentence.index("for sector ") + len("for sector ")
#    end = sentence.index(" ", start)
#    return sentence[start:end]

def extract_sector_from_sentence(sentence):
    """Extract the sector name from a sentence shaped like "The <sector> sector had ...".

    The sector name is taken to be every word after the first "The " up to
    (not including) the first word that typically follows a sector name.

    Args:
        sentence: Sentence containing "The " followed by the sector name.

    Returns:
        The sector name as a single space-joined string (may be empty if a
        stop word immediately follows "The ").

    Raises:
        ValueError: If "The " does not occur in `sentence`.
    """
    # Words that might follow a sector name
    following_words = {"is", "had", "was", "sector", "with", "in", "and"}

    # Start right after the article. The previous offset used len("sector had "),
    # which skipped 11 characters and cut into (or past) the sector name.
    article = "The "
    start = sentence.index(article) + len(article)

    # Collect words until we hit a following word or run out of words
    sector_name = []
    for word in sentence[start:].split():
        if word in following_words:
            break
        sector_name.append(word)

    return " ".join(sector_name)


# Directory that is scanned for generated-output CSVs; the ground-truth
# "sentences_summary_new_<fund>_<period>.csv" files are read from here as well
directory = './Data/Attribution/Eval_Obj_1'
# Filename prefixes of the generated outputs to score (alternative runs kept commented out)
prefixes = ["output_bullet_few_1_new_", "output_bullet_few_2_new_", "output_bullet_few_3_new_"]
#prefixes = ["output_bullet_zero_2_"]
#prefixes = ["output_bullet_few_1_new_"]
# Fund names expected directly after the prefix in a filename
fund_types = ['Defensive', 'Growth', 'Balanced']

# One dict per scored row; converted to a DataFrame after the scan
results = []

# Scan the directory for generated-output files named <prefix><fund>...
for filename in os.listdir(directory):
    for prefix in prefixes:
        for fund in fund_types:
            if not filename.startswith(prefix + fund):
                continue

            print(f"Processing file: {filename}")

            period = extract_period(filename)
            file_path = os.path.join(directory, filename)
            df = pd.read_csv(file_path)
            prefix_type = extract_prefix_type(filename, prefix)

            # The ground truth depends only on fund and period, so it is loaded once
            # per generated file instead of being re-read for every row.
            ground_truth_file = os.path.join(directory, f"sentences_summary_new_{fund}_{period}.csv")
            ground_truth_df = pd.read_csv(ground_truth_file)

            # Generated and ground-truth rows are paired by position
            # (assumes both files list the sectors in the same order)
            for idx, row in df.iterrows():
                sentence_1_col1 = row['Allocation']
                sentence_1_col2 = row['Selection']

                sentence_2_col1 = ground_truth_df.iloc[idx]['Allocation']
                sentence_2_col2 = ground_truth_df.iloc[idx]['Selection']

                # Extract the sector only after this row's ground-truth sentence has
                # been read. It used to run first, which either raised NameError on a
                # fresh kernel or labelled the row with the previous row's sector.
                sector = extract_sector_from_sentence(sentence_2_col1)

                # Compute ROUGE scores for Allocation
                rouge_scores_allocation = compute_rouge_scores(sentence_1_col1, sentence_2_col1)

                # Compute ROUGE scores for Selection
                rouge_scores_selection = compute_rouge_scores(sentence_1_col2, sentence_2_col2)

                # Append results
                results.append({
                    'File Name': filename,
                    'Fund': fund,
                    'Period': period,
                    'Prefix Type': prefix_type,
                    'Sector': sector,
                    'ROUGE-1 Allocation F-measure': rouge_scores_allocation['rouge1'],
                    'ROUGE-2 Allocation F-measure': rouge_scores_allocation['rouge2'],
                    'ROUGE-L Allocation F-measure': rouge_scores_allocation['rougeL'],
                    'ROUGE-1 Selection F-measure': rouge_scores_selection['rouge1'],
                    'ROUGE-2 Selection F-measure': rouge_scores_selection['rouge2'],
                    'ROUGE-L Selection F-measure': rouge_scores_selection['rougeL']
                })

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Save the DataFrame to a CSV file
results_df.to_csv("./Data/Attribution/Eval_Obj_1/rouge_scores.csv", index=False)

print("Results saved to CSV.")





Processing file: output_bullet_few_1_new_Balanced_10_1_2022 to 12_31_2022_2023-11-18_14-33.csv
Processing file: output_bullet_few_1_new_Balanced_7_1_2022 to 9_30_2022_2023-11-18_14-40.csv
Processing file: output_bullet_few_3_new_Growth_1_31_2022 to 3_31_2022_2023-11-16_01-44.csv
Processing file: output_bullet_few_3_new_Balanced_10_1_2022 to 12_31_2022_2023-11-16_01-16.csv
Processing file: output_bullet_few_3_new_Defensive_10_1_2022 to 12_31_2022_2023-11-16_01-32.csv
Processing file: output_bullet_few_1_new_Balanced_4_1_2022 to 6_30_2022_2023-11-18_14-37.csv
Processing file: output_bullet_few_2_new_Balanced_4_1_2022 to 6_30_2022_2023-11-16_02-09.csv
Processing file: output_bullet_few_3_new_Defensive_4_1_2022 to 6_30_2022_2023-11-16_01-37.csv
Processing file: output_bullet_few_2_new_Balanced_7_1_2022 to 9_30_2022_2023-11-16_02-12.csv
Processing file: output_bullet_few_2_new_Growth_7_1_2022 to 9_30_2022_2023-11-16_02-44.csv
Processing file: output_bullet_few_1_new_Growth_4_1_2022 to 6_30_

In [18]:
# Evaluation of Objective 1
# CSV tables
# this one works#

import os
import pandas as pd

# Directory scanned for the generated CSV-table outputs; the summary CSV is written here too
directory = "./Data/Attribution/Eval_Obj_1"

# Filename prefixes selecting which files are scored; tokens 3-4 of a prefix
# (e.g. "zero_2", "few_1") become the 'Prefix Type' in the summary
prefixes = ["output_csv_zero_2_", "output_csv_few_1_new_", "output_csv_few_2_new_", "output_csv_few_3_new_"]

# Function to extract the prefix type from the filename
def extract_prefix_type(filename, prefixes):
    """Return the prefix type of `filename`, or 'Unknown' if no prefix matches.

    The first entry of `prefixes` that `filename` starts with is split on
    underscores, and its third and fourth tokens are joined with '_'.
    """
    matched = next((candidate for candidate in prefixes if filename.startswith(candidate)), None)
    if matched is None:
        return 'Unknown'
    return '_'.join(matched.split('_')[2:4])

# Accumulates one dict of points and ratios per scored file; turned into a DataFrame below
summary = []

# Function to compare floating-point values with a tolerance
def is_close(a, b, tol=1e-6):
    """Return True when `a` and `b`, converted to float, differ by less than `tol`.

    Args:
        a, b: Values convertible to float (numbers or numeric strings).
        tol: Absolute tolerance for the comparison.

    Returns:
        False when either value cannot be converted to float. That covers
        non-numeric strings (ValueError) and unsupported types such as None
        (TypeError), which previously escaped as an uncaught exception.
    """
    try:
        return abs(float(a) - float(b)) < tol
    except (TypeError, ValueError):
        # Conversion to float failed, so the values are not comparable
        return False

def _safe_ratio(points, max_points):
    """Return points / max_points, or NaN when nothing could be scored (max_points == 0)."""
    return points / max_points if max_points else float('nan')


def _is_blank(value):
    """True when a cell is missing (NaN/None) or a whitespace-only string."""
    return pd.isna(value) or (isinstance(value, str) and not value.strip())


def _labels_match(row, predicted_col, true_col):
    """True when both columns exist, hold strings, and match ignoring case and outer whitespace."""
    if predicted_col not in row or true_col not in row:
        return False
    predicted, expected = row[predicted_col], row[true_col]
    if pd.isna(predicted) or pd.isna(expected):
        return False
    if not (isinstance(predicted, str) and isinstance(expected, str)):
        return False
    return predicted.strip().lower() == expected.strip().lower()


def _weights_match(predicted, expected):
    """Compare sector weights: case-insensitive text match for two strings, otherwise numeric closeness."""
    if pd.isna(predicted) or pd.isna(expected):
        return False
    if isinstance(predicted, str) and isinstance(expected, str):
        return predicted.strip().lower() == expected.strip().lower()
    return is_close(predicted, expected)


# Iterate over all files in the directory
for filename in os.listdir(directory):
    # Only score CSV files whose name starts with one of the prefixes
    if not (filename.endswith('.csv') and any(filename.startswith(prefix) for prefix in prefixes)):
        continue

    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    # Extract the prefix type from the filename
    prefix_type = extract_prefix_type(filename, prefixes)

    # Points earned and maximum attainable points, per effect type and category
    points = {effect: {'value': 0, 'weight': 0, 'performance': 0} for effect in ('Allocation', 'Selection')}
    max_points = {effect: {'value': 0, 'weight': 0, 'performance': 0} for effect in ('Allocation', 'Selection')}

    for index, row in df.iterrows():
        effect_type = row['Effect Type']
        if effect_type not in points:
            # Rows that are neither Allocation nor Selection are not scored
            continue
        scored = points[effect_type]
        possible = max_points[effect_type]

        # Value: numeric match within tolerance
        possible['value'] += 1
        if not pd.isna(row['Value']) and not pd.isna(row['True Value']) and is_close(row['Value'], row['True Value']):
            scored['value'] += 1

        # Sector performance: text match, counted only when both columns exist
        if 'Sector Performance' in row and 'True Performance' in row:
            possible['performance'] += 1
        if _labels_match(row, 'Sector Performance', 'True Performance'):
            scored['performance'] += 1

        # Sector weight
        if effect_type == 'Allocation':
            if 'Sector Weight' in row and 'True Sector Weight' in row:
                possible['weight'] += 1
                if _weights_match(row['Sector Weight'], row['True Sector Weight']):
                    scored['weight'] += 1
        else:
            # Selection is always blank for Sector Weight, so a point is earned when
            # this row's cell is blank. The old test, df['Sector Weight'].empty, asks
            # whether the whole column has zero rows and was therefore never true here.
            possible['weight'] += 1
            if 'Sector Weight' not in row or _is_blank(row['Sector Weight']):
                scored['weight'] += 1

    allocation_points, allocation_max = points['Allocation'], max_points['Allocation']
    selection_points, selection_max = points['Selection'], max_points['Selection']

    # Append the summary of points for this file to the summary list.
    # Ratios are NaN (rather than a ZeroDivisionError) when a category had no scorable rows.
    summary.append({
        'File Name': filename,
        'Prefix Type': prefix_type,
        'Value Points Allocation': allocation_points['value'],
        'Value Ratio Allocation': _safe_ratio(allocation_points['value'], allocation_max['value']),
        'Sector Weight Points Allocation': allocation_points['weight'],
        'Sector Weight Ratio Allocation': _safe_ratio(allocation_points['weight'], allocation_max['weight']),
        'Sector Performance Points Allocation': allocation_points['performance'],
        'Sector Performance Ratio Allocation': _safe_ratio(allocation_points['performance'], allocation_max['performance']),
        # Repeat for Selection
        'Value Points Selection': selection_points['value'],
        'Value Ratio Selection': _safe_ratio(selection_points['value'], selection_max['value']),
        'Sector Weight Points Selection': selection_points['weight'],
        'Sector Weight Ratio Selection': _safe_ratio(selection_points['weight'], selection_max['weight']),

        'Sector Performance Points Selection': selection_points['performance'],
        'Sector Performance Ratio Selection': _safe_ratio(selection_points['performance'], selection_max['performance'])
    })

# Convert the summary to a DataFrame and save it as a CSV file in `directory`
summary_df = pd.DataFrame(summary)
summary_csv_path = os.path.join(directory, 'summary_csv_points_by_effect_type_few_new.csv')
summary_df.to_csv(summary_csv_path, index=False)

# Re-read the file that was just written; the commented path is an alternative location
#summary_csv_path = "./Data/Attribution/Eval_Obj_1_csv/summary_csv_points.csv"
summary_df = pd.read_csv(summary_csv_path)

# Function to extract fund type from filename
def extract_fund_type(filename):
    """Return the first underscore-separated token of `filename` that is a fund type.

    Recognised fund types are 'Balanced', 'Growth' and 'Defensive'; 'Unknown'
    is returned when no token matches exactly.
    """
    fund_names = ['Balanced', 'Growth', 'Defensive']
    return next((token for token in filename.split('_') if token in fund_names), "Unknown")

# Derive a 'Fund Type' column from each row's 'File Name'
summary_df['Fund Type'] = summary_df['File Name'].apply(extract_fund_type)

# Overwrite the summary CSV so that it includes the new 'Fund Type' column
summary_df.to_csv(summary_csv_path, index=False)

print(f"Summary CSV saved at: {summary_csv_path}")


Summary CSV saved at: ./Data/Attribution/Eval_Obj_1/summary_csv_points_by_effect_type_few_new.csv
