In [4]:
import os
import pandas as pd
from pptx import Presentation
from pptx.util import Inches

# Root folder of the perturbation run that is summarized below
base_path = '/home/hang/GitHub/BrainGNN_Pytorch/results/2024-06-12_18-08-37 perturbation fp/perturbation'

# Thresholds are kept as strings because they are joined into folder paths;
# group pairs name the comparisons to report on.
thresholds = [str(value) for value in range(5, 30, 5)]
group_pairs = [
    'CN_vs_CN',
    'CN_vs_Dementia',
    'CN_vs_MCI',
    'MCI_vs_Dementia',
]

def find_averaged_file(directory):
    """Return the first file under *directory* whose name contains "Avarege".

    The tree is searched recursively with ``os.walk``. ``None`` is returned
    when no file name matches (including when *directory* does not exist).
    """
    matches = (
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if "Avarege" in name
    )
    return next(matches, None)
def add_slide_with_image(prs, title_text, image_path):
    """Append a titled slide showing a single image and return the new slide.

    Args:
        prs: The python-pptx presentation the slide is added to.
        title_text: Text written into the slide's title placeholder.
        image_path: Path of the image file to insert.

    Returns:
        The newly created slide. Earlier versions returned nothing, so
        callers that ignore the result are unaffected.
    """
    # Index 5 is the "Title Only" layout in python-pptx's default template.
    slide_layout = prs.slide_layouts[5]
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = title_text

    # Only the height is given, so python-pptx scales the width to keep the
    # image's aspect ratio.
    slide.shapes.add_picture(image_path, Inches(1), Inches(1.5), height=Inches(5))
    return slide
def find_plot_images(directory):
    """Collect every ``.png`` file found under *directory*, searching recursively.

    Returns a list of file paths in ``os.walk`` order. The list is empty when
    nothing matches or when *directory* does not exist.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith('.png')
    ]

# One list of DataFrames per threshold and one per group pair
threshold_data = {name: [] for name in thresholds}
group_data = {name: [] for name in group_pairs}

# Visit every <threshold>/<group pair>/results folder and load its averaged CSV
for threshold in thresholds:
    for group_pair in group_pairs:
        directory_path = os.path.join(base_path, threshold, group_pair, 'results')
        file_path = find_averaged_file(directory_path)

        if not file_path:
            print(f"File not found in directory: {directory_path}")
            continue

        df = pd.read_csv(file_path)

        # Drop the "Unnamed..." columns that pandas creates for unlabeled CSV columns
        is_unnamed = df.columns.str.contains('^Unnamed')
        df = df.loc[:, ~is_unnamed]

        # Tag each frame with where it came from
        df['Group Pair'] = group_pair.replace('_vs_', ' vs ')
        df['Threshold'] = threshold

        # The same frame object is filed under both views
        threshold_data[threshold].append(df)
        group_data[group_pair].append(df)

# Create a PowerPoint presentation object
prs = Presentation()

# Add one summary slide per threshold (rows = group pairs), followed by the
# plot slides of every group pair at that threshold.
for threshold, dfs in threshold_data.items():
    if not dfs:
        # No averaged CSV was loaded for this threshold; nothing to show.
        continue

    # Stack the per-group-pair frames and drop repeated column labels
    combined_df = pd.concat(dfs, axis=0, ignore_index=True)
    combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]
    combined_df = combined_df.drop(columns=['Threshold'])

    # Put 'Group Pair' first so it labels the rows exactly once, instead of
    # showing up both as a row label and again as a data column.
    metric_cols = [col for col in combined_df.columns if col != 'Group Pair']
    combined_df = combined_df[['Group Pair'] + metric_cols]

    # Index 5 is the "Title Only" layout in python-pptx's default template.
    slide_layout = prs.slide_layouts[5]
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = f"Summary of Results at Threshold {threshold}"

    # One header row plus one row per DataFrame row
    rows, cols = combined_df.shape
    table_shape = slide.shapes.add_table(rows + 1, cols, Inches(1), Inches(2), Inches(8), Inches(3))
    table = table_shape.table

    # Header row: column labels with underscores shown as spaces
    for col_idx, col_name in enumerate(combined_df.columns):
        table.cell(0, col_idx).text = str(col_name).replace('_', ' ')

    # Body: floats with three decimals, everything else via str()
    for row_idx in range(rows):
        for col_idx in range(cols):
            value = combined_df.iloc[row_idx, col_idx]
            cell_text = f"{value:.3f}" if isinstance(value, float) else str(value)
            table.cell(row_idx + 1, col_idx).text = cell_text

    # Plots live in <threshold>/<group pair>/plots, so every group pair has to
    # be visited. The earlier code reused whatever `group_pair` was left over
    # from the data-collection loop and therefore only showed the last pair.
    for pair_name in group_pairs:
        plot_dir = os.path.join(base_path, threshold, pair_name, 'plots')
        for image_path in find_plot_images(plot_dir):
            add_slide_with_image(prs, f"Plots for Threshold {threshold}", image_path)

# Add one summary slide per group pair (rows = thresholds), followed by the
# plot slides of that group pair at every threshold.
for group_pair, dfs in group_data.items():
    if not dfs:
        # No averaged CSV was loaded for this group pair; nothing to show.
        continue

    # Stack the per-threshold frames and drop repeated column labels
    combined_df = pd.concat(dfs, axis=0, ignore_index=True)
    combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]
    combined_df = combined_df.drop(columns=['Group Pair'])

    # Put 'Threshold' first so it labels the rows exactly once, instead of
    # showing up both as a row label and again as a data column.
    metric_cols = [col for col in combined_df.columns if col != 'Threshold']
    combined_df = combined_df[['Threshold'] + metric_cols]

    pair_label = group_pair.replace('_vs_', ' vs ')

    # Index 5 is the "Title Only" layout in python-pptx's default template.
    slide_layout = prs.slide_layouts[5]
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = f"Summary of Results for {pair_label}"

    # One header row plus one row per DataFrame row
    rows, cols = combined_df.shape
    table_shape = slide.shapes.add_table(rows + 1, cols, Inches(1), Inches(2), Inches(8), Inches(3))
    table = table_shape.table

    # Header row: column labels with underscores shown as spaces
    for col_idx, col_name in enumerate(combined_df.columns):
        table.cell(0, col_idx).text = str(col_name).replace('_', ' ')

    # Body: floats with three decimals, everything else via str()
    for row_idx in range(rows):
        for col_idx in range(cols):
            value = combined_df.iloc[row_idx, col_idx]
            cell_text = f"{value:.3f}" if isinstance(value, float) else str(value)
            table.cell(row_idx + 1, col_idx).text = cell_text

    # Plots live in <threshold>/<group pair>/plots, so every threshold has to
    # be visited. The earlier code reused whatever `threshold` was left over
    # from the previous loop and therefore only showed the last threshold.
    for threshold_name in thresholds:
        plot_dir = os.path.join(base_path, threshold_name, group_pair, 'plots')
        for image_path in find_plot_images(plot_dir):
            add_slide_with_image(prs, f"Plots for {pair_label}", image_path)

# Save the presentation inside the results folder it summarizes
output_path = os.path.join(base_path, 'results_presentation.pptx')
prs.save(output_path)

print(f"Presentation saved at {output_path}")





Presentation saved at /home/hang/GitHub/BrainGNN_Pytorch/results/2024-06-12_18-08-37 perturbation fp/perturbation/results_presentation.pptx


In [2]:
import os
import pandas as pd
from pptx import Presentation
from pptx.util import Inches


# Root folder of the run; each method below is looked up as a sub-folder of it
base_path = 'results/2024-06-12_19-40-22 fp'

# Methods whose results are compared
methods = [
    'K_correlation',
    'K_JS_Divergence',
    'ScaledMahalanobisDistanceMatrix',
    'Z_scoring',
]

def find_averaged_files(directory):
    """Return all files under *directory* whose name contains "Avarege".

    The tree is searched recursively with ``os.walk``; paths are returned in
    walk order. The list is empty when no file name matches.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if "Avarege" in name
    ]

def find_plot_images(directory):
    """Return the paths of all ``.png`` files below *directory*.

    Sub-folders are searched too (``os.walk``). An empty list is returned when
    no PNG file is found.
    """
    plot_files = []
    for folder, _subdirs, names in os.walk(directory):
        plot_files.extend(
            os.path.join(folder, name) for name in names if name.endswith('.png')
        )
    return plot_files

# Three views of the same results; each maps a key tuple to a list of DataFrames
data = {
    'by_method_group_pair': {},
    'by_group_pair_threshold': {},
    'by_threshold_method': {},
}

# Load every averaged CSV found below each method folder
for method in methods:
    method_path = os.path.join(base_path, method)

    for file_path in find_averaged_files(method_path):
        df = pd.read_csv(file_path)

        # Drop the "Unnamed..." columns that pandas creates for unlabeled CSV columns
        is_unnamed = df.columns.str.contains('^Unnamed')
        df = df.loc[:, ~is_unnamed]

        # The group pair and threshold are taken from the folder names three
        # and four levels up from the file name
        parts = file_path.split(os.sep)
        group_pair = parts[-3]
        threshold = parts[-4]

        # Human-readable group pair: strip list punctuation, spell out "vs"
        group_pair_clean = group_pair
        for token in ('[', ']', "'"):
            group_pair_clean = group_pair_clean.replace(token, '')
        group_pair_clean = group_pair_clean.replace('_vs_', ' vs ')

        df['Group Pair'] = group_pair_clean
        df['Threshold'] = threshold
        df['Method'] = method

        # File the same frame under all three views
        data['by_method_group_pair'].setdefault((method, group_pair), []).append(df)
        data['by_group_pair_threshold'].setdefault((group_pair, threshold), []).append(df)
        data['by_threshold_method'].setdefault((threshold, method), []).append(df)

# Presentation that the slides below are added to
prs = Presentation()

def add_slide_with_table(prs, title_text, df, first_col_name):
    """Append a titled slide that renders *df* as a table and return the slide.

    The table has one header row plus one row per DataFrame row, and one
    column per DataFrame column.

    Args:
        prs: The python-pptx presentation the slide is added to.
        title_text: Text written into the slide's title placeholder.
        df: DataFrame to render. Float values are shown with three decimals,
            all other values via ``str()``.
        first_col_name: Header text for the first table column; it is used in
            place of the DataFrame's own first column label.

    Returns:
        The newly created slide.
    """
    # Index 5 is the "Title Only" layout in python-pptx's default template.
    slide_layout = prs.slide_layouts[5]
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = title_text

    # One header row plus one row per DataFrame row
    rows, cols = df.shape
    table_shape = slide.shapes.add_table(rows + 1, cols, Inches(0.5), Inches(1.5), Inches(9), Inches(5))
    table = table_shape.table

    # Header row: custom label for the first column, then the remaining
    # column labels with underscores shown as spaces. str() keeps non-string
    # labels (e.g. integers) from raising.
    table.cell(0, 0).text = first_col_name
    for col_idx, col_name in enumerate(df.columns[1:], 1):
        table.cell(0, col_idx).text = str(col_name).replace('_', ' ')

    # Body rows
    for row_idx in range(rows):
        for col_idx in range(cols):
            value = df.iloc[row_idx, col_idx]
            cell_text = f"{value:.3f}" if isinstance(value, float) else str(value)
            table.cell(row_idx + 1, col_idx).text = cell_text
    return slide

def add_slide_with_image(prs, title_text, image_path):
    """Append a titled slide showing a single image and return the new slide.

    Args:
        prs: The python-pptx presentation the slide is added to.
        title_text: Text written into the slide's title placeholder.
        image_path: Path of the image file to insert.

    Returns:
        The newly created slide. Earlier versions returned nothing, so
        callers that ignore the result are unaffected.
    """
    # Index 5 is the "Title Only" layout in python-pptx's default template.
    slide_layout = prs.slide_layouts[5]
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = title_text

    # Only the height is given, so python-pptx scales the width to keep the
    # image's aspect ratio.
    slide.shapes.add_picture(image_path, Inches(1), Inches(1.5), height=Inches(5))
    return slide

# Add slides for comparisons
# 1. Compare the same method and group pair across thresholds
for (method, group_pair), dfs in data['by_method_group_pair'].items():
    if not dfs:
        continue

    # Stack the per-threshold frames and drop repeated column labels
    combined_df = pd.concat(dfs, axis=0, ignore_index=True)
    combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]

    # 'Threshold' labels the rows; the other bookkeeping columns are constant
    # within this view and are left out of the table.
    value_cols = [col for col in combined_df.columns if col not in ('Method', 'Group Pair', 'Threshold')]
    combined_df = combined_df[['Threshold'] + value_cols]

    group_pair_clean = group_pair.replace('[', '').replace(']', '').replace("'", "").replace('_vs_', ' vs ')
    add_slide_with_table(prs, f"Results for {method} - {group_pair_clean} across thresholds", combined_df, "Threshold")

    # The table spans several thresholds, so collect the plots of each one.
    # The earlier code built the path from whatever `threshold` was left over
    # from the data-collection loop, i.e. a single arbitrary threshold.
    for plot_threshold in combined_df['Threshold'].unique():
        plot_dir = os.path.join(base_path, method, plot_threshold, group_pair, 'plots')
        for image_path in find_plot_images(plot_dir):
            add_slide_with_image(prs, f"Plot for {group_pair_clean}", image_path)

# 2. Compare the same group pair and threshold across methods
for (group_pair, threshold), dfs in data['by_group_pair_threshold'].items():
    if not dfs:
        continue

    # Stack the per-method frames and drop repeated column labels
    combined_df = pd.concat(dfs, axis=0, ignore_index=True)
    combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]

    # 'Method' labels the rows; the other bookkeeping columns are constant
    # within this view and are left out of the table.
    value_cols = [col for col in combined_df.columns if col not in ('Method', 'Group Pair', 'Threshold')]
    combined_df = combined_df[['Method'] + value_cols]

    group_pair_clean = group_pair.replace('[', '').replace(']', '').replace("'", "").replace('_vs_', ' vs ')
    add_slide_with_table(prs, f"Results for {group_pair_clean} at threshold {threshold} across methods", combined_df, "Method")

    # The table spans several methods, so collect the plots of each one.
    # The earlier code built the path from whatever `method` was left over
    # from the previous loop, i.e. a single arbitrary method.
    for plot_method in combined_df['Method'].unique():
        plot_dir = os.path.join(base_path, plot_method, threshold, group_pair, 'plots')
        for image_path in find_plot_images(plot_dir):
            add_slide_with_image(prs, f"Plot for {group_pair_clean} at threshold {threshold}", image_path)

# 3. Compare the same threshold and method across group pairs
for (threshold, method), dfs in data['by_threshold_method'].items():
    if not dfs:
        continue

    # Stack the per-group-pair frames and drop repeated column labels
    combined_df = pd.concat(dfs, axis=0, ignore_index=True)
    is_repeat = combined_df.columns.duplicated()
    combined_df = combined_df.loc[:, ~is_repeat]

    # 'Group Pair' goes first; 'Method' and 'Threshold' are left out of the table
    kept_cols = ['Group Pair']
    for col in combined_df.columns:
        if col not in ['Method', 'Group Pair', 'Threshold']:
            kept_cols.append(col)
    combined_df = combined_df[kept_cols]

    slide_title = f"Results for {method} at threshold {threshold} across group pairs"
    slide = add_slide_with_table(prs, slide_title, combined_df, "Group Pair")

    # NOTE(review): this looks in <method>/<threshold>/plots, with no group-pair
    # folder in the path, unlike the other two comparisons - confirm that such
    # a folder exists, otherwise no plot slides are produced here.
    plot_dir = os.path.join(base_path, method, threshold, 'plots')
    plot_images = find_plot_images(plot_dir)
    for image_path in plot_images:
        add_slide_with_image(prs, f"Plot for {method} at threshold {threshold}", image_path)

# Write the deck into the run folder and report where it went
output_path = os.path.join(base_path, 'results_presentation.pptx')
prs.save(output_path)

print(f"Presentation saved at {output_path}")




Presentation saved at results/2024-06-12_19-40-22 fp/results_presentation.pptx
