In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os

## Let's get graphing

In [2]:
# Directory whose sub-sub-folders are scanned for CSV files by the first plotting cell
# (per its name, the denoising results for the "gauss" noise type — TODO confirm).
denoised_info_root = "./finished/gauss"
# Directory that is walked recursively for CSV files by the second plotting cell
# (per its name, data for the original and noised images — TODO confirm).
og_and_noised_root = "./noised_images_data/gauss"
# Output directory for the PNG figures; both plotting cells create it if missing.
end_folder = "./bar_graphs/gauss"

In [3]:
# Plot the distribution of every data column of each CSV located exactly two
# directory levels below `denoised_info_root`, and save one PNG per column
# into `end_folder`.
os.makedirs(end_folder, exist_ok=True)

files = os.listdir(denoised_info_root)
for file in files:
    subfolder = os.path.join(denoised_info_root, file)
    if not os.path.isdir(subfolder):
        continue

    for subfile in os.listdir(subfolder):
        subfolder2 = os.path.join(subfolder, subfile)
        if not os.path.isdir(subfolder2):
            continue

        for subfile2 in os.listdir(subfolder2):
            if not subfile2.endswith('.csv'):
                continue

            csv_path = os.path.join(subfolder2, subfile2)
            df = pd.read_csv(csv_path)

            # Label used in titles and file names: the CSV file name without
            # its first 16 characters and without the ".csv" suffix.
            # NOTE(review): presumably strips a fixed-length prefix — confirm.
            csv_label = subfile2[16:-4]

            # The first column is skipped.
            # NOTE(review): presumably an index/identifier column — confirm.
            for col in df.columns[1:]:
                fig, ax = plt.subplots(figsize=(10, 6))
                column = df[col]

                # is_numeric_dtype is also True for booleans, so test bool first.
                if pd.api.types.is_bool_dtype(column):
                    value_counts = column.value_counts()
                    ax.bar([str(x) for x in value_counts.index], value_counts.values)
                elif pd.api.types.is_numeric_dtype(column):
                    ax.hist(column, bins=20, edgecolor='black')
                    mean_val = column.mean()
                    median_val = column.median()
                    ax.axvline(mean_val, color='red', linestyle='--', label=f'Mean: {mean_val:.2f}')
                    ax.axvline(median_val, color='green', linestyle=':', label=f'Median: {median_val:.2f}')
                    ax.legend()
                # Any other dtype falls through: only the title and axis labels
                # are drawn, matching the other plotting cell.

                ax.set_title(f"{csv_label} - {col}")
                ax.set_xlabel(col)
                ax.set_ylabel('Frequency')
                fig.tight_layout()

                save_name = f"{csv_label}_{col}_hist.png"
                save_path = os.path.join(end_folder, save_name)
                if os.path.exists(save_path):
                    os.remove(save_path)
                # Always save. Previously savefig was nested under the
                # exists() check, so nothing was written on a fresh output folder.
                fig.savefig(save_path, dpi=300)
                # Close explicitly so figures do not pile up across the loop.
                plt.close(fig)

In [4]:
# Plot the distribution of every data column of each CSV found anywhere
# beneath `og_and_noised_root`, writing one PNG per column into `end_folder`.
os.makedirs(end_folder, exist_ok=True)

# Recursive search: every file ending in ".csv" under the root, at any depth.
csv_files = [
    os.path.join(root, file)
    for root, dirs, files in os.walk(og_and_noised_root)
    for file in files
    if file.endswith('.csv')
]

for csv_path in csv_files:
    df = pd.read_csv(csv_path)
    csv_name = os.path.basename(csv_path)[:-4]  # file name minus the ".csv" suffix

    # The first column is skipped; every remaining column gets its own figure.
    for col in df.columns[1:]:
        column = df[col]
        fig, ax = plt.subplots(figsize=(10, 6))

        # Booleans also count as numeric for pandas, so they are handled first.
        if pd.api.types.is_bool_dtype(column):
            counts = column.value_counts()
            ax.bar([str(x) for x in counts.index], counts.values)
        elif pd.api.types.is_numeric_dtype(column):
            ax.hist(column, bins=20, edgecolor='black')
            col_mean = column.mean()
            col_median = column.median()
            ax.axvline(col_mean, color='red', linestyle='--', label=f'Mean: {col_mean:.2f}')
            ax.axvline(col_median, color='green', linestyle=':', label=f'Median: {col_median:.2f}')
            ax.legend()

        ax.set_title(f"{csv_name} - {col}")
        ax.set_xlabel(col)
        ax.set_ylabel('Frequency')
        fig.tight_layout()

        save_path = os.path.join(end_folder, f"{csv_name}_{col}_hist.png")
        # Remove any stale PNG from an earlier run, then write the new one.
        if os.path.exists(save_path):
            os.remove(save_path)
        fig.savefig(save_path, dpi=300)
        plt.close(fig)