In [1]:
import numpy as np
import csv
from collections import defaultdict

In [10]:
def read_snr_csv(path):
    """
    Load (value_1, value_2) pairs from a backslash-delimited text file.

    Each non-blank row must contain at least three backslash-separated
    fields: a reader ID, a chapter ID, and a third field of the form
    ``<label>,<value_1>,<value_2>``. The label is discarded and the two
    values are parsed as floats. Later cells in this notebook treat
    value_1 as the SNR before cleaning and value_2 as the SNR after.

    Parameters:
    path (str): Path to the input file.

    Returns:
    defaultdict: Nested mapping ``data[reader_ID][chapter]`` -> list of
        ``(value_1, value_2)`` float tuples, in file order. Reader and
        chapter IDs are kept as strings.

    Raises:
    IndexError: If a non-blank row has fewer than three fields.
    ValueError: If the third field does not end in two comma-separated
        numbers.
    """
    data = defaultdict(lambda: defaultdict(list))

    with open(path, newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter='\\')

        # Process each row in the CSV
        for row in csvreader:
            # csv.reader yields an empty list for a blank line; skip it
            # instead of failing on row[0].
            if not row:
                continue

            reader_ID, chapter, rest = row[0], row[1], row[2]

            # Split from the right so that a comma inside the leading label
            # does not break the unpacking of the two trailing numbers.
            _, value_1, value_2 = rest.rsplit(',', 2)

            # Add values to the dictionary
            data[reader_ID][chapter].append((float(value_1), float(value_2)))
    return data

def compute_statistics(data):
    """
    Summarise value_1 and value_2 pooled over every reader and chapter.

    Parameters:
    data (dict): Two-level mapping (reader -> chapter -> list of
        (value_1, value_2) tuples).

    Returns:
    dict: ``{"value_1": summary, "value_2": summary}`` where each summary
        maps "min", "Q1", "median", "Q3", "max" and "mean" to the
        corresponding NumPy result.
    """
    def summarise(samples):
        """Build the six-entry summary (five-number summary plus mean)."""
        summary = {}
        summary["min"] = np.min(samples)
        summary["Q1"] = np.percentile(samples, 25)
        summary["median"] = np.median(samples)
        summary["Q3"] = np.percentile(samples, 75)
        summary["max"] = np.max(samples)
        summary["mean"] = np.mean(samples)
        return summary

    # Flatten the reader -> chapter nesting into one list of pairs.
    pooled_pairs = [
        pair
        for chapters in data.values()
        for samples in chapters.values()
        for pair in samples
    ]

    # Separate the two components into their own arrays.
    first_values = np.array([first for first, _ in pooled_pairs])
    second_values = np.array([second for _, second in pooled_pairs])

    result = {}
    result["value_1"] = summarise(first_values)
    result["value_2"] = summarise(second_values)
    return result

def print_statistics(stats, decimals=4):
    """
    Print each summary in `stats`, rounding every value before display.

    For every top-level key a header line is printed (preceded by a blank
    line), followed by one indented ``name: value`` line per statistic.

    Parameters:
    stats (dict): Mapping of a label (e.g. "value_1") to a dict of
        statistic name -> numeric value.
    decimals (int): Number of decimal places passed to ``round``.
        Defaults to 4, the precision this function has always used.
    """
    for key, summary in stats.items():
        print(f"\nStatistics for {key}:")
        for stat_name, value in summary.items():
            print(f"  {stat_name}: {round(value, decimals)}")

def percent_difference(initial, final):
    """
    Return the relative change from `initial` to `final`.

    The result is a fraction of `initial` and is not multiplied by 100,
    so -0.25 means a 25% decrease.

    Parameters:
    initial: Starting value; it is the divisor, so it must be non-zero.
    final: Ending value.

    Returns:
    The signed change ``final - initial`` divided by `initial`.
    """
    change = final - initial
    return change / initial


In [4]:
# Load the nested reader -> chapter -> [(value_1, value_2), ...] mapping.
data = read_snr_csv("../../data/output/snr_data_all.csv")

# Show which chapters were found for each reader.
for reader_ID, chapters in data.items():
    print(f"Reader ID: {reader_ID}")
    print("Chapters:", list(chapters))
    print()  # blank separator line between readers

Reader ID: 19
Chapters: ['198', '227']

Reader ID: 26
Chapters: ['495', '496']

Reader ID: 27
Chapters: ['123349', '124992']

Reader ID: 32
Chapters: ['21625', '21631', '21634', '4137']

Reader ID: 39
Chapters: ['121914', '121915', '121916']

Reader ID: 40
Chapters: ['121026', '222']

Reader ID: 60
Chapters: ['121082']

Reader ID: 78
Chapters: ['368', '369']

Reader ID: 83
Chapters: ['11691', '3054', '9960']

Reader ID: 87
Chapters: ['121553']

Reader ID: 89
Chapters: ['218', '219']

Reader ID: 103
Chapters: ['1240', '1241']

Reader ID: 118
Chapters: ['121721', '124588', '47824']

Reader ID: 125
Chapters: ['121124', '121342']

Reader ID: 150
Chapters: ['126107', '126112', '132655']

Reader ID: 163
Chapters: ['121908', '122947']

Reader ID: 196
Chapters: ['122150', '122152', '122159']

Reader ID: 198
Chapters: ['126831', '129977', '209']

Reader ID: 200
Chapters: ['124139', '124140', '126784']

Reader ID: 201
Chapters: ['122255', '127786']

Reader ID: 211
Chapters: ['122425', '122442']


In [11]:
# Pool every (value_1, value_2) pair in `data` and compute the min, quartiles,
# median, max and mean for each of the two components.
statistics = compute_statistics(data)
# Print both summaries, rounded to 4 decimal places.
print_statistics(statistics)


Statistics for value_1:
  min: 0.954
  Q1: 11.802
  median: 15.8058
  Q3: 18.518
  max: 37.3284
  mean: 15.1214

Statistics for value_2:
  min: 0.5972
  Q1: 8.7658
  median: 12.239
  Q3: 14.8351
  max: 29.0571
  mean: 11.8289


In [12]:
# Per-chapter report: mean relative change and mean SNR before/after cleaning.
for reader, chapters in data.items():
    print(f"Reader: {reader}")
    for chapter, pairs in chapters.items():
        # Each pair is (SNR before cleaning, SNR after cleaning).
        initial_snr = np.array([before for before, _ in pairs])
        cleaned_snr = np.array([after for _, after in pairs])
        # Relative change per sample, as a fraction of the pre-clean value.
        percent_diff = np.array(
            [percent_difference(before, after) for before, after in pairs]
        )
        print(f"    Chapter: {chapter}")
        print(f"        Avg percent diff  : {np.mean(percent_diff)}")
        print(f"        Avg SNR pre clean : {np.mean(initial_snr)}")
        print(f"        Avg SNR post clean: {np.mean(cleaned_snr)}")


Reader: 19
    Chapter: 198
        Avg percent diff  : -0.1854437249282706
        Avg SNR pre clean : 15.971093243770767
        Avg SNR post clean: 13.02017893693522
    Chapter: 227
        Avg percent diff  : -0.17818077971338103
        Avg SNR pre clean : 15.924313419098267
        Avg SNR post clean: 13.090875647205129
Reader: 26
    Chapter: 495
        Avg percent diff  : -0.25557748517027
        Avg SNR pre clean : 7.70127130912547
        Avg SNR post clean: 5.733370631414561
    Chapter: 496
        Avg percent diff  : -0.24686527780556278
        Avg SNR pre clean : 8.217966984486033
        Avg SNR post clean: 6.194490355169729
Reader: 27
    Chapter: 123349
        Avg percent diff  : -0.2302424468975055
        Avg SNR pre clean : 9.213555031516938
        Avg SNR post clean: 7.10992757389114
    Chapter: 124992
        Avg percent diff  : -0.180234829030317
        Avg SNR pre clean : 9.749135784097517
        Avg SNR post clean: 8.034740384307701
Reader: 32
    Chap

In [13]:
# Pool every (initial, cleaned) pair across all readers and chapters.
all_values = [
    pair
    for chapters in data.values()
    for pairs in chapters.values()
    for pair in pairs
]
uncleaned_all = [initial for initial, _ in all_values]
cleaned_all = [cleaned for _, cleaned in all_values]

# Relative change per sample (fraction of the initial value).
percent_diff_all = np.array(
    [percent_difference(initial, final) for initial, final in all_values]
)

# Overall means: relative change, SNR before cleaning, SNR after cleaning.
print(np.mean(percent_diff_all))
print(np.mean(np.array(uncleaned_all)))
print(np.mean(np.array(cleaned_all)))

-0.22893026591735602
15.121404755474519
11.828909489423223


In [14]:
# Rebuild the pooled (initial, cleaned) pairs so this cell runs on its own.
all_values = [
    pair
    for chapters in data.values()
    for pairs in chapters.values()
    for pair in pairs
]
uncleaned_all = [initial for initial, _ in all_values]
cleaned_all = [cleaned for _, cleaned in all_values]

# Absolute drop (initial - cleaned) per sample. Not printed here; kept
# available for ad-hoc np.mean / np.std inspection.
difference_list = np.array([unclean - clean for unclean, clean in all_values])

# Relative change per sample (fraction of the initial value).
percent_diff_all = np.array(
    [percent_difference(initial, final) for initial, final in all_values]
)

# Spread of the relative change across all samples.
print(np.std(percent_diff_all))

0.08277431654896233
