In [3]:
class CSVDataProcessor:
    def __init__(self, file_name: str):
        self.file_name = file_name
        self.data = self.read_csv_data()

    def read_column(self, file_name: str, column_number: int):
        # Function to read a single specified column from a CSV file
        with open(file_name, 'r') as file:
            lines = file.readlines()
        headers = lines[0].strip().split(',')
        column_name = headers[column_number]
        column_data = [float(line.strip().split(',')[column_number]) for line in lines[1:]]
        return column_name, column_data

    def read_csv_data(self):
        # Function to read all columns from the CSV file into a dictionary
        with open(self.file_name, 'r') as file:
            lines = file.readlines()
        headers = lines[0].strip().split(',')
        data_dict = {}
        for i, header in enumerate(headers):
            _, column_data = self.read_column(self.file_name, i)
            data_dict[header] = column_data
        return data_dict

    def paired_t_test(self, list1, list2):
        # Function to calculate the paired Student’s t-test statistic
        if len(list1) != len(list2):
            raise ValueError("Lists must be of the same length")
        n = len(list1)
        differences = [list1[i] - list2[i] for i in range(n)]
        mean_diff = sum(differences) / n
        sum_squared_diff = sum((x - mean_diff) ** 2 for x in differences)
        var_diff = sum_squared_diff / (n - 1)
        t_statistic = mean_diff / ((var_diff / n) ** 0.5)
        return t_statistic

    def generate_paired_t_tests(self):
        # Function to generate paired t-test statistics for each pair of columns
        t_test_results = []
        columns = list(self.data.keys())
        for i in range(len(columns)):
            for j in range(len(columns)):
                if i != j:
                    col1 = columns[i]
                    col2 = columns[j]
                    t_statistic = self.paired_t_test(self.data[col1], self.data[col2])
                    t_test_results.append((col1, col2, t_statistic))
        return t_test_results

    def print_custom_table(self, t_test_results, border_char, columns):
        # Function to print the paired t-test statistics in a custom table format
        col_width = max(len(col) for col in columns) + 2
        border = border_char * ((col_width + 3) * (len(columns) + 1) + 1)

        # Print header row
        print(border)
        print(f"{border_char:<3}", end="")
        for col in columns:
            print(f"{col:^{col_width}}{border_char}", end="")
        print()
        print(border)

        # Print data rows
        for row in columns:
            print(f"{border_char:<3}{row:^{col_width}}{border_char}", end="")
            for col in columns:
                if row == col:
                    print(f"{' - ':^{col_width}}{border_char}", end="")
                else:
                    t_value = next((t for (r, c, t) in t_test_results if r == row and c == col), 0)
                    formatted_value = f"*{t_value:.4f}*" if abs(t_value) > 2.02 else f"{t_value:.4f}"
                    print(f"{formatted_value:^{col_width}}{border_char}", end="")
            print()
            print(border)

In [4]:
# Example usage
processor = CSVDataProcessor('Resit tasks\\resit_task1.csv')

In [5]:
t_test_results = processor.generate_paired_t_tests()

In [6]:
columns_to_include = ["10-14 years old", "15-19 years old", "20-24 years old", "25-29 years old"]

In [7]:
processor.print_custom_table(t_test_results, '*', columns_to_include)

*****************************************************************************************************
*   10-14 years old * 15-19 years old * 20-24 years old * 25-29 years old *
*****************************************************************************************************
*   10-14 years old *        -        *     0.0000      *     0.0000      *     0.0000      *
*****************************************************************************************************
*   15-19 years old *     0.0000      *        -        *   *-26.1338*    *   *-27.8313*    *
*****************************************************************************************************
*   20-24 years old *     0.0000      *    *26.1338*    *        -        *    *-7.3147*    *
*****************************************************************************************************
*   25-29 years old *     0.0000      *    *27.8313*    *    *7.3147*     *        -        *
**************************************

In [None]:
def read_column(file_name, column_number):
    """
    Reads a single specified column from a CSV file.
    """
    with open(file_name, 'r') as file:
        lines = file.readlines()
    headers = lines[0].strip().split(',')
    column_name = headers[column_number]
    column_data = [float(line.strip().split(',')[column_number]) for line in lines[1:]]
    return column_name, column_data

def read_csv_data(file_name):
    """
    Reads all columns from the CSV file into a dictionary.
    """
    with open(file_name, 'r') as file:
        lines = file.readlines()
    headers = lines[0].strip().split(',')
    data_dict = {}
    for i, header in enumerate(headers):
        _, column_data = read_column(file_name, i)
        data_dict[header] = column_data
    return data_dict

def paired_t_test(list1, list2):
    """
    Calculates the paired Student’s t-test statistic for two lists of data.
    """
    if len(list1) != len(list2):
        raise ValueError("Lists must be of the same length")
    n = len(list1)
    differences = [list1[i] - list2[i] for i in range(n)]
    mean_diff = sum(differences) / n
    sum_squared_diff = sum((x - mean_diff) ** 2 for x in differences)
    var_diff = sum_squared_diff / (n - 1)
    t_statistic = mean_diff / ((var_diff / n) ** 0.5)
    return t_statistic

def generate_paired_t_tests(data_dict):
    """
    Generates paired t-test statistics for each pair of columns.
    """
    t_test_results = []
    columns = list(data_dict.keys())
    for i in range(len(columns)):
        for j in range(len(columns)):
            if i != j:
                col1 = columns[i]
                col2 = columns[j]
                t_statistic = paired_t_test(data_dict[col1], data_dict[col2])
                t_test_results.append((col1, col2, t_statistic))
    return t_test_results

def print_custom_table(t_test_results, border_char, columns):
    """
    Prints the paired t-test statistics in a custom table format.
    """
    col_width = max(len(col) for col in columns) + 2
    border = border_char * ((col_width + 3) * (len(columns) + 1) + 1)

    # Print header row
    print(border)
    print(f"{border_char:<3}", end="")
    for col in columns:
        print(f"{col:^{col_width}}{border_char}", end="")
    print()
    print(border)

    # Print data rows
    for row in columns:
        print(f"{border_char:<3}{row:^{col_width}}{border_char}", end="")
        for col in columns:
            if row == col:
                print(f"{' - ':^{col_width}}{border_char}", end="")
            else:
                t_value = next((t for (r, c, t) in t_test_results if r == row and c == col), 0)
                formatted_value = f"*{t_value:.4f}*" if abs(t_value) > 2.02 else f"{t_value:.4f}"
                print(f"{formatted_value:^{col_width}}{border_char}", end="")
        print()
        print(border)

# Example usage
file_name = 'resit_task1.csv'
data_dict = read_csv_data(file_name)
t_test_results = generate_paired_t_tests(data_dict)
columns_to_include = ["10-14 years old", "15-19 years old", "20-24 years old", "25-29 years old"]
print_custom_table(t_test_results, '*', columns_to_include)
