In [None]:
# ===== CONSTANTS =====
# Default CSV path: used as the default `file` argument of the table helpers,
# and the file that filter/sort results are written to.
cache_file = "cache.csv"

# ===== CORE FUNCTIONS ===== 
def get_content(file):
    """Read a CSV file and split it into its header and its raw lines.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A tuple ``(header, lines)``. ``header`` is the list of column names
        taken from the first non-blank line. ``lines`` is the list of raw
        text lines (line endings preserved) INCLUDING the header line at
        index 0, so the data rows are ``lines[1:]``. Whitespace-only lines
        are dropped. An empty file yields ``([], [])``.
    """
    with open(file, "r") as data:
        # Skip blank lines (typically a trailing newline at the end of the
        # file) so callers never receive a row without any columns.
        lines = [line for line in data if line.strip()]

    if not lines:
        # Nothing to parse: avoid the IndexError that lines[0] would raise.
        return [], []

    header = lines[0].strip().split(",")
    return header, lines


def generate_cache_file(header, lines, file = cache_file):
    """Write a header and data rows to a CSV file, replacing its content.

    Args:
        header: List of column names, written as the first line.
        lines: Iterable of rows; each row is a list of strings that is
            joined with commas and written on its own line.
        file: Path of the file to (over)write. Defaults to the cache file.
    """
    with open(file, "w") as out:
        out.write(",".join(header) + "\n")
        out.writelines(",".join(fields) + "\n" for fields in lines)


#Inspired by: https://stackoverflow.com/questions/70937491/python-flexible-way-to-format-string-output-into-a-table-without-using-a-non-st
def print_formatted_table(file = cache_file):
    """Print the content of a CSV file as a fixed-width text table.

    Every column is left-aligned in a 20-character cell; longer values are
    printed in full and push the rest of their row to the right.

    Args:
        file: Path of the CSV file to print. Defaults to the cache file.
    """
    header, lines = get_content(file)
    column_count = len(header)

    # One "| <20 chars> " cell per column plus the closing "|".
    row_template = "| {:<20} " * column_count + "|"
    # Each cell is 2 + 20 + 1 = 23 characters wide and the closing "|" adds
    # one more, so the rule matches the row width for any number of columns.
    separator = "-" * (23 * column_count + 1)

    print("\nFormatted Table, from file:", file)
    print("With a total of " + str(len(lines)-1) +" entrys.")
    print(separator)
    print(row_template.format(*header))
    print(separator)
    for line in lines[1:]:
        fields = line.strip().split(",")
        # Pad short rows with empty cells; str.format would otherwise raise
        # IndexError when a row has fewer fields than the header.
        fields += [""] * (column_count - len(fields))
        print(row_template.format(*fields))


def get_rows_by_value(column, value, file = cache_file, return_table = False,):
    """Keep only the rows whose cell in ``column`` equals one of the values.

    The matching rows (plus the header) are written to the cache file, and a
    short summary is printed to the console.

    Args:
        column: Name of the column to filter on.
        value: The accepted value(s): either a single value or an iterable
            of values. Each one is compared as a string against the cell.
        file: Path of the CSV file to read. Defaults to the cache file.
        return_table: When true, print the filtered table afterwards.

    Returns:
        None. If ``column`` is not in the header, a message is printed and
        nothing is written.
    """
    header, lines = get_content(file)

    if column not in header:
        print("Spalte '" + column + "' nicht gefunden.")
        return

    # Normalise to a set of strings. A bare string must be treated as ONE
    # value: `cell in "DE"` would be a substring test, so "D", "E" and even
    # an empty cell would match. Cells are always text, hence str().
    if isinstance(value, str) or not hasattr(value, "__iter__"):
        wanted = {str(value)}
    else:
        wanted = {str(item) for item in value}

    col_index = header.index(column)
    rows = []
    for line in lines[1:]:
        row = line.strip().split(",")
        # Rows that are too short to have this column cannot match.
        if len(row) > col_index and row[col_index] in wanted:
            rows.append(row)

    #Output in file "cache.csv"
    generate_cache_file(header, rows)
    #Output in console
    print("\nOnly returned rows with value in column:", column)
    print("For Values:", value)
    if return_table:
        print_formatted_table()


def sort_by(column, area, reverse=False, file=cache_file, return_table=False):
    """Sort a range of rows by one column and write them to the cache file.

    Only the rows inside ``area`` are kept: the cache file afterwards
    contains the header followed by the sorted rows of that range.

    Args:
        column: Name of the column to sort by.
        area: ``[start, end]`` slice bounds into the file's lines, where
            line 0 is the header and ``end`` is exclusive. An empty list
            (or ``None``) selects all data rows.
        reverse: Sort descending when true, ascending otherwise.
        file: Path of the CSV file to read. Defaults to the cache file.
        return_table: When true, print the sorted table afterwards.

    Returns:
        None. If ``column`` is not in the header, a message is printed and
        nothing is written.
    """
    header, lines = get_content(file)

    if column not in header:
        print("Spalte '" + column +"' nicht gefunden.")
        return
    if not area:
        area = [1, len(lines)]
    rows = [line.strip().split(",") for line in lines[area[0] : area[1]]]

    col_index = header.index(column)
    try:
        # Numeric columns are compared as integers ...
        keys = [int(row[col_index]) for row in rows]
    except ValueError:
        # ... text columns (e.g. names) fall back to string comparison
        # instead of aborting with a ValueError.
        keys = [row[col_index] for row in rows]

    # sorted() is stable in both directions, so rows with equal keys keep
    # their original relative order, exactly like the former bubble sort,
    # but in O(n log n) instead of O(n^2).
    order = sorted(range(len(rows)), key=keys.__getitem__, reverse=reverse)
    rows = [rows[i] for i in order]

    #Output in file "cache.csv"
    generate_cache_file(header, rows)
    #Output in console
    print("\nSorted by column:", column)
    print("Range:", area[0], "to", area[1])
    if return_table:
        print_formatted_table()


def count_humans(file = cache_file):
    """Return the sum of the "Number" column over all data rows.

    Args:
        file: Path of the CSV file to read. Defaults to the cache file.

    Returns:
        The integer total of the "Number" column (0 when there are no rows).
    """
    header, lines = get_content(file)
    number_idx = header.index("Number")
    return sum(int(line.strip().split(",")[number_idx]) for line in lines[1:])

def average_age(file=cache_file, current_year=None):
    """Return the mean age over all data rows of a CSV file.

    The age of a row is ``current_year`` minus its "YearOfBirth" value.

    Args:
        file: Path of the CSV file to read. Defaults to the cache file.
        current_year: Year the ages are calculated for. Defaults to the
            current calendar year, so results do not go stale.

    Returns:
        The average age as a float, or None (after printing a message) when
        the file has no data rows.
    """
    if current_year is None:
        import datetime

        current_year = datetime.date.today().year

    header, lines = get_content(file)
    col_index = header.index("YearOfBirth")

    # NOTE(review): every row counts once; rows are not weighted by their
    # "Number" column. Confirm that this is the intended average.
    birth_years = [int(line.strip().split(",")[col_index]) for line in lines[1:]]

    if not birth_years:
        print("No data available to calculate average age.")
        return None

    return sum(current_year - year for year in birth_years) / len(birth_years)
# ===== MAIN FUNCTION =====
def _prompt_int(prompt, default):
    """Ask for a whole number; empty input returns ``default``, invalid input re-asks."""
    while True:
        raw = input(prompt).strip()
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            print("Please enter a whole number.")


def main():
    """Run the interactive console loop.

    On each pass the user either filters the source file (then optionally
    views, summarises and sorts the filtered result), asks for statistics on
    the unfiltered source file, or quits.
    """
    # Constants
    cache_file = "cache.csv"
    source_file = "names.csv"

    while True:

        # ask user if they want to filter data
        filter_choice = input("\nWould you like to filter the data? (yes/no/quit): ").strip().lower()

        if filter_choice == 'quit':
            print("Exiting program...")
            break

        if filter_choice == 'yes':
            # ask for filter parameters from user
            print("\nAvailable columns to filter by: StateCode, YearOfBirth, Sex, Number")
            column = input("Enter column name to filter by: ").strip()
            values = input("Enter values to filter for (comma-separated): ").split(',')
            values = [v.strip() for v in values]  # Clean up input

            # filter
            header, _ = get_content(source_file)
            get_rows_by_value(column, values, source_file)
            if column not in header:
                # The unknown column has already been reported and no new
                # cache file was written, so do not claim success or show
                # results left over from an earlier filter.
                continue
            print("\nFilter applied successfully!")

            # Show filtered data
            show_data = input("Would you like to see the filtered data? (yes/no): ").strip().lower()
            if show_data == 'yes':
                print_formatted_table(cache_file)

            # Show statistics
            show_stats = input("\nWould you like to see statistics? (yes/no): ").strip().lower()
            if show_stats == 'yes':
                print(f"\nTotal humans in filtered data: {count_humans(cache_file)}")
                avg = average_age(cache_file)
                if avg is not None:
                    print(f"Average age: {avg:.1f} years")

            # Sorting option
            sort_choice = input("\nWould you like to sort the data? (yes/no): ").strip().lower()
            if sort_choice == 'yes':
                sort_column = input("Enter column to sort by: ").strip()
                _, cached_lines = get_content(cache_file)
                # Line 0 is the header, so the first sortable row is 1.
                start = max(1, _prompt_int("Enter start row (default 1): ", 1))
                # "All" means up to the end of the file. A default of -1
                # would be used as a slice bound and drop the last row.
                end = _prompt_int("Enter end row (default all): ", len(cached_lines))
                direction = input("Sort direction (asc/desc): ").strip().lower()
                sort_by(sort_column, [start, end], direction == 'desc', cache_file, True)

        elif filter_choice == 'no':
            # Show original data statistics
            print("\nShowing statistics for original data:")
            print(f"Total humans: {count_humans(source_file)}")
            avg = average_age(source_file)
            if avg is not None:
                print(f"Average age: {avg:.1f} years")

        else:
            print("Please enter 'yes', 'no', or 'quit'")
            

# ===== ENTRY POINT =====
# Start the interactive loop only when this code runs as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Only returned rows with value in column: StateCode
For Values: ['DE']

Only returned rows with value in column: YearOfBirth
For Values: ['1990']

Only returned rows with value in column: Sex
For Values: ['M']

Formatted Table, from file: cache.csv
With a total of 173 entrys.
--------------------------------------------------------------------------------------------------------------------
| StateCode            | Sex                  | YearOfBirth          | Name                 | Number               |
--------------------------------------------------------------------------------------------------------------------
| DE                   | M                    | 1990                 | Michael              | 229                  |
| DE                   | M                    | 1990                 | Christopher          | 182                  |
| DE                   | M                    | 1990                 | Matthew              | 158                  |
| DE                 