In [42]:
import polars as pl
pl.__version__  # The book is built with Polars version 1.20.0

'1.32.2'

In [1]:
import polars as pl

file_path = "C:\\temp\\coroner_data.csv"  # Replace with your actual file path

try:
    df = pl.read_csv(file_path)
    print(f"Successfully read {file_path} into a Polars DataFrame.")
    print(f"Shape: {df.shape}")  # Display the number of rows and columns
    print(df.head()) #Print the first 5 rows to show a sample of the data.

except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except pl.exceptions.ComputeError as e:
    print(f"Error reading CSV: {e}")  # Handles common parsing problems

# Now you can analyze the DataFrame (e.g., `df.describe()`, `df.filter(...)`, etc.)


Successfully read C:\temp\coroner_data.csv into a Polars DataFrame.
Shape: (1223, 18)
shape: (5, 18)
┌─────────────┬─────────────┬─────────────────┬─────┬───┬──────────────────┬─────────────────┬───────────┬─────────────┐
│ Case Number ┆ Case Status ┆ Manner of Death ┆ Age ┆ … ┆ Cause of Death   ┆ Other           ┆ Latitude  ┆ Longitude   │
│ ---         ┆ ---         ┆ ---             ┆ --- ┆   ┆ ---              ┆ Significant     ┆ ---       ┆ ---         │
│ str         ┆ str         ┆ str             ┆ i64 ┆   ┆ str              ┆ Condition       ┆ f64       ┆ f64         │
│             ┆             ┆                 ┆     ┆   ┆                  ┆ ---             ┆           ┆             │
│             ┆             ┆                 ┆     ┆   ┆                  ┆ str             ┆           ┆             │
╞═════════════╪═════════════╪═════════════════╪═════╪═══╪══════════════════╪═════════════════╪═══════════╪═════════════╡
│ 21-02626    ┆ Closed      ┆ Suicide         ┆ 98  

In [44]:
import polars as pl
from io import StringIO

file_path = "C:\\temp\\coroner_data.csv"  # Replace with your actual file path

try:
    df = pl.read_csv(file_path)
    print(f"Successfully read {file_path} into a Polars DataFrame.")

    # Count occurrences of each "Resident Zip" and sort descending
    zip_counts = df.group_by("Resident Zip").agg(pl.len().alias("count")).sort("count", descending=True)

    print("\nResident Zip Counts (Highest to Lowest):")
    print(zip_counts)

except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except pl.exceptions.ComputeError as e:
    print(f"Error reading CSV or processing data: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

# no known zip can be indicative of no home


Successfully read C:\temp\coroner_data.csv into a Polars DataFrame.

Resident Zip Counts (Highest to Lowest):
shape: (122, 2)
┌──────────────┬───────┐
│ Resident Zip ┆ count │
│ ---          ┆ ---   │
│ i64          ┆ u32   │
╞══════════════╪═══════╡
│ null         ┆ 88    │
│ 95112        ┆ 48    │
│ 95037        ┆ 36    │
│ 95111        ┆ 36    │
│ 95127        ┆ 35    │
│ …            ┆ …     │
│ 94541        ┆ 1     │
│ 94117        ┆ 1     │
│ 72703        ┆ 1     │
│ 93912        ┆ 1     │
│ 94580        ┆ 1     │
└──────────────┴───────┘


In [46]:

#PURE PYTHON

file_path = "C:\\temp\\coroner_data.csv"  # Replace with your actual file path
output_txt_path = "C:\\temp\\resident_zip_counts.txt"  # Replace with your desired .txt output path

try:
    df = pl.read_csv(file_path)
    print(f"Successfully read coroner data from {file_path} into a Polars DataFrame.")

    # Count occurrences of each "Resident Zip" and retrieve corresponding "Resident City"
    zip_counts = (
        df.group_by(["Resident Zip", "Resident City"])
        .agg(pl.len().alias("count"))
        .sort("count", descending=True)
    )

    print("\nResident Zip Counts (Highest to Lowest) with Cities:")
    print(zip_counts)

    #Generate TXT file listing zip codes, handling missing data
    with open(output_txt_path, "w") as f:
        #Write header
        f.write("Resident Zip Counts (Highest to Lowest):\n")

        #Iterate through the DataFrame and write each zip code and associated city
        for row in zip_counts.rows():
            zip_code, city, count = row

            if zip_code is None or str(zip_code).strip() == "":
                f.write(f"Zip Code: No Known Zip, City: {city}, Count: {count}\n")
            else:
                 f.write(f"Zip Code: {zip_code}, City: {city}, Count: {count}\n")
    print(f"Successfully generated TXT file: {output_txt_path}")

except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except pl.exceptions.ComputeError as e:
    print(f"Error reading CSV or processing data: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Successfully read coroner data from C:\temp\coroner_data.csv into a Polars DataFrame.

Resident Zip Counts (Highest to Lowest) with Cities:
shape: (133, 3)
┌──────────────┬───────────────────────┬───────┐
│ Resident Zip ┆ Resident City         ┆ count │
│ ---          ┆ ---                   ┆ ---   │
│ i64          ┆ str                   ┆ u32   │
╞══════════════╪═══════════════════════╪═══════╡
│ null         ┆ N/A                   ┆ 82    │
│ 95112        ┆ San Jose              ┆ 48    │
│ 95111        ┆ San Jose              ┆ 36    │
│ 95037        ┆ Morgan Hill           ┆ 35    │
│ 95127        ┆ San Jose              ┆ 35    │
│ …            ┆ …                     ┆ …     │
│ 95330        ┆ Lathrop               ┆ 1     │
│ 92154        ┆ San Diego             ┆ 1     │
│ null         ┆ Viluppuran; Tamilnadu ┆ 1     │
│ 95023        ┆ Rural Gilroy          ┆ 1     │
│ 98115        ┆ Seattle               ┆ 1     │
└──────────────┴───────────────────────┴───────┘
Successfull

In [64]:
import polars as pl
import polars.selectors as cs

file_path = "C:\\temp\\coroner_data.csv"
output_txt_path = "C:\\temp\\race_statistics.txt"

try:
    df = pl.read_csv(file_path)
    print(f"Successfully read coroner data from {file_path} into a Polars DataFrame.")
    
    # Calculate total count for percentages
    total_count = len(df)
    
    # Group by Race and calculate count, percentage, and average age
    race_stats = (
        df.group_by("Race")
        .agg([
            pl.len().alias("count"),
            pl.col("Age").mean().alias("average_age")
        ])
        .with_columns([
            (pl.col("count") / total_count * 100).alias("percentage")
        ])
        .sort("count", descending=True)
    )
    
    print("\nRace Statistics:")
    print(race_stats)
    
    # Write to text file
    with open(output_txt_path, "w") as f:
        f.write("Race Statistics Report\n")
        f.write("=" * 60 + "\n\n")
        f.write(f"Total Records: {total_count}\n\n")
        
        for row in race_stats.rows():
            race, count, avg_age, percentage = row
            race_name = race if race is not None else "Unknown"
            avg_age_str = f"{avg_age:.1f}" if avg_age is not None else "N/A"
            
            f.write(f"Race: {race_name}\n")
            f.write(f"  Count: {count}\n")
            f.write(f"  Percentage: {percentage:.2f}%\n")
            f.write(f"  Average Age: {avg_age_str}\n")
            f.write("-" * 60 + "\n")
    
    print(f"Successfully generated TXT file: {output_txt_path}")
    
except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except pl.exceptions.ComputeError as e:
    print(f"Error reading CSV or processing data: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Successfully read coroner data from C:\temp\coroner_data.csv into a Polars DataFrame.

Race Statistics:
shape: (15, 4)
┌─────────────────────────────────┬───────┬─────────────┬────────────┐
│ Race                            ┆ count ┆ average_age ┆ percentage │
│ ---                             ┆ ---   ┆ ---         ┆ ---        │
│ str                             ┆ u32   ┆ f64         ┆ f64        │
╞═════════════════════════════════╪═══════╪═════════════╪════════════╡
│ White                           ┆ 596   ┆ 54.417785   ┆ 48.732625  │
│ Asian                           ┆ 297   ┆ 45.582492   ┆ 24.284546  │
│ Hispanic/Latino                 ┆ 235   ┆ 38.412766   ┆ 19.215045  │
│ BlackAfricanAmerican            ┆ 29    ┆ 36.965517   ┆ 2.371218   │
│ Other                           ┆ 28    ┆ 30.535714   ┆ 2.289452   │
│ …                               ┆ …     ┆ …           ┆ …          │
│ American Indian                 ┆ 3     ┆ 37.666667   ┆ 0.245298   │
│ American Indian / Alaskan N

In [29]:

# POLARS SELECTORS EXAMPLE

import polars as pl
import polars.selectors as cs

file_path = "C:\\temp\\coroner_data.csv"  # Replace with your actual file path
output_txt_path = "C:\\temp\\polars_resident_zip_counts.txt"  # Replace with your desired .txt output path

try:
    df = pl.read_csv(file_path)
    print(f"Successfully read coroner data from {file_path} into a Polars DataFrame.")

    # Count occurrences of each "Resident Zip" and retrieve corresponding "Resident City"
    zip_counts = (
        df.group_by([cs.starts_with("Resident Zip"), cs.starts_with("Resident City")])
        .agg(pl.len().alias("count"))
        .sort("count", descending=True)
    )

    print("\nResident Zip Counts (Highest to Lowest) with Cities:")
    print(zip_counts)

    #Generate TXT file listing zip codes, handling missing data
    with open(output_txt_path, "w") as f:
        #Write header
        f.write("Resident Zip Counts (Highest to Lowest):\n")

        #Iterate through the DataFrame and write each zip code and associated city
        for row in zip_counts.rows():
           zip_code, city, count = row

           if zip_code is None or str(zip_code).strip() == "":
                f.write(f"Zip Code: No Known Zip, City: {city}, Count: {count}\n")
           else:
                f.write(f"Zip Code: {zip_code}, City: {city}, Count: {count}\n")
    print(f"Successfully generated TXT file: {output_txt_path}")

except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except pl.exceptions.ComputeError as e:
    print(f"Error reading CSV or processing data: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Successfully read coroner data from C:\temp\coroner_data.csv into a Polars DataFrame.

Resident Zip Counts (Highest to Lowest) with Cities:
shape: (133, 3)
┌──────────────┬───────────────┬───────┐
│ Resident Zip ┆ Resident City ┆ count │
│ ---          ┆ ---           ┆ ---   │
│ i64          ┆ str           ┆ u32   │
╞══════════════╪═══════════════╪═══════╡
│ null         ┆ N/A           ┆ 82    │
│ 95112        ┆ San Jose      ┆ 48    │
│ 95111        ┆ San Jose      ┆ 36    │
│ 95127        ┆ San Jose      ┆ 35    │
│ 95037        ┆ Morgan Hill   ┆ 35    │
│ …            ┆ …             ┆ …     │
│ null         ┆ San Jose      ┆ 1     │
│ 94596        ┆ Walnut Creek  ┆ 1     │
│ 94580        ┆ San Lorenzo   ┆ 1     │
│ 77092        ┆ Houston       ┆ 1     │
│ 95023        ┆ Rural Gilroy  ┆ 1     │
└──────────────┴───────────────┴───────┘
Successfully generated TXT file: C:\temp\polars_resident_zip_counts.txt


In [57]:
# filter template
#df.filter(pl.col("Resident City") & (pl.col("Other Significant Condition") == "Depression"))  

df.filter(
    pl.col("Resident City").is_not_null() & 
    (pl.col("Other Significant Condition") == "Depression")
).write_csv("C:\\temp\\resident_city.csv")

In [63]:
df.filter(
    pl.col("Resident City").is_not_null() & 
    (pl.col("Other Significant Condition") != "Depression")
).write_csv("C:\\temp\\no_depression_resident_city.csv")

In [60]:
df_no_city = df.filter(
    (pl.col("Resident City") == 'N/A') & 
    (pl.col("Other Significant Condition") == "Depression")
).write_csv("C:\\temp\\no_resident_city.csv")

In [54]:
# Filter for rows with Resident City AND Depression
 
df_with_city = df.filter(
    pl.col("Resident City").is_not_null() & 
    (pl.col("Other Significant Condition") == "Depression")
)

# Filter for rows without Resident City but with Depression
df_no_city = df.filter(
    pl.col("Resident City") == "N/A" & 
    (pl.col("Other Significant Condition") == "Depression")
)

# Write to files
df_with_city.write_csv("C:\\temp\\residence_depression.txt")
df_no_city.write_csv("C:\\temp\\no_residence_depression.txt")

In [51]:
print(df["Resident City"].value_counts())

shape: (76, 2)
┌─────────────────┬───────┐
│ Resident City   ┆ count │
│ ---             ┆ ---   │
│ str             ┆ u32   │
╞═════════════════╪═══════╡
│ San Luis Obispo ┆ 1     │
│ Carson City     ┆ 1     │
│ Antioch         ┆ 1     │
│ Clearlake       ┆ 1     │
│ Newbury Park    ┆ 1     │
│ …               ┆ …     │
│ Bakersfield     ┆ 1     │
│ San Jose        ┆ 577   │
│ Roseville       ┆ 1     │
│ Los Gatos       ┆ 23    │
│ Saratoga        ┆ 24    │
└─────────────────┴───────┘


In [31]:
import polars as pl
import polars.selectors as cs
from io import StringIO

csv_data = """Case Number,Case Status,Manner of Death,Age,Race,Gender,Death Date,Death City,Death Zip,Resident City,Resident Zip,Incident Location,Incident City,Incident Zip,Cause of Death,Other Significant Condition,Latitude,Longitude
21-02626,Closed,Suicide,98,White,Male,5/28/2021 12:02,Los Altos,94022,Los Altos,94022,N/A,Los Altos,94022,Gunshot wound of head,None,37.3574779,-122.1444356
23-01099,Closed,Suicide,95,White,Female,3/19/2023 12:55,Santa Clara,95054,Santa Clara,95054,N/A,Santa Clara,95054,Combined hydrocodone; tramadol; codeine; morphine; and acetaminophen toxicity,Congestive heart failure; stage IV following lymphoma; hypertension; atherosclerotic cardiovascular disease; chronic kidney disease; chronic pain,37.3934098,-121.9647014
24-04244,Closed,Suicide,94,Asian,Male,12/17/2024 4:11,San Jose,95129,San Jose,95129,N/A,San Jose,95129,Hanging,None,37.3057424,-122.000927
23-04023,Closed,Suicide,93,White,Female,12/1/2023 9:40,Santa Clara,95050,Santa Clara,95050,N/A,Santa Clara,95050,Combined drug toxicity (Secobarbital; alprazolam; diphenhydramine; citalopram/escitalopram),Advanced age,37.3511428,-121.9523083
19-03242,Closed,Suicide,93,White,Male,11/18/2019 19:08,San Jose,95116,San Jose,95135,N/A,San Jose,95135,Gunshot wound of head,None,37.3504209,-121.8525852
21-05157,Closed,Suicide,93,White,Male,12/20/2021 14:43,Palo Alto,94301,Palo Alto,94301,N/A,Palo Alto,94301,Perforating contact submental gunshot wound of the head,None,37.4417477,-122.1499165"""

file_path = "C:\\temp\\coroner_data.csv"  # Replace with your actual file path
output_txt_path = "C:\\temp\\year_death_counts.txt"  # Replace with your desired .txt output path

try:
    df = pl.read_csv(file_path)
    print(f"Successfully read coroner data from {file_path} into a Polars DataFrame.")

    # Extract the year from the 'Death Date' column using a regex and selector
    date_col = df.select(cs.starts_with("Death Date")).columns[0]  # Get the actual column name
    df = df.with_columns(
        pl.col(date_col).str.extract(r"(\d{4})").alias("Death Year")
    )

    # Check if the column exists.
    if "Death Year" not in df.columns:
        print("Could not Extract Date from given data")
        # exit() #or continue and skip what cannot be done.

    # Group by 'Death Year' and count the number of deaths
    year_counts = (
        df.group_by("Death Year")
        .agg(pl.len().alias("Death Count"))
        .sort("Death Year")  # Sort the year
    )

    print("\nYear and Death Counts:")
    print(year_counts)

    # Generate TXT file
    with open(output_txt_path, "w") as f:
        f.write("Year and Death Counts:\n")
        for row in year_counts.rows():
            year, count = row
            f.write(f"Year: {year}, Death Count: {count}\n")

    print(f"Successfully generated TXT file: {output_txt_path}")

except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except pl.exceptions.ComputeError as e:
    print(f"Error reading CSV or processing data: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Successfully read coroner data from C:\temp\coroner_data.csv into a Polars DataFrame.

Year and Death Counts:
shape: (8, 2)
┌────────────┬─────────────┐
│ Death Year ┆ Death Count │
│ ---        ┆ ---         │
│ str        ┆ u32         │
╞════════════╪═════════════╡
│ 2018       ┆ 150         │
│ 2019       ┆ 169         │
│ 2020       ┆ 166         │
│ 2021       ┆ 154         │
│ 2022       ┆ 181         │
│ 2023       ┆ 138         │
│ 2024       ┆ 168         │
│ 2025       ┆ 97          │
└────────────┴─────────────┘
Successfully generated TXT file: C:\temp\year_death_counts.txt


In [34]:
import polars as pl
import polars.selectors as cs

file_path = "C:\\temp\\coroner_data.csv"  # Replace with your actual file path
output_txt_path = "C:\\temp\\resident_zip_counts.txt"  # Replace with your desired .txt output path

try:
    df = pl.read_csv(file_path)
    print(f"Successfully read coroner data from {file_path} into a Polars DataFrame.")

    # Count occurrences of each "Resident Zip" and retrieve corresponding "Resident City"
    resident_zip_col = df.select(cs.starts_with("Resident Zip")).columns[0]
    resident_city_col = df.select(cs.starts_with("Resident City")).columns[0]

    zip_counts = (
        df.group_by([resident_zip_col, resident_city_col])
        .agg(pl.len().alias("count"))
        .sort("count", descending=True)
    )

    print("\nResident Zip Counts (Highest to Lowest) with Cities:")
    print(zip_counts)

    # Generate TXT file listing zip codes, handling missing data
    with open(output_txt_path, "w") as f:
        # Write header
        f.write("Resident Zip Counts (Highest to Lowest):\n")

        # Iterate through the DataFrame and write each zip code and associated city
        for row in zip_counts.rows():
            zip_code, city, count = row

            # Convert to string and check for missing or empty values
            zip_code_str = str(zip_code) if zip_code is not None else ""

            if not zip_code_str.strip():
                f.write(f"Zip Code: No Known Zip, City: {city}, Count: {count}\n")
            else:
                f.write(f"Zip Code: {zip_code}, City: {city}, Count: {count}\n")

    print(f"Successfully generated TXT file: {output_txt_path}")

except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except pl.exceptions.ComputeError as e:
    print(f"Error reading CSV or processing data: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
 

Successfully read coroner data from C:\temp\coroner_data.csv into a Polars DataFrame.

Resident Zip Counts (Highest to Lowest) with Cities:
shape: (133, 3)
┌──────────────┬───────────────┬───────┐
│ Resident Zip ┆ Resident City ┆ count │
│ ---          ┆ ---           ┆ ---   │
│ i64          ┆ str           ┆ u32   │
╞══════════════╪═══════════════╪═══════╡
│ null         ┆ N/A           ┆ 82    │
│ 95112        ┆ San Jose      ┆ 48    │
│ 95111        ┆ San Jose      ┆ 36    │
│ 95037        ┆ Morgan Hill   ┆ 35    │
│ 95127        ┆ San Jose      ┆ 35    │
│ …            ┆ …             ┆ …     │
│ 93434        ┆ Guadalupe     ┆ 1     │
│ 59602        ┆ Helena        ┆ 1     │
│ 94560        ┆ Newark        ┆ 1     │
│ 94018        ┆ El Granada    ┆ 1     │
│ 94602        ┆ Oakland       ┆ 1     │
└──────────────┴───────────────┴───────┘
Successfully generated TXT file: C:\temp\resident_zip_counts.txt


In [38]:
import polars as pl
import polars.selectors as cs

file_path = "C:\\temp\\coroner_data.csv"  # Replace with your actual file path
output_txt_path = "C:\\temp\\polars_year_death_counts.txt"  # Replace with your desired .txt output path

try:
    df = pl.read_csv(file_path)
    print(f"Successfully read coroner data from {file_path} into a Polars DataFrame.")

    # Extract the year from the 'Death Date' column using a regex and selector
    date_col = df.select(cs.starts_with("Death Date")).columns[0]  # Get the actual column name
    df = df.with_columns(
        pl.col(date_col).str.extract(r"(\d{4})").alias("Death Year")
    )

    # Check if the column exists.
    if "Death Year" not in df.columns:
        print("Could not Extract Date from given data. Continuing without year-based analysis.")
        total_death_count = df.height  # If no Death Year, total is just number of rows
    else:
        # Group by 'Death Year' and count the number of deaths
        year_counts = (
            df.group_by("Death Year")
            .agg(pl.len().alias("Death Count"))
            .sort("Death Year")  # Sort the year
        )

        print("\nYear and Death Counts:")
        print(year_counts)

        # Calculate total death count
        total_death_count = year_counts["Death Count"].sum()

        # Generate TXT file
        with open(output_txt_path, "w") as f:
            f.write("Year and Death Counts:\n")
            for row in year_counts.rows():
                year, count = row
                f.write(f"Year: {year}, Death Count: {count}\n")

            # Append total death count to the file
            f.write(f"\nTotal Death Count: {total_death_count}\n")

    print(f"Total Death Count: {total_death_count}")  # Print to console
    print(f"Successfully generated TXT file: {output_txt_path}") #This only prints if the date parses!

except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except pl.exceptions.ComputeError as e:
    print(f"Error reading CSV or processing data: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Successfully read coroner data from C:\temp\coroner_data.csv into a Polars DataFrame.

Year and Death Counts:
shape: (8, 2)
┌────────────┬─────────────┐
│ Death Year ┆ Death Count │
│ ---        ┆ ---         │
│ str        ┆ u32         │
╞════════════╪═════════════╡
│ 2018       ┆ 150         │
│ 2019       ┆ 169         │
│ 2020       ┆ 166         │
│ 2021       ┆ 154         │
│ 2022       ┆ 181         │
│ 2023       ┆ 138         │
│ 2024       ┆ 168         │
│ 2025       ┆ 97          │
└────────────┴─────────────┘
Total Death Count: 1223
Successfully generated TXT file: C:\temp\polars_year_death_counts.txt


In [39]:
import polars as pl
import polars.selectors as cs

file_path = "C:\\temp\\coroner_data.csv"  # Replace with your actual file path
output_txt_path = "C:\\temp\\other_counts.txt"  # Replace with your desired .txt output path

try:
    df = pl.read_csv(file_path)
    print(f"Successfully read coroner data from {file_path} into a Polars DataFrame.")

    # Get the actual column name for "Other Significant Condition" using a selector
    condition_col = df.select(cs.starts_with("Other Significant Condition")).columns[0]

    # Count occurrences of each significant condition and sort descending
    condition_counts = (
        df.group_by(condition_col)
        .agg(pl.len().alias("Count"))
        .sort("Count", descending=True)
    )

    print("\nOther Significant Conditions and Counts:")
    print(condition_counts)

    # Create TXT Report
    with open(output_txt_path, "w") as f:
        f.write("Other Significant Conditions (Highest to Lowest):\n\n")
        for row in condition_counts.rows():
            condition, count = row
            # Handle missing conditions
            if condition is None or str(condition).strip() == "":
                f.write(f"Condition: None Listed, Count: {count}\n")
            else:
                f.write(f"Condition: {condition}, Count: {count}\n")  # Proper formatting

    print(f"Successfully generated report file: {output_txt_path}")


except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except pl.exceptions.ComputeError as e:
    print(f"Error reading CSV or processing data: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Successfully read coroner data from C:\temp\coroner_data.csv into a Polars DataFrame.

Other Significant Conditions and Counts:
shape: (319, 2)
┌─────────────────────────────────┬───────┐
│ Other Significant Condition     ┆ Count │
│ ---                             ┆ ---   │
│ str                             ┆ u32   │
╞═════════════════════════════════╪═══════╡
│ None                            ┆ 636   │
│ N/A                             ┆ 193   │
│ Depression                      ┆ 21    │
│ Major depressive disorder       ┆ 16    │
│ Bipolar disorder                ┆ 6     │
│ …                               ┆ …     │
│ Gastroesophegeal junction aden… ┆ 1     │
│ Major depressive disorder; pos… ┆ 1     │
│ Persistent depressive disorder… ┆ 1     │
│ Depression; obsessive compulsi… ┆ 1     │
│ Generalized anxiety disorder; … ┆ 1     │
└─────────────────────────────────┴───────┘
Successfully generated report file: C:\temp\other_counts.txt


In [40]:
import polars as pl
import polars.selectors as cs

file_path = "C:\\temp\\coroner_data.csv"  # Replace with your actual file path
output_txt_path = "C:\\temp\\significant_conditions_report.txt"  # Replace with desired output path

try:
    df = pl.read_csv(file_path)
    print(f"Successfully read coroner data from {file_path} into a Polars DataFrame.")

    # Get the actual column name for "Other Significant Condition" using a selector
    condition_col = df.select(cs.starts_with("Other Significant Condition")).columns[0]

    # Count occurrences of each significant condition
    condition_counts = (
        df.group_by(condition_col)
        .agg(pl.len().alias("Count"))
        .sort("Count", descending=True)
    )

    # Calculate total count
    total_count = df.height

    # Calculate percentages and add to DataFrame
    condition_counts = condition_counts.with_columns(
        (pl.col("Count") / total_count * 100).round(2).alias("Percentage")  # Calculate and round
    )

    print("\nOther Significant Conditions and Counts (with Percentages):")
    print(condition_counts)

    # Generate TXT Report with Counts and Percentages
    with open(output_txt_path, "w") as f:
        f.write("Other Significant Conditions (Highest to Lowest) with Percentages:\n\n")
        for row in condition_counts.rows():
            condition, count, percentage = row
            #Handle cases for empty row
            if condition is None or str(condition).strip() == "":
                f.write(f"Condition: None Listed, Count: {count}, Percentage: {percentage}%\n")
            else:
                f.write(f"Condition: {condition}, Count: {count}, Percentage: {percentage}%\n")

        f.write(f"\nTotal Records: {total_count}\n")  # Add Total Record to show it.

    print(f"Successfully generated report file: {output_txt_path}")

except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
except pl.exceptions.ComputeError as e:
    print(f"Error reading CSV or processing data: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Successfully read coroner data from C:\temp\coroner_data.csv into a Polars DataFrame.

Other Significant Conditions and Counts (with Percentages):
shape: (319, 3)
┌─────────────────────────────────┬───────┬────────────┐
│ Other Significant Condition     ┆ Count ┆ Percentage │
│ ---                             ┆ ---   ┆ ---        │
│ str                             ┆ u32   ┆ f64        │
╞═════════════════════════════════╪═══════╪════════════╡
│ None                            ┆ 636   ┆ 52.0       │
│ N/A                             ┆ 193   ┆ 15.78      │
│ Depression                      ┆ 21    ┆ 1.72       │
│ Major depressive disorder       ┆ 16    ┆ 1.31       │
│ Depression; anxiety             ┆ 6     ┆ 0.49       │
│ …                               ┆ …     ┆ …          │
│ Major depression                ┆ 1     ┆ 0.08       │
│ Depression and anxiety; prior … ┆ 1     ┆ 0.08       │
│ Prior stroke; hypertension; hy… ┆ 1     ┆ 0.08       │
│ Depression and anxiety          ┆ 1  

In [61]:
print(df["Other Significant Condition"].value_counts(sort=True))

shape: (319, 2)
┌─────────────────────────────────┬───────┐
│ Other Significant Condition     ┆ count │
│ ---                             ┆ ---   │
│ str                             ┆ u32   │
╞═════════════════════════════════╪═══════╡
│ None                            ┆ 636   │
│ N/A                             ┆ 193   │
│ Depression                      ┆ 21    │
│ Major depressive disorder       ┆ 16    │
│ Bipolar disorder                ┆ 6     │
│ …                               ┆ …     │
│ Mild depression with social ad… ┆ 1     │
│ Major depressive disorder; rec… ┆ 1     │
│ Attention-Deficit/Hyperactivit… ┆ 1     │
│ History of self-injurious beha… ┆ 1     │
│ Metastatic small cell carcinom… ┆ 1     │
└─────────────────────────────────┴───────┘


In [62]:
df["Other Significant Condition"].value_counts(sort=True).write_csv("C:\\temp\\condition_counts.csv")