# NYC Leading Causes of Death — Project (Deaths column)

**File used:** `/mnt/data/New_York_City_Leading_Causes_of_Death_20251107.csv`



In [12]:
import sys, subprocess
print("Kernel Python:", sys.executable)
subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "pip"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas"])
import pandas as pd
print("pandas version:", pd.__version__)


Kernel Python: /usr/local/bin/python3.11
Collecting pip
  Downloading pip-25.3-py3-none-any.whl (1.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 17.9 MB/s eta 0:00:00
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 22.3.1
    Uninstalling pip-22.3.1:
      Successfully uninstalled pip-22.3.1
Successfully installed pip-25.3
Collecting pandas
  Downloading pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting numpy>=1.23.2 (from pandas)
  Downloading numpy-2.3.4-cp311-cp311-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl (10.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m27.6 MB/s[0m  [33m0:00:00[0

## 1) pandas — read and compute basic stats

In [14]:
import pandas as pd

FILE = r'/Users/macbook/Desktop/DSPC 6000/New_York_City_Leading_Causes_of_Death_20251107.csv'

# Read with pandas
df = pd.read_csv(FILE)

# Make sure Deaths is numeric
df["Deaths"] = pd.to_numeric(df["Deaths"], errors="coerce")

# Drop missing
deaths = df["Deaths"].dropna()

mean_pandas = deaths.mean()
median_pandas = deaths.median()
mode_pandas = deaths.mode()[0] if not deaths.mode().empty else None

print("pandas results:")
print(f"  Mean   : {mean_pandas:.2f}")
print(f"  Median : {median_pandas:.2f}")
print(f"  Mode   : {mode_pandas}")

pandas results:
  Mean   : 429.26
  Median : 140.00
  Mode   : 1.0


## 2) The hard way 



In [15]:
import csv

FILE = r'/Users/macbook/Desktop/DSPC 6000/New_York_City_Leading_Causes_of_Death_20251107.csv'

# Read CSV via standard library and extract numeric values from the "Deaths" column
with open(FILE, "r", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)
    # Find the "Deaths" column index
    try:
        death_idx = header.index("Deaths")
    except ValueError:
        raise RuntimeError("Column 'Deaths' not found in CSV header")
    
    deaths_list = []
    for row in reader:
        if death_idx < len(row):
            val = row[death_idx].strip()
            if val != "":
                try:
                    deaths_list.append(float(val))
                except ValueError:
                    # non-numeric -> skip
                    pass

# Mean
mean_std = sum(deaths_list) / len(deaths_list)

# Median
sorted_vals = sorted(deaths_list)
n = len(sorted_vals)
if n % 2 == 1:
    median_std = sorted_vals[n // 2]
else:
    median_std = (sorted_vals[n // 2 - 1] + sorted_vals[n // 2]) / 2

# Mode via frequency dict
freq = {}
for v in sorted_vals:
    freq[v] = freq.get(v, 0) + 1

max_count = max(freq.values())
# If multiple modes exist, we'll pick the smallest numeric mode for reproducibility
mode_candidates = [k for k, c in freq.items() if c == max_count]
mode_std = min(mode_candidates)

print("Standard library results:")
print(f"  Mean   : {mean_std:.2f}")
print(f"  Median : {median_std:.2f}")
print(f"  Mode   : {mode_std}")

Standard library results:
  Mean   : 429.26
  Median : 140.00
  Mode   : 1.0


## 3) Visualization — ASCII bar chart (standard library drawing)

- The data/calculations can come from pandas, but the **drawing** is plain Python.
- We aggregate total deaths by `Year` with pandas (for convenience), then render a scaled ASCII chart using only `print()` and strings.


In [16]:
# Aggregate with pandas for convenience
totals_by_year = (
    df.dropna(subset=["Deaths"])
      .groupby("Year")["Deaths"]
      .sum()
      .astype(int)
      .sort_index()
)

# Prepare a simple ASCII bar chart with max width of 50 characters
max_total = totals_by_year.max()
width = 50

def render_bar(total, max_total, width):
    if max_total <= 0:
        return ""
    bar_len = int(total * width / max_total)
    return "*" * bar_len

print("Total deaths per Year (scaled)\n")
for year, total in totals_by_year.items():
    bar = render_bar(total, max_total, width)
    print(f"{year}: {bar} ({total})")

Total deaths per Year (scaled)

2007: ******************************** (53996)
2008: ******************************** (54138)
2009: ******************************** (52820)
2010: ******************************* (52505)
2011: ******************************** (52726)
2012: ******************************* (52420)
2013: ******************************** (53387)
2014: ******************************** (53006)
2015: ******************************** (54120)
2016: ********************************* (54280)
2017: ********************************* (54319)
2018: ********************************* (55081)
2019: ********************************* (54559)
2020: ************************************************** (82142)
2021: ************************************** (63560)
