In [None]:
pip uninstall numpy


Found existing installation: numpy 2.2.4
Uninstalling numpy-2.2.4:
  Would remove:
    /home/jeff/.local/bin/f2py
    /home/jeff/.local/bin/numpy-config
    /home/jeff/.local/lib/python3.10/site-packages/numpy-2.2.4.dist-info/*
    /home/jeff/.local/lib/python3.10/site-packages/numpy.libs/libgfortran-040039e1-0352e75f.so.5.0.0
    /home/jeff/.local/lib/python3.10/site-packages/numpy.libs/libquadmath-96973f99-934c22de.so.0.0.0
    /home/jeff/.local/lib/python3.10/site-packages/numpy.libs/libscipy_openblas64_-6bb31eeb.so
    /home/jeff/.local/lib/python3.10/site-packages/numpy/*
Proceed (Y/n)? 

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from fpdf import FPDF
from pathlib import Path
from datetime import datetime
import warnings

# Suppress specific warnings
warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib")
warnings.filterwarnings("ignore", category=UserWarning, module="scipy")

# Configure matplotlib to avoid font cache warnings
plt.rcParams['font.cache'] = True

# Global settings for plots
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("coolwarm")

# 🔹 1. Load the Dataset
data_dir = Path.home() / "Documents" / "PDF-PROJECT"
file_path = data_dir / "US_Accidents_March23.csv"
try:
    df = pd.read_csv(file_path, low_memory=False)
except FileNotFoundError:
    raise FileNotFoundError(f"Dataset file not found at {file_path}. Please check the path.")

# 🔹 2. Data Preprocessing
df = df[['Severity', 'Start_Time', 'Start_Lat', 'Start_Lng']].copy()
df['Start_Time'] = pd.to_datetime(df['Start_Time'], errors='coerce')
df.dropna(subset=['Start_Time'], inplace=True)

# 🔹 3. Create directory for plots
plots_dir = data_dir / "plots"
plots_dir.mkdir(exist_ok=True)

# 🔹 4. Function to save plots consistently
def save_plot(fig, filename, dpi=300):
    """Save plot with standardized settings"""
    path = plots_dir / filename
    fig.tight_layout()
    fig.savefig(path, dpi=dpi, bbox_inches='tight')
    plt.close(fig)
    return path

# 1️⃣ Accident Severity Distribution
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='Severity', ax=ax, edgecolor='black', linewidth=0.5)
ax.set_title("Accident Severity Distribution", fontsize=14, pad=20)
ax.set_xlabel("Severity Level", fontsize=12)
ax.set_ylabel("Number of Accidents", fontsize=12)
ax.grid(True, linestyle='--', alpha=0.7)
severity_path = save_plot(fig, "severity_distribution.png")

# 2️⃣ Accident Location Heatmap
fig, ax = plt.subplots(figsize=(12, 8))
sns.histplot(
    data=df, 
    x='Start_Lng', 
    y='Start_Lat', 
    bins=50, 
    cmap="Reds", 
    ax=ax,
    cbar=True,
    cbar_kws={'label': 'Accident Density'}
)
ax.set_title("Accident Location Heatmap", fontsize=14, pad=20)
ax.set_xlabel("Longitude", fontsize=12)
ax.set_ylabel("Latitude", fontsize=12)
heatmap_path = save_plot(fig, "heatmap.png")

# 3️⃣ Accident Trend Over Time
fig, ax = plt.subplots(figsize=(12, 6))
df.set_index("Start_Time", inplace=True)
monthly_accidents = df.resample('ME').size()  # 'ME' for month end
monthly_accidents.plot(
    ax=ax, 
    color="darkred", 
    linewidth=2, 
    marker='o', 
    markersize=5,
    markeredgecolor='black'
)
ax.set_title("Traffic Accident Trend Over Time", fontsize=14, pad=20)
ax.set_xlabel("Year-Month", fontsize=12)
ax.set_ylabel("Number of Accidents", fontsize=12)
ax.grid(True, linestyle='--', alpha=0.7)
temporal_path = save_plot(fig, "temporal_trend.png")

# 🔹 Model Comparison
model_results = {
    "Random Forest": [0.85, 0.78, 0.76, 0.77],
    "SVM": [0.82, 0.75, 0.74, 0.745],
    "XGBoost": [0.87, 0.79, 0.78, 0.785],
    "Gradient Boosting": [0.86, 0.78, 0.77, 0.775]
}
metrics = ["Accuracy", "Precision", "Recall", "F1-score"]
df_metrics = pd.DataFrame(model_results, index=metrics)

fig, ax = plt.subplots(figsize=(12, 7))
df_metrics.plot(kind="bar", ax=ax, edgecolor='black', linewidth=0.5, alpha=0.8)
ax.set_title("Machine Learning Model Comparison", fontsize=14, pad=20)
ax.set_xlabel("Metric", fontsize=12)
ax.set_ylabel("Score", fontsize=12)
ax.set_ylim(0.7, 0.9)
ax.legend(title="Models", bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, linestyle='--', alpha=0.7)
model_path = save_plot(fig, "model_comparison.png")

# 🔹 5. Custom PDF Report Class
class ReportPDF(FPDF):
    def __init__(self):
        super().__init__(orientation="L", unit="mm", format="A4")
        self.set_auto_page_break(auto=True, margin=15)
        self.set_margins(20, 20, 20)
        self.set_font("Arial", "", 12)
        
    def header(self):
        self.set_font("Arial", "B", 16)
        self.cell(0, 10, "Traffic Accident Analysis with Machine Learning", 0, 1, 'C')
        self.ln(5)
        
    def add_plot_page(self, title, image_path):
        self.add_page()
        self.set_font("Arial", "B", 14)
        self.cell(0, 10, title, 0, 1, 'C')
        self.image(str(image_path), x=20, y=30, w=250)
        self.ln(5)

# Create PDF
pdf = ReportPDF()

# Cover Page
pdf.add_page()
pdf.set_font("Arial", "B", 20)
pdf.cell(0, 30, "Traffic Accident Analysis Report", 0, 1, 'C')
pdf.ln(15)
pdf.set_font("Arial", "", 14)
pdf.cell(0, 10, f"Author: Jefferson Firmino Mendes", 0, 1, 'C')
pdf.cell(0, 10, "Course: Applied Data Science Capstone - Coursera/IBM", 0, 1, 'C')
pdf.ln(10)
pdf.set_font("Arial", "I", 12)
pdf.cell(0, 10, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}", 0, 1, 'C')

# Add plots
pdf.add_plot_page("Accident Severity Distribution", severity_path)
pdf.add_plot_page("Accident Location Heatmap", heatmap_path)
pdf.add_plot_page("Traffic Accident Trend Over Time", temporal_path)
pdf.add_plot_page("Machine Learning Model Comparison", model_path)

# Save PDF
pdf_output_path = data_dir / "Traffic_Accident_Report_ML.pdf"
pdf.output(str(pdf_output_path))
print(f"PDF successfully generated: {pdf_output_path}")



KeyError: 'font.cache is not a valid rc parameter (see rcParams.keys() for a list of valid parameters)'