# Playground

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

# Load the processed events CSV (the original message suggests it is
# produced by data_pipeline.py -- confirm against the pipeline).
data_path = Path('../data/processed/events_2025.csv')

# Fail fast when the file is missing. Merely printing a warning would leave
# `df` undefined on a fresh kernel (later cells die with a confusing
# NameError) or, worse, leave a stale `df` from a previous run in place so
# later cells silently analyse old data.
if not data_path.exists():
    raise FileNotFoundError(
        f"{data_path} does not exist. Please run data_pipeline.py first."
    )

df = pd.read_csv(data_path)

# errors='coerce' turns unparseable date strings into NaT instead of raising.
df['occurrence_date'] = pd.to_datetime(df['occurrence_date'], errors='coerce')

# Derive calendar-part columns only when the CSV does not already provide them.
for part in ('year', 'month', 'day'):
    if part not in df.columns:
        df[part] = getattr(df['occurrence_date'].dt, part)

print("Data loaded successfully!")
print(f"Total records: {len(df)}")
print(f"Date range: {df['occurrence_date'].min()} to {df['occurrence_date'].max()}")

Data loaded successfully!
Total records: 6403
Date range: 2011-08-30 00:00:00+00:00 to 2025-11-14 08:07:00+00:00


In [2]:
# Alphabetical list of the distinct continent labels, ignoring missing values.
# sorted() accepts any iterable and always returns a new list.
available_continents = sorted(df['continent'].dropna().unique())
available_continents


['Africa',
 'Antarctica',
 'Asia',
 'Europe',
 'North America',
 'Oceania',
 'South America']

In [5]:
# Number of events per continent, most frequent first.
# dropna=True (the default, spelled out here) excludes rows with no continent.
df['continent'].value_counts(dropna=True)

continent
North America    2052
Africa           1586
South America     888
Oceania           705
Antarctica        615
Asia              530
Europe             27
Name: count, dtype: int64