# Playground

In [12]:
import pandas as pd
import numpy as np
from pathlib import Path

In [13]:
ocurrences_df = pd.read_csv("../data/processed/occurrences.csv")
events_df = pd.read_csv("../data/processed/events.csv")

# convert to datetime
ocurrences_df['date'] = pd.to_datetime(ocurrences_df['date'])
events_df['date'] = pd.to_datetime(events_df['date'])

In [14]:
events_df.head(1)

Unnamed: 0,event_id,title,description,link,categories,sources,closed,status,category_titles,date,longitude,latitude,continent,country
0,EONET_6470,"Lewotobi Volcano, Indonesia",,https://eonet.gsfc.nasa.gov/api/v3/events/EONE...,['Volcanoes'],"['EO', 'SIVolcano']",,,,NaT,,,,


In [15]:
print(f"Total events: {len(events_df)}")

agg_df = events_df.groupby(events_df['date']).agg(count=('date', 'size')).reset_index()

# Ensure date is in datetime format
agg_df['date'] = pd.to_datetime(agg_df['date'])

agg_df

Total events: 5689


Unnamed: 0,date,count
0,2024-01-04 00:00:00+00:00,4
1,2024-01-11 00:00:00+00:00,3
2,2024-01-18 00:00:00+00:00,1
3,2024-02-02 00:00:00+00:00,2
4,2024-02-08 00:00:00+00:00,2
...,...,...
2051,2025-11-17 16:54:00+00:00,1
2052,2025-11-18 00:00:00+00:00,1
2053,2025-11-18 00:56:00+00:00,1
2054,2025-11-18 17:59:00+00:00,1


In [16]:
agg_df = events_df.groupby(events_df['date']).agg(count=('date', 'size')).reset_index()

agg_df.tail()


Unnamed: 0,date,count
2051,2025-11-17 16:54:00+00:00,1
2052,2025-11-18 00:00:00+00:00,1
2053,2025-11-18 00:56:00+00:00,1
2054,2025-11-18 17:59:00+00:00,1
2055,2025-11-20 09:18:00+00:00,1


In [17]:
ocurrences_df.head(2)

Unnamed: 0,event_id,event_title,event_description,event_link,category_ids,category_titles,occurrence_date,occurrence_type,longitude,latitude,month,day,region,country,continent,date
0,EONET_15876,"RX Rains 5318 Prescribed Fire, Rains, Texas",,https://eonet.gsfc.nasa.gov/api/v3/events/EONE...,wildfires,Wildfires,,Point,-95.68231,32.738759,11,14,Texas,United States,North America,2025-11-14 08:07:00+00:00
1,EONET_15877,"PACK Wildfire, Mono, California","6 Miles S from CROWLEY LAKE, CA",https://eonet.gsfc.nasa.gov/api/v3/events/EONE...,wildfires,Wildfires,,Point,-118.794566,37.555334,11,13,California,United States,North America,2025-11-13 15:00:00+00:00


In [18]:
ocurrences_df["category_titles"].value_counts()


category_titles
Wildfires           11165
Sea and Lake Ice      936
Severe Storms          86
Volcanoes              40
Name: count, dtype: int64

In [19]:
import pandas as pd
import numpy as np
from pathlib import Path

# Load the processed data
data_path = Path('../data/processed/events_2025.csv')

if not data_path.exists():
    print(f"Warning: {data_path} does not exist. Please run data_pipeline.py first.")
else:
    # Load the CSV file
    df = pd.read_csv(data_path)
    
    # Convert occurrence_date to datetime
    df['occurrence_date'] = pd.to_datetime(df['occurrence_date'], errors='coerce')
    
    # Extract year, month, day if not already present
    if 'year' not in df.columns:
        df['year'] = df['occurrence_date'].dt.year
    if 'month' not in df.columns:
        df['month'] = df['occurrence_date'].dt.month
    if 'day' not in df.columns:
        df['day'] = df['occurrence_date'].dt.day
    
    print("Data loaded successfully!")
    print(f"Total records: {len(df)}")
    print(f"Date range: {df['occurrence_date'].min()} to {df['occurrence_date'].max()}")



In [20]:
available_continents = sorted([c for c in ocurrences_df['continent'].dropna().unique()])
available_continents


['Africa',
 'Antarctica',
 'Asia',
 'Europe',
 'North America',
 'Oceania',
 'South America']

In [21]:
ocurrences_df['continent'].value_counts()

continent
North America    4050
Africa           3172
South America    1782
Oceania          1408
Asia              971
Antarctica        792
Europe             52
Name: count, dtype: int64

In [22]:
events_df.sort_values(by='date', ascending=False).head(4)

Unnamed: 0,event_id,title,description,link,categories,sources,closed,status,category_titles,date,longitude,latitude,continent,country
49,EONET_15984,"RX Coleman 5431 Prescribed Fire, Coleman, Texas",,https://eonet.gsfc.nasa.gov/api/v3/events/EONE...,['Wildfires'],['IRWIN'],,,Wildfires,2025-11-20 09:18:00+00:00,-99.390833,31.950833,North America,United States
50,EONET_15987,"FY25 WEST CASTLE ROCK RX Prescribed Fire, Malh...",,https://eonet.gsfc.nasa.gov/api/v3/events/EONE...,['Wildfires'],['IRWIN'],,,Wildfires,2025-11-18 17:59:00+00:00,-118.183545,44.027127,North America,United States
51,EONET_15986,"Concord Fire Wildfire, Montgomery, Alabama","6 Miles NW from Talladega, AL",https://eonet.gsfc.nasa.gov/api/v3/events/EONE...,['Wildfires'],['IRWIN'],,,Wildfires,2025-11-18 00:56:00+00:00,-86.177222,32.436944,North America,United States
52,EONET_15898,Tropical Cyclone Fina,,https://eonet.gsfc.nasa.gov/api/v3/events/EONE...,['Severe Storms'],['JTWC'],,,Severe Storms,2025-11-18 00:00:00+00:00,130.5,-10.2,Oceania,Australia


In [23]:
ocurrences_df_agg_daily = ocurrences_df.groupby(ocurrences_df['date']).agg(count=('date', 'size')).reset_index()
ocurrences_df_agg_daily['date'] = pd.to_datetime(ocurrences_df_agg_daily['date'])

max_date = ocurrences_df_agg_daily["date"].max()
print(f"Max date: {max_date}")

ocurrences_df[ocurrences_df["date"] == max_date]


Max date: 2025-11-21 06:00:00+00:00


Unnamed: 0,event_id,event_title,event_description,event_link,category_ids,category_titles,occurrence_date,occurrence_type,longitude,latitude,month,day,region,country,continent,date
6154,EONET_15898,Tropical Cyclone Fina,,https://eonet.gsfc.nasa.gov/api/v3/events/EONE...,severeStorms,Severe Storms,,Point,132.4,-10.7,11,21,Northern Territory,Australia,Oceania,2025-11-21 06:00:00+00:00
