# Import the Data and Wrangle to focus specifically on Bats

In [11]:
#Import necessary objects from other notebooks
%store -r data2025_dir site_dir


In [2]:
# Import necessary packages

# Importing and accessing CSC
import pandas as pd
import os

In [17]:
def load_csv_data(csv_filename, base_dir):
    """
    Loads a CSV file from a given base directory.
    
    Parameters:
        csv_filename (str): Name of the CSV file (e.g., 'fire_stats.csv')
        base_dir (str): Directory path where the file lives
        
    Returns:
        pd.DataFrame: Loaded DataFrame
    """
    csv_path = os.path.join(base_dir, csv_filename)
    
    try:
        df = pd.read_csv(csv_path)
        print(f"✅ Loaded '{csv_filename}' with {len(df)} rows and {len(df.columns)} columns.")
        return df
    except Exception as e:
        print(f"⚠️ Could not load '{csv_filename}': {e}")
        return None


In [24]:
#Use the function load_csv_data to access your 
csv_filename = 'COFires_bats_2024.csv'
csv_df = load_csv_data(csv_filename, data2025_dir)
csv_df.head()

✅ Loaded 'COFires_bats_2024.csv' with 1874 rows and 20 columns.


Unnamed: 0,ET1-008,2024-07-22,0,1,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.10,0.11,0.12,0.13,0.14,2024,2MU
0,ET1-008,2024-07-23,0,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2024,2MU
1,site,date,hi_pass,lo_pass,Myyu,Myca,Myci,Myvo,Mylu,Pahe,Anpa,Epfu,Lano,Coto,Myth,Tabr,Laci,Euma,year,area
2,2MU03812,2024-07-22,380,30,0,0,0,16,0,0,0,0,10,0,0,0,0,0,2024,CP1
3,2MU03812,2024-07-23,384,24,0,0,0,11,1,0,0,0,7,0,0,0,0,0,2024,CP1
4,CP1-007,2024-07-24,510,55,0,0,0,15,0,0,0,0,8,0,0,0,0,0,2024,CP1


# Data Wrangling

In [35]:
# Skip the first two rows and load the actual header row (row index 2)
csv_path = "/Users/erinzimmerman/earth-analytics/data2025/wildfire/COFires_bats_2024.csv"
csv_df = pd.read_csv(csv_path, skiprows=2)

# Preview the cleaned-up DataFrame
csv_df.head()


Unnamed: 0,site,date,hi_pass,lo_pass,Myyu,Myca,Myci,Myvo,Mylu,Pahe,Anpa,Epfu,Lano,Coto,Myth,Tabr,Laci,Euma,year,area
0,2MU03812,2024-07-22,380,30,0,0,0,16,0,0,0,0,10,0,0,0,0,0,2024,CP1
1,2MU03812,2024-07-23,384,24,0,0,0,11,1,0,0,0,7,0,0,0,0,0,2024,CP1
2,CP1-007,2024-07-24,510,55,0,0,0,15,0,0,0,0,8,0,0,0,0,0,2024,CP1
3,CP1-007,2024-07-25,207,31,0,0,0,4,0,0,0,0,2,0,0,0,0,0,2024,CP1
4,CP1-007,2024-07-26,337,15,0,0,0,3,0,0,0,0,0,0,1,1,1,0,2024,CP1


In [38]:
# Trim whitespace in string colums, just in case
csv_df['site'] = csv_df['site'].str.strip()
csv_df['area'] = csv_df['area'].str.strip()

# Check date types
csv_df.dtypes


site       object
date       object
hi_pass     int64
lo_pass     int64
Myyu        int64
Myca        int64
Myci        int64
Myvo        int64
Mylu        int64
Pahe        int64
Anpa        int64
Epfu        int64
Lano        int64
Coto        int64
Myth        int64
Tabr        int64
Laci        int64
Euma        int64
year        int64
area       object
dtype: object

In [39]:
# Convert dates from being objects to intigers. 
csv_df['date'] = pd.to_datetime(csv_df['date'], errors='coerce')



In [42]:
# Identify missing values
csv_df.isnull().sum()


site       0
date       0
hi_pass    0
lo_pass    0
Myyu       0
Myca       0
Myci       0
Myvo       0
Mylu       0
Pahe       0
Anpa       0
Epfu       0
Lano       0
Coto       0
Myth       0
Tabr       0
Laci       0
Euma       0
year       0
area       0
dtype: int64

In [44]:
# Narrow it down to the correct fire
### figure out which rows are part of Mullen Fire by looking for MU in the site nae
mullen_csv_df = csv_df[csv_df['site'].str.contains("MU", na=False)]

mullen_csv_df


Unnamed: 0,site,date,hi_pass,lo_pass,Myyu,Myca,Myci,Myvo,Mylu,Pahe,Anpa,Epfu,Lano,Coto,Myth,Tabr,Laci,Euma,year,area
0,2MU03812,2024-07-22,380,30,0,0,0,16,0,0,0,0,10,0,0,0,0,0,2024,CP1
1,2MU03812,2024-07-23,384,24,0,0,0,11,1,0,0,0,7,0,0,0,0,0,2024,CP1
908,MU1-033,2024-07-25,33,36,0,0,0,2,1,0,0,0,16,0,0,4,0,0,2024,ET2
909,MU1-033,2024-07-26,27,13,0,0,0,0,0,0,0,0,5,0,0,2,0,0,2024,ET2
910,MU1-033,2024-07-27,62,3,0,0,0,10,0,0,0,0,0,0,0,0,0,0,2024,ET2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1867,MU1-138,2024-07-12,3,6,0,0,0,0,0,0,0,0,3,0,0,1,0,0,2024,TRA
1868,MU1-138,2024-07-13,10,9,0,0,0,0,0,0,0,0,5,0,0,0,1,0,2024,TRA
1869,MU1-138,2024-07-14,14,12,0,0,0,1,1,0,0,0,8,0,0,0,0,0,2024,TRA
1870,MU1-138,2024-07-15,2,9,0,0,0,0,0,0,0,0,5,0,0,1,0,0,2024,TRA


In [47]:
import re

# Apply regex split into new columns
mullen_csv_df[['fire_code','zone_code', 'site_num']] = mullen_csv_df['site'].str.extract(r"([A-Z])(\d+)(\d*)")

mullen_csv_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mullen_csv_df[['fire_code','zone_code', 'site_num']] = mullen_csv_df['site'].str.extract(r"([A-Z])(\d+)(\d*)")


Unnamed: 0,site,date,hi_pass,lo_pass,Myyu,Myca,Myci,Myvo,Mylu,Pahe,...,Coto,Myth,Tabr,Laci,Euma,year,area,fire_code,zone_code,site_num
0,2MU03812,2024-07-22,380,30,0,0,0,16,0,0,...,0,0,0,0,0,2024,CP1,U,03812,
1,2MU03812,2024-07-23,384,24,0,0,0,11,1,0,...,0,0,0,0,0,2024,CP1,U,03812,
908,MU1-033,2024-07-25,33,36,0,0,0,2,1,0,...,0,0,4,0,0,2024,ET2,U,1,
909,MU1-033,2024-07-26,27,13,0,0,0,0,0,0,...,0,0,2,0,0,2024,ET2,U,1,
910,MU1-033,2024-07-27,62,3,0,0,0,10,0,0,...,0,0,0,0,0,2024,ET2,U,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1867,MU1-138,2024-07-12,3,6,0,0,0,0,0,0,...,0,0,1,0,0,2024,TRA,U,1,
1868,MU1-138,2024-07-13,10,9,0,0,0,0,0,0,...,0,0,0,1,0,2024,TRA,U,1,
1869,MU1-138,2024-07-14,14,12,0,0,0,1,1,0,...,0,0,0,0,0,2024,TRA,U,1,
1870,MU1-138,2024-07-15,2,9,0,0,0,0,0,0,...,0,0,1,0,0,2024,TRA,U,1,


In [None]:
# Store the essential information to import into the next notebook
%store 
