# setup

### imports

In [None]:
import zipfile
import sqlite3
import pandas as pd
from tqdm import tqdm
import numpy as np
import glob
import ast

In [None]:
import datetime

def julian_to_datetime(julian_date):
    """Convert an astronomical Julian Date to a timezone-naive UTC datetime.

    Args:
        julian_date: Julian Date as a number of (possibly fractional) days,
            e.g. the DISCOVERY_DATE values read from the Fires table.

    Returns:
        datetime.datetime: naive datetime expressed in UTC.
    """
    # Julian Day 0 = 4713-11-24 BCE (proleptic Gregorian calendar)
    # Astronomical JD 2440587.5 = Unix time 0 (1970-01-01T00:00:00Z)
    unix_time_seconds = (julian_date - 2440587.5) * 86400.0
    # Epoch + timedelta replaces datetime.utcfromtimestamp(), which is
    # deprecated since Python 3.12; the result is the same naive UTC value.
    return datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=unix_time_seconds)

### google drive

In [None]:
# Mount Google Drive at /content/drive so the Drive paths used by later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### google earth engine

In [None]:
import ee

In [None]:
# Trigger the Earth Engine authentication flow.
ee.Authenticate()

# Initialize the library against a specific project.
# NOTE(review): the project id is hard-coded; anyone re-running this notebook
# presumably needs access to it or must substitute their own - confirm.
ee.Initialize(project='cvproject-421022')

# load 1.88 million wildfire records

In [None]:
# Unpack the zipped dataset from Drive into /content/fod on the local disk.
with zipfile.ZipFile(
    '/content/drive/MyDrive/Spring 2025/ML and Climate/wildfire_project/fod.zip',
    mode='r',
) as fod_archive:
    fod_archive.extractall(path='/content/fod')


In [None]:
# Open the extracted SQLite database. `con` is intentionally left open:
# later cells keep querying through it.
con = sqlite3.connect('/content/fod/FPA_FOD_20170508.sqlite')
cur = con.cursor()

# List the columns of the Fires table (field 1 of each PRAGMA table_info
# row is the column name).
cur.execute("PRAGMA table_info(Fires)")
columns = [row[1] for row in cur.fetchall()]

print(columns)
# Bind the state as a query parameter instead of writing "CA" in double
# quotes: in SQL, double quotes denote identifiers, and SQLite only treats
# them as a string literal as a legacy fallback.
pd.read_sql('SELECT DISCOVERY_DOY FROM Fires F WHERE F.STATE = ? LIMIT 10', con, params=('CA',))


['OBJECTID', 'FOD_ID', 'FPA_ID', 'SOURCE_SYSTEM_TYPE', 'SOURCE_SYSTEM', 'NWCG_REPORTING_AGENCY', 'NWCG_REPORTING_UNIT_ID', 'NWCG_REPORTING_UNIT_NAME', 'SOURCE_REPORTING_UNIT', 'SOURCE_REPORTING_UNIT_NAME', 'LOCAL_FIRE_REPORT_ID', 'LOCAL_INCIDENT_ID', 'FIRE_CODE', 'FIRE_NAME', 'ICS_209_INCIDENT_NUMBER', 'ICS_209_NAME', 'MTBS_ID', 'MTBS_FIRE_NAME', 'COMPLEX_NAME', 'FIRE_YEAR', 'DISCOVERY_DATE', 'DISCOVERY_DOY', 'DISCOVERY_TIME', 'STAT_CAUSE_CODE', 'STAT_CAUSE_DESCR', 'CONT_DATE', 'CONT_DOY', 'CONT_TIME', 'FIRE_SIZE', 'FIRE_SIZE_CLASS', 'LATITUDE', 'LONGITUDE', 'OWNER_CODE', 'OWNER_DESCR', 'STATE', 'COUNTY', 'FIPS_CODE', 'FIPS_NAME', 'Shape']


Unnamed: 0,DISCOVERY_DOY
0,33
1,133
2,152
3,180
4,180
5,182
6,183
7,67
8,74
9,183


In [None]:
# Load every California fire with the columns used downstream. The state is
# bound as a parameter rather than written as a double-quoted "CA", which SQL
# reads as an identifier (SQLite accepts it as a string only as a legacy quirk).
ca_fires = pd.read_sql(
    'SELECT FOD_ID, FIRE_NAME, FIRE_SIZE, FIRE_SIZE_CLASS, FIRE_YEAR, DISCOVERY_DATE, '
    'STAT_CAUSE_DESCR, STATE, LATITUDE, LONGITUDE, DISCOVERY_DOY '
    'FROM Fires F WHERE F.STATE = ?',
    con,
    params=('CA',),
)


In [None]:
# Pull a 15-row sample (no state filter) to eyeball the selected columns.
preview_sql = 'SELECT FOD_ID, FIRE_NAME, FIRE_SIZE, FIRE_SIZE_CLASS, FIRE_YEAR, DISCOVERY_DATE, STAT_CAUSE_DESCR, STATE, LATITUDE, LONGITUDE FROM Fires F LIMIT 15'
test_fires = pd.read_sql(sql=preview_sql, con=con)
test_fires

In [None]:
# Load fires (all states) whose containment date is more than one day after
# discovery. DURATION_DAYS is computed in SQL as CONT_DATE - DISCOVERY_DATE;
# both columns hold Julian-date numbers (e.g. 2453184.5 in the output below).
# Rows with a NULL CONT_DATE fail the WHERE comparison and are excluded.
query = '''
SELECT FOD_ID, FIRE_NAME, FIRE_SIZE, FIRE_SIZE_CLASS, FIRE_YEAR,
       DISCOVERY_DATE, CONT_DATE, (CONT_DATE - DISCOVERY_DATE) AS DURATION_DAYS,
       STAT_CAUSE_DESCR, STATE, LATITUDE, LONGITUDE, DISCOVERY_DOY
FROM Fires F
WHERE (CONT_DATE - DISCOVERY_DATE) > 1
'''
all_long_fires = pd.read_sql(query, con)


In [None]:
# Display the long-duration fires table as the cell output.
all_long_fires

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,FIRE_YEAR,DISCOVERY_DATE,CONT_DATE,DURATION_DAYS,STAT_CAUSE_DESCR,STATE,LATITUDE,LONGITUDE,DISCOVERY_DOY
0,4,DEER,0.1,A,2004,2453184.5,2453189.5,5.0,Lightning,CA,38.559167,-119.913333,180
1,5,STEVENOT,0.1,A,2004,2453184.5,2453189.5,5.0,Lightning,CA,38.559167,-119.933056,180
2,17,POWER,16823.0,G,2004,2453284.5,2453299.5,15.0,Equipment Use,CA,38.523333,-120.211667,280
3,18,FREDS,7700.0,G,2004,2453291.5,2453295.5,4.0,Equipment Use,CA,38.780000,-120.260000,287
4,25,EAGLE,2.5,B,2004,2453187.5,2453192.5,5.0,Lightning,NM,33.545278,-105.229444,183
...,...,...,...,...,...,...,...,...,...,...,...,...,...
77776,300346892,4-1,0.1,A,2010,2455402.5,2455404.5,2.0,Miscellaneous,CA,41.273543,-120.580424,206
77777,300347461,VALLEY,480.0,E,2014,2456814.5,2456816.5,2.0,Equipment Use,CA,37.600389,-120.191168,157
77778,300347650,MAIN1,0.1,A,2014,2456841.5,2456845.5,4.0,Arson,CA,36.032229,-118.857999,184
77779,300347964,HAPPY,30.0,C,2015,2457232.5,2457237.5,5.0,Missing/Undefined,CA,40.469090,-122.397660,210


In [None]:
# Attach a calendar date to every long fire, print how many were discovered on
# or after 2000-02-18, then keep only those rows.
all_long_fires["date"] = all_long_fires["DISCOVERY_DATE"].map(julian_to_datetime)
on_or_after_cutoff = all_long_fires['date'] >= '2000-02-18'
print(all_long_fires[on_or_after_cutoff].shape[0])
all_long_fires = all_long_fires[on_or_after_cutoff]

61200


In [None]:
# List the distinct fire size classes present among the California fires.
# ca_fires[ca_fires['FIRE_SIZE_CLASS']=='E']
ca_fires['FIRE_SIZE_CLASS'].unique()

array(['A', 'B', 'G', 'C', 'F', 'D', 'E'], dtype=object)

In [None]:
# Derive a calendar `date` column from the Julian DISCOVERY_DATE values.
ca_fires["date"] = ca_fires["DISCOVERY_DATE"].map(julian_to_datetime)

In [None]:
# Sanity checks: number of distinct fire ids and distinct discovery dates.
print(ca_fires['FOD_ID'].nunique())
print(ca_fires['date'].nunique())

# Spot-check one location/date. display() is required because only a cell's
# last expression is rendered automatically; as a bare mid-cell expression
# this lookup would be computed and silently discarded.
spot_check = ca_fires[(ca_fires['LATITUDE']==37.375) & (ca_fires['LONGITUDE']==-120.62305555) & (ca_fires['date']=='2007-09-28 0:00:00')]
display(spot_check)

# Keep only fires discovered on or after 2000-02-18; the mask is built once
# and reused for both the printed count and the filtered frame.
since_cutoff = ca_fires['date']>='2000-02-18'
print(ca_fires[since_cutoff].shape[0])
ca_2000_fires = ca_fires[since_cutoff]

189550
8324
121328


# augment with earth engine data

## single point test

In [None]:
# Single-point smoke test: look up the GRIDMET 'tmmx' band for the first
# California fire before running the batch export.
gridmet = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET')



row = ca_fires.iloc[0]
print(row)
print(julian_to_datetime(row['DISCOVERY_DATE']))
idx = 1
point = ee.Geometry.Point([row['LONGITUDE'], row['LATITUDE']])
feature = ee.Feature(point, {
            'id': str(idx),
            'date': julian_to_datetime(row['DISCOVERY_DATE'])
        })
date = ee.Date(feature.get('date'))

# First image in the one-day window that starts at the discovery date.
image = gridmet.filterDate(date, date.advance(1, 'day')).first()
max_temp = image.select('tmmx')
# getInfo() pulls the computed value back to the client as a Python number.
max_temp_value = max_temp.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=1000
    ).get('tmmx').getInfo()
# Prints ~280.9 for this row; presumably Kelvin, since the merge step later
# subtracts 273.15 from the exported temperatures - TODO confirm.
print(max_temp_value)


FOD_ID                          1
FIRE_NAME                FOUNTAIN
FIRE_SIZE                     0.1
FIRE_SIZE_CLASS                 A
FIRE_YEAR                    2005
DISCOVERY_DATE          2453403.5
STAT_CAUSE_DESCR    Miscellaneous
STATE                          CA
LATITUDE                40.036944
LONGITUDE             -121.005833
Name: 0, dtype: object
2005-02-02 00:00:00
280.87799072265625


In [None]:
# Single-point smoke test: population density at the first California fire.
pop = ee.ImageCollection('CIESIN/GPWv411/GPW_Population_Density')

row = ca_fires.iloc[0]
print(row)
print(julian_to_datetime(row['DISCOVERY_DATE']))
idx = 1
point = ee.Geometry.Point([row['LONGITUDE'], row['LATITUDE']])
feature = ee.Feature(point, {
            'id': str(idx),
            'date': julian_to_datetime(row['DISCOVERY_DATE']),
            # Discovery year rounded to the nearest multiple of 5; presumably
            # the collection only has images for such years - TODO confirm.
            "year": 5 * round(julian_to_datetime(row['DISCOVERY_DATE']).year/5)
        })
year = feature.get('year')
date = ee.Date.fromYMD(year, 1, 1)

# First image dated within the one-year window starting on 1 January of `year`.
image = pop.filterDate(date, date.advance(1, 'year')).first()
# print(image)


population_density = image.select('population_density')

# Sample at the feature's location
population_density_value = population_density.reduceRegion(
    reducer=ee.Reducer.first(),
    geometry=feature.geometry(),
    scale=1000).get('population_density').getInfo()

print(population_density_value)


FOD_ID                                1
FIRE_NAME                      FOUNTAIN
FIRE_SIZE                           0.1
FIRE_SIZE_CLASS                       A
FIRE_YEAR                          2005
DISCOVERY_DATE                2453403.5
STAT_CAUSE_DESCR          Miscellaneous
STATE                                CA
LATITUDE                      40.036944
LONGITUDE                   -121.005833
date                2005-02-02 00:00:00
Name: 0, dtype: object
2005-02-02 00:00:00
4.473303318023682


In [None]:
# Single-point smoke test: sample EVI for the first California fire.
# Uses MODIS/061/MOD13A2, the collection the batch vegetation export in this
# notebook samples. The VIIRS collection (NASA/VIIRS/002/VNP13A1) returned no
# image for this 2005 discovery date, so Image.select failed on a null input.
veg = ee.ImageCollection('MODIS/061/MOD13A2')


row = ca_fires.iloc[0]
print(row)
fire_date = julian_to_datetime(row['DISCOVERY_DATE'])
print(fire_date)
idx = 1
point = ee.Geometry.Point([row['LONGITUDE'], row['LATITUDE']])
feature = ee.Feature(point, {
            'id': str(idx),
            'date': fire_date
        })

date = ee.Date(feature.get('date'))
# First image in the one-month window that starts at the discovery date.
image = veg.filterDate(date, date.advance(1, 'month')).first()
vegetation = image.select('EVI')

# Sample at the feature's location
vegetation_value = vegetation.reduceRegion(
    reducer=ee.Reducer.first(),
    geometry=feature.geometry(),
    scale=10000
).get('EVI').getInfo()

print(vegetation_value)

FOD_ID                                1
FIRE_NAME                      FOUNTAIN
FIRE_SIZE                           0.1
FIRE_SIZE_CLASS                       A
FIRE_YEAR                          2005
DISCOVERY_DATE                2453403.5
STAT_CAUSE_DESCR          Miscellaneous
STATE                                CA
LATITUDE                      40.036944
LONGITUDE                   -121.005833
date                2005-02-02 00:00:00
Name: 0, dtype: object
2005-02-02 00:00:00


EEException: Image.select: Parameter 'input' is required and may not be null.

In [None]:
# Repeat of the single-point EVI check for the first California fire.
# Uses MODIS/061/MOD13A2, the collection the batch vegetation export in this
# notebook samples. The VIIRS collection (NASA/VIIRS/002/VNP13A1) returned no
# image for this 2005 discovery date, so Image.select failed on a null input.
veg = ee.ImageCollection('MODIS/061/MOD13A2')


row = ca_fires.iloc[0]
print(row)
fire_date = julian_to_datetime(row['DISCOVERY_DATE'])
print(fire_date)
idx = 1
point = ee.Geometry.Point([row['LONGITUDE'], row['LATITUDE']])
feature = ee.Feature(point, {
            'id': str(idx),
            'date': fire_date
        })

date = ee.Date(feature.get('date'))
# First image in the one-month window that starts at the discovery date.
image = veg.filterDate(date, date.advance(1, 'month')).first()
vegetation = image.select('EVI')

# Sample at the feature's location
vegetation_value = vegetation.reduceRegion(
    reducer=ee.Reducer.first(),
    geometry=feature.geometry(),
    scale=10000
).get('EVI').getInfo()

print(vegetation_value)


def sample_drought(feature):
    """Attach the 'pdsi' drought value at a feature's location and date.

    Args:
        feature: ee.Feature with a geometry and a 'date' property.

    Returns:
        The feature with an added 'drought' property: the 'pdsi' value of the
        first GRIDMET/DROUGHT image in the one-week window starting at 'date'.
    """
    # 'pdsi' is read from GRIDMET/DROUGHT, the collection the batch drought
    # export in this notebook samples. The vegetation collection `veg` is the
    # wrong source for it: that collection is sampled for 'EVI'.
    drought_collection = ee.ImageCollection('GRIDMET/DROUGHT')

    date = ee.Date(feature.get('date'))
    image = drought_collection.filterDate(date, date.advance(1, 'week')).first()

    drought = image.select('pdsi')

    # Sample at the feature's location
    drought_value = drought.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=10000
    ).get('pdsi')

    return feature.set({
        'drought': drought_value,
    })

## batch processing

In [None]:
# Register tqdm's pandas integration.
# NOTE(review): no progress_apply/progress_map call is visible in this
# notebook, so this registration may be unnecessary - confirm before removing.
tqdm.pandas()

def df_to_fc(df):
    """Build an Earth Engine FeatureCollection with one point feature per row.

    Args:
        df: DataFrame with 'FOD_ID', 'LONGITUDE', 'LATITUDE' and
            'DISCOVERY_DATE' (Julian date) columns.

    Returns:
        ee.FeatureCollection whose features are points at (LONGITUDE,
        LATITUDE) carrying 'id' (the FOD_ID), 'date' (discovery datetime)
        and 'year' properties.
    """
    features = []
    # iterrows() is a generator with no length, so pass total= explicitly to
    # let tqdm show a real progress bar instead of a bare counter.
    for _, row in tqdm(df.iterrows(), total=len(df)):
        # Convert the Julian date once and reuse it for both properties.
        discovered = julian_to_datetime(row['DISCOVERY_DATE'])
        point = ee.Geometry.Point([row['LONGITUDE'], row['LATITUDE']])
        feature = ee.Feature(point, {
            'id': row['FOD_ID'],
            'date': discovered,
            # Discovery year rounded to the nearest multiple of 5, never
            # below 2000 (sample_population reads this property).
            "year": max(5 * round(discovered.year/5), 2000)
        })
        features.append(feature)
    return ee.FeatureCollection(features)


### gridmet weather data


In [None]:
# Earth Engine image collection that sample_weather reads its bands from.
gridmet = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET')

def sample_weather(feature):
    """Attach weather values at a feature's location and date.

    Reads the first image of the module-level `gridmet` collection in the
    one-day window starting at the feature's 'date' property and averages
    each band of interest over the feature's geometry at a 10000 scale.

    Args:
        feature: ee.Feature with a geometry and a 'date' property.

    Returns:
        The feature with eight added properties: 'max_temp_K', 'min_temp_K',
        'humidity', 'wind_speed', 'precipitation', 'energy_release',
        'dead_fuel_100' and 'dead_fuel_1000'.
    """
    # Output property name -> band it is read from.
    band_for_property = {
        'max_temp_K': 'tmmx',
        'min_temp_K': 'tmmn',
        'humidity': 'sph',
        'wind_speed': 'vs',
        'precipitation': 'pr',
        'energy_release': 'erc',
        'dead_fuel_100': 'fm100',
        'dead_fuel_1000': 'fm1000',
    }

    date = ee.Date(feature.get('date'))
    image = gridmet.filterDate(date, date.advance(1, 'day')).first()

    # One multi-band reduceRegion replaces eight identical single-band
    # reductions over the same geometry, reducer and scale; the resulting
    # dictionary is keyed by band name.
    band_means = image.select(list(band_for_property.values())).reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=feature.geometry(),
        scale=10000
    )

    return feature.set({
        prop: band_means.get(band) for prop, band in band_for_property.items()
    })



In [None]:
# Export weather samples for every long fire to Drive, one CSV per chunk.
chunk_size = 20000
n_chunks = int(np.ceil(len(all_long_fires) / chunk_size))
# Split row positions instead of the DataFrame itself: np.array_split on a
# DataFrame emits a FutureWarning (it goes through the deprecated
# DataFrame.swapaxes), while positional chunks have exactly the same sizes.
chunks = [
    all_long_fires.iloc[positions]
    for positions in np.array_split(np.arange(len(all_long_fires)), n_chunks)
]

for idx, chunk in enumerate(chunks):
    print(f'Processing chunk {idx+1}/{len(chunks)}...')

    # Convert this chunk into a FeatureCollection
    fc_chunk = df_to_fc(chunk)

    # Sample weather
    sampled_chunk = fc_chunk.map(sample_weather)

    # Export this chunk
    task = ee.batch.Export.table.toDrive(
        collection=sampled_chunk,
        description=f'export_weather_points_batch_{idx}',
        fileFormat='CSV',
        folder='earthengine_long',
        fileNamePrefix=f'weather_points_batch_{idx}'
    )
    task.start()

  return bound(*args, **kwds)


Processing chunk 1/4...


15300it [00:02, 7165.47it/s]


Processing chunk 2/4...


15300it [00:02, 7348.70it/s]


Processing chunk 3/4...


15300it [00:02, 5700.67it/s]


Processing chunk 4/4...


15300it [00:02, 5340.08it/s]


### population GPWv4 data

In [None]:
# Earth Engine image collection that sample_population reads 'population_density' from.
pop = ee.ImageCollection('CIESIN/GPWv411/GPW_Population_Density')

def sample_population(feature):
    """Attach the population density at a feature's location for its 'year'.

    Reads the first image of the module-level `pop` collection dated within
    the one-year window starting on 1 January of the feature's 'year'
    property, and averages 'population_density' over the feature's geometry.

    Args:
        feature: ee.Feature with a geometry and a 'year' property.

    Returns:
        The feature with an added 'population_density' property.
    """
    year_start = ee.Date.fromYMD(feature.get('year'), 1, 1)
    images_in_year = pop.filterDate(year_start, year_start.advance(1, 'year'))
    density_band = images_in_year.first().select('population_density')

    # Mean of the band over the feature's geometry.
    density_stats = density_band.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=feature.geometry(),
        scale=10000,
    )

    return feature.set({'population_density': density_stats.get('population_density')})



In [None]:
# Export population-density samples for every long fire to Drive, one CSV per chunk.
chunk_size = 20000
n_chunks = int(np.ceil(len(all_long_fires) / chunk_size))
# Split row positions instead of the DataFrame itself: np.array_split on a
# DataFrame emits a FutureWarning (it goes through the deprecated
# DataFrame.swapaxes), while positional chunks have exactly the same sizes.
chunks = [
    all_long_fires.iloc[positions]
    for positions in np.array_split(np.arange(len(all_long_fires)), n_chunks)
]

for idx, chunk in enumerate(chunks):
    print(f'Processing chunk {idx+1}/{len(chunks)}...')

    # Convert this chunk into a FeatureCollection
    fc_chunk = df_to_fc(chunk)

    # Sample population density
    sampled_chunk = fc_chunk.map(sample_population)

    # Export this chunk
    task = ee.batch.Export.table.toDrive(
        collection=sampled_chunk,
        description=f'export_population_points_batch_{idx}',
        fileFormat='CSV',
        folder='earthengine_long',
        fileNamePrefix=f'population_points_batch_{idx}'
    )
    task.start()

  return bound(*args, **kwds)


Processing chunk 1/4...


15300it [00:03, 4381.66it/s]


Processing chunk 2/4...


15300it [00:01, 7912.66it/s]


Processing chunk 3/4...


15300it [00:03, 4810.90it/s]


Processing chunk 4/4...


15300it [00:03, 4316.27it/s]


### vegetation MODIS (MOD13A2)

In [None]:
# Earth Engine image collection that sample_vegetation reads 'EVI' from.
veg = ee.ImageCollection('MODIS/061/MOD13A2')

def sample_vegetation(feature):
    """Attach the 'EVI' value at a feature's location and date.

    Reads the first image of the module-level `veg` collection in the
    one-month window starting at the feature's 'date' property and averages
    'EVI' over the feature's geometry.

    Args:
        feature: ee.Feature with a geometry and a 'date' property.

    Returns:
        The feature with an added 'vegetation' property.
    """
    window_start = ee.Date(feature.get('date'))
    window_end = window_start.advance(1, 'month')
    evi_band = veg.filterDate(window_start, window_end).first().select('EVI')

    # Mean of the band over the feature's geometry.
    evi_stats = evi_band.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=feature.geometry(),
        scale=10000,
    )

    return feature.set({'vegetation': evi_stats.get('EVI')})



In [None]:
# Export vegetation (EVI) samples for every long fire to Drive, one CSV per chunk.
chunk_size = 20000
n_chunks = int(np.ceil(len(all_long_fires) / chunk_size))
# Split row positions instead of the DataFrame itself: np.array_split on a
# DataFrame emits a FutureWarning (it goes through the deprecated
# DataFrame.swapaxes), while positional chunks have exactly the same sizes.
chunks = [
    all_long_fires.iloc[positions]
    for positions in np.array_split(np.arange(len(all_long_fires)), n_chunks)
]

for idx, chunk in enumerate(chunks):
    print(f'Processing chunk {idx+1}/{len(chunks)}...')

    # Convert this chunk into a FeatureCollection
    fc_chunk = df_to_fc(chunk)

    # Sample vegetation
    sampled_chunk = fc_chunk.map(sample_vegetation)

    # Export this chunk
    task = ee.batch.Export.table.toDrive(
        collection=sampled_chunk,
        description=f'export_vegetation_points_batch_{idx}',
        fileFormat='CSV',
        folder='earthengine_long',
        fileNamePrefix=f'vegetation_points_batch_{idx}'
    )
    task.start()

  return bound(*args, **kwds)


Processing chunk 1/4...


15300it [00:01, 9176.57it/s]


Processing chunk 2/4...


15300it [00:01, 8221.52it/s]


Processing chunk 3/4...


15300it [00:02, 5679.46it/s]


Processing chunk 4/4...


15300it [00:01, 9268.66it/s]


### drought (GRIDMET/DROUGHT)

In [None]:
# Earth Engine image collection that sample_drought reads 'pdsi' from.
drought = ee.ImageCollection('GRIDMET/DROUGHT')

def sample_drought(feature):
    """Attach the 'pdsi' drought value at a feature's location and date.

    Reads the first image of the module-level `drought` collection in the
    one-week window starting at the feature's 'date' property and averages
    'pdsi' over the feature's geometry.

    Args:
        feature: ee.Feature with a geometry and a 'date' property.

    Returns:
        The feature with an added 'drought_index' property.
    """
    week_start = ee.Date(feature.get('date'))
    first_image = drought.filterDate(week_start, week_start.advance(1, 'week')).first()

    # Mean of the band over the feature's geometry.
    pdsi_stats = first_image.select('pdsi').reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=feature.geometry(),
        scale=10000,
    )

    return feature.set({'drought_index': pdsi_stats.get('pdsi')})


In [None]:
# Export drought (pdsi) samples for every long fire to Drive, one CSV per chunk.
chunk_size = 20000
n_chunks = int(np.ceil(len(all_long_fires) / chunk_size))
# Split row positions instead of the DataFrame itself: np.array_split on a
# DataFrame emits a FutureWarning (it goes through the deprecated
# DataFrame.swapaxes), while positional chunks have exactly the same sizes.
chunks = [
    all_long_fires.iloc[positions]
    for positions in np.array_split(np.arange(len(all_long_fires)), n_chunks)
]

for idx, chunk in enumerate(chunks):
    print(f'Processing chunk {idx+1}/{len(chunks)}...')

    # Convert this chunk into a FeatureCollection
    fc_chunk = df_to_fc(chunk)

    # Sample drought index
    sampled_chunk = fc_chunk.map(sample_drought)

    # Export this chunk
    task = ee.batch.Export.table.toDrive(
        collection=sampled_chunk,
        description=f'export_drought_points_batch_{idx}',
        fileFormat='CSV',
        folder='earthengine_long',
        fileNamePrefix=f'drought_points_batch_{idx}'
    )
    task.start()

  return bound(*args, **kwds)


Processing chunk 1/4...


15300it [00:01, 8189.48it/s]


Processing chunk 2/4...


15300it [00:01, 9105.48it/s]


Processing chunk 3/4...


15300it [00:02, 5748.15it/s]


Processing chunk 4/4...


15300it [00:01, 8999.83it/s]


# merge data

In [None]:
# Load every exported weather batch from Drive. sorted() fixes the file order
# (glob.glob returns matches in arbitrary order), which makes the row kept by
# drop_duplicates deterministic.
batch_files = sorted(glob.glob('/content/drive/MyDrive/earthengine_long/weather_points_batch_*.csv'))
if not batch_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError('No weather_points_batch_*.csv files found in /content/drive/MyDrive/earthengine_long')

# Built as one chain of new frames (no inplace mutation), so re-running the
# cell always starts from the files on disk.
exported_df = (
    pd.concat((pd.read_csv(f) for f in batch_files), ignore_index=True)
    .rename(columns={'id': 'FOD_ID'})
    .drop_duplicates(subset=["FOD_ID"])
    # Convert from Kelvin to Celsius
    .assign(
        max_temp_C=lambda frame: frame['max_temp_K'] - 273.15,
        min_temp_C=lambda frame: frame['min_temp_K'] - 273.15,
    )
)

# NOTE(review): this prints the California fire count, but the exports loaded
# here are merged onto all_long_fires - confirm which count is intended.
print(ca_fires['FOD_ID'].nunique())

print(exported_df.columns.tolist())



189550
['system:index', 'date', 'dead_fuel_100', 'dead_fuel_1000', 'energy_release', 'humidity', 'FOD_ID', 'max_temp_K', 'min_temp_K', 'precipitation', 'wind_speed', 'year', '.geo', 'max_temp_C', 'min_temp_C']


In [None]:

# Left-join the weather features onto the long-fire table by fire id, so fires
# without an exported row are kept.
weather_columns = ['FOD_ID', 'dead_fuel_100', 'dead_fuel_1000', 'energy_release', 'humidity', 'max_temp_C', 'min_temp_C', 'precipitation', 'wind_speed']
merged_df = pd.merge(all_long_fires, exported_df[weather_columns], how='left', on='FOD_ID')




In [None]:
# Load every exported population batch from Drive. sorted() fixes the file
# order (glob.glob returns matches in arbitrary order), which makes the row
# kept by drop_duplicates deterministic.
batch_files = sorted(glob.glob('/content/drive/MyDrive/earthengine_long/population_points_batch_*.csv'))
if not batch_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError('No population_points_batch_*.csv files found in /content/drive/MyDrive/earthengine_long')

# Built as one chain of new frames (no inplace mutation).
exported_df = (
    pd.concat((pd.read_csv(f) for f in batch_files), ignore_index=True)
    .rename(columns={'id': 'FOD_ID'})
    .drop_duplicates(subset=["FOD_ID"])
)


In [None]:
# Left-join population density onto the running merged table by fire id.
population_columns = ['FOD_ID', 'population_density']
merged_df = pd.merge(merged_df, exported_df[population_columns], how='left', on='FOD_ID')

In [None]:
# Load every exported vegetation batch from Drive. sorted() fixes the file
# order (glob.glob returns matches in arbitrary order), which makes the row
# kept by drop_duplicates deterministic.
batch_files = sorted(glob.glob('/content/drive/MyDrive/earthengine_long/vegetation_points_batch_*.csv'))
if not batch_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError('No vegetation_points_batch_*.csv files found in /content/drive/MyDrive/earthengine_long')

# Built as one chain of new frames (no inplace mutation).
exported_df = (
    pd.concat((pd.read_csv(f) for f in batch_files), ignore_index=True)
    .rename(columns={'id': 'FOD_ID'})
    .drop_duplicates(subset=["FOD_ID"])
)


In [None]:
# Left-join the vegetation value onto the running merged table by fire id.
vegetation_columns = ['FOD_ID', 'vegetation']
merged_df = pd.merge(merged_df, exported_df[vegetation_columns], how='left', on='FOD_ID')

In [None]:
# Load every exported drought batch from Drive. sorted() fixes the file order
# (glob.glob returns matches in arbitrary order), which makes the row kept by
# drop_duplicates deterministic.
batch_files = sorted(glob.glob('/content/drive/MyDrive/earthengine_long/drought_points_batch_*.csv'))
if not batch_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError('No drought_points_batch_*.csv files found in /content/drive/MyDrive/earthengine_long')

# Built as one chain of new frames (no inplace mutation).
exported_df = (
    pd.concat((pd.read_csv(f) for f in batch_files), ignore_index=True)
    .rename(columns={'id': 'FOD_ID'})
    .drop_duplicates(subset=["FOD_ID"])
)


In [None]:
# Left-join the drought index onto the running merged table by fire id.
drought_columns = ['FOD_ID', 'drought_index']
merged_df = pd.merge(merged_df, exported_df[drought_columns], how='left', on='FOD_ID')

In [None]:
# List the merged table's columns to confirm that every sampled feature was joined in.
print(merged_df.columns.tolist())


['FOD_ID', 'FIRE_NAME', 'FIRE_SIZE', 'FIRE_SIZE_CLASS', 'FIRE_YEAR', 'DISCOVERY_DATE', 'CONT_DATE', 'DURATION_DAYS', 'STAT_CAUSE_DESCR', 'STATE', 'LATITUDE', 'LONGITUDE', 'DISCOVERY_DOY', 'date', 'dead_fuel_100', 'dead_fuel_1000', 'energy_release', 'humidity', 'max_temp_C', 'min_temp_C', 'precipitation', 'wind_speed', 'population_density', 'vegetation', 'drought_index']


In [None]:
# Display the merged long-fire table as the cell output.
merged_df

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,FIRE_YEAR,DISCOVERY_DATE,CONT_DATE,DURATION_DAYS,STAT_CAUSE_DESCR,STATE,...,dead_fuel_1000,energy_release,humidity,max_temp_C,min_temp_C,precipitation,wind_speed,population_density,vegetation,drought_index
0,4,DEER,0.1,A,2004,2453184.5,2453189.5,5.0,Lightning,CA,...,8.259057,76.284248,0.004474,20.343042,7.937524,0.0,2.230986,0.000000,2384.0,-2.6175
1,5,STEVENOT,0.1,A,2004,2453184.5,2453189.5,5.0,Lightning,CA,...,8.259057,76.284248,0.004474,20.343042,7.937524,0.0,2.230986,0.000000,2384.0,-2.6175
2,17,POWER,16823.0,G,2004,2453284.5,2453299.5,15.0,Equipment Use,CA,...,7.771585,77.266144,0.004487,23.897638,9.720789,0.0,2.096183,0.000000,2080.0,-3.6375
3,18,FREDS,7700.0,G,2004,2453291.5,2453295.5,4.0,Equipment Use,CA,...,7.529254,78.958511,0.003525,23.471399,9.278833,0.0,3.542572,0.546957,1560.0,-3.3975
4,25,EAGLE,2.5,B,2004,2453187.5,2453192.5,5.0,Lightning,NM,...,9.743317,64.935120,0.003591,28.284021,11.238000,0.0,3.919152,0.325217,1913.0,0.4950
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61195,300346892,4-1,0.1,A,2010,2455402.5,2455404.5,2.0,Miscellaneous,CA,...,7.325874,81.276794,0.006939,31.968591,15.125696,0.0,2.236683,0.372284,1549.0,-3.9650
61196,300347461,VALLEY,480.0,E,2014,2456814.5,2456816.5,2.0,Equipment Use,CA,...,7.395751,83.421936,0.005318,33.718073,16.510370,0.0,3.303576,0.498421,1872.0,-4.7675
61197,300347650,MAIN1,0.1,A,2014,2456841.5,2456845.5,4.0,Arson,CA,...,4.720404,96.394150,0.007447,38.930841,21.166345,0.0,3.026281,8.687964,1662.0,-5.6475
61198,300347964,HAPPY,30.0,C,2015,2457232.5,2457237.5,5.0,Missing/Undefined,CA,...,6.130595,92.727119,0.004246,42.192865,22.842554,0.0,2.567572,40.920891,2493.0,-3.8075


In [None]:
# Save the merged long-fire table to Drive; index=False leaves the row index out of the file.
merged_df.to_csv("/content/drive/MyDrive/Spring 2025/ML and Climate/wildfire_project/long_2000_mean_fire_earth_data.csv", index=False)

## MERGE IN DOY

In [None]:
# Load a previously saved California merge from Drive.
# NOTE(review): the cell that wrote ca_2000_mean_fire_earth_data.csv is not
# visible in this notebook - confirm where that file was produced.
old_merge = pd.read_csv("/content/drive/MyDrive/Spring 2025/ML and Climate/wildfire_project/ca_2000_mean_fire_earth_data.csv")

In [None]:

# Put DISCOVERY_DOY next to the previously saved features: start from the
# (FOD_ID, DISCOVERY_DOY) pairs of the post-cutoff California fires and
# left-join the old merge onto them by fire id.
doy_lookup = ca_2000_fires[['FOD_ID', 'DISCOVERY_DOY']]
merged_df = pd.merge(doy_lookup, old_merge, how='left', on='FOD_ID')




In [None]:
# Display the California table with DISCOVERY_DOY merged in.
merged_df

Unnamed: 0,FOD_ID,DISCOVERY_DOY,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,FIRE_YEAR,DISCOVERY_DATE,STAT_CAUSE_DESCR,STATE,LATITUDE,...,dead_fuel_1000,energy_release,humidity,max_temp_C,min_temp_C,precipitation,wind_speed,population_density,vegetation,drought_index
0,1,33,FOUNTAIN,0.10,A,2005,2453403.5,Miscellaneous,CA,40.036944,...,21.963203,22.056320,0.002912,7.498254,-2.186072,0.0,6.620363,1.478607,2929.0,-0.3500
1,2,133,PIGEON,0.25,A,2004,2453137.5,Lightning,CA,38.933056,...,12.930136,47.595642,0.003536,15.886743,0.414148,0.0,3.208708,0.004660,3590.0,-2.2075
2,3,152,SLACK,0.10,A,2004,2453156.5,Debris Burning,CA,38.984167,...,11.582568,55.798500,0.005862,26.379541,13.118188,0.0,2.040178,6.677308,4518.0,-2.7800
3,4,180,DEER,0.10,A,2004,2453184.5,Lightning,CA,38.559167,...,8.259057,76.284248,0.004474,20.343042,7.937524,0.0,2.230986,0.000000,2384.0,-2.6175
4,5,180,STEVENOT,0.10,A,2004,2453184.5,Lightning,CA,38.559167,...,8.259057,76.284248,0.004474,20.343042,7.937524,0.0,2.230986,0.000000,2384.0,-2.6175
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121323,300348363,269,ODESSA 2,0.01,A,2015,2457291.5,Missing/Undefined,CA,40.481637,...,8.107708,76.157257,0.006148,33.893488,16.492151,0.0,2.316175,40.920891,2216.0,-3.3700
121324,300348373,278,,0.20,A,2015,2457300.5,Miscellaneous,CA,37.617619,...,8.788897,64.763962,0.006864,26.668207,13.650201,0.0,1.466999,646.360901,2263.0,-2.6900
121325,300348375,122,,0.10,A,2015,2457144.5,Missing/Undefined,CA,37.617619,...,10.918760,63.418705,0.006537,32.813928,13.751520,0.0,3.065458,646.360901,2653.0,-3.1725
121326,300348377,287,,2.00,B,2015,2457309.5,Missing/Undefined,CA,37.672235,...,9.216611,67.622978,0.007353,32.346643,17.716425,0.0,3.537143,1565.474243,2068.0,-2.7975


In [None]:
# Save the California table with DISCOVERY_DOY to Drive; index=False leaves the row index out of the file.
merged_df.to_csv("/content/drive/MyDrive/Spring 2025/ML and Climate/wildfire_project/ca_2000_day_of_year_mean_fire_earth_data.csv", index=False)