<a href="https://colab.research.google.com/github/jmo2171/ml-climate/blob/main/wildfire_data_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# setup

### imports

In [21]:
import zipfile
import sqlite3
import pandas as pd
from tqdm import tqdm
import numpy as np
import glob
import ast

In [2]:
import datetime

def julian_to_datetime(julian_date):
    # Julian Day 0 = 4713-11-24 BCE (proleptic Gregorian calendar)
    # Astronomical JD 2440587.5 = Unix time 0 (1970-01-01T00:00:00Z)
    unix_time_seconds = (julian_date - 2440587.5) * 86400.0
    return datetime.datetime.utcfromtimestamp(unix_time_seconds)

### google drive

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### google earth engine

In [4]:
import ee

In [5]:
# Trigger the authentication flow.
ee.Authenticate()

# Initialize the library.
ee.Initialize(project='cvproject-421022')

# load 1.88 mil wildfire data

In [6]:
with zipfile.ZipFile('/content/drive/MyDrive/Spring 2025/ML and Climate/wildfire_project/fod.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/fod')


In [8]:
con = sqlite3.connect('/content/fod/FPA_FOD_20170508.sqlite')
cur = con.cursor()

# for row in cur.execute('SELECT COUNT(*) FROM Fires F WHERE F.STATE="CA"'):
#   print(row)


# for row in cur.execute('SELECT FOD_ID, FIRE_NAME, FIRE_SIZE, FIRE_SIZE_CLASS, FIRE_YEAR, DISCOVERY_DATE, STAT_CAUSE_DESCR, STATE, LATITUDE, LONGITUDE FROM Fires F WHERE F.STATE="CA" LIMIT 2'):
#   print(row)
# con.close()

ca_fires = pd.read_sql('SELECT FOD_ID, FIRE_NAME, FIRE_SIZE, FIRE_SIZE_CLASS, FIRE_YEAR, DISCOVERY_DATE, STAT_CAUSE_DESCR, STATE, LATITUDE, LONGITUDE FROM Fires F WHERE F.STATE="CA"', con)
# all_fires = pd.read_sql('SELECT FOD_ID, FIRE_NAME, FIRE_SIZE, FIRE_SIZE_CLASS, FIRE_YEAR, DISCOVERY_DATE, STAT_CAUSE_DESCR, STATE, LATITUDE, LONGITUDE FROM Fires F', con)


In [None]:
test_fires = pd.read_sql('SELECT FOD_ID, FIRE_NAME, FIRE_SIZE, FIRE_SIZE_CLASS, FIRE_YEAR, DISCOVERY_DATE, STAT_CAUSE_DESCR, STATE, LATITUDE, LONGITUDE FROM Fires F LIMIT 15', con)
test_fires

In [9]:
# ca_fires[ca_fires['FIRE_SIZE_CLASS']=='E']
ca_fires['FIRE_SIZE_CLASS'].unique()

array(['A', 'B', 'G', 'C', 'F', 'D', 'E'], dtype=object)

In [12]:
ca_fires["date"] = ca_fires["DISCOVERY_DATE"].apply(julian_to_datetime)

In [101]:
print(ca_fires['FOD_ID'].nunique())
print(ca_fires['date'].nunique())
ca_fires[(ca_fires['LATITUDE']==37.375) & (ca_fires['LONGITUDE']==-120.62305555) & (ca_fires['date']=='2007-09-28 0:00:00')]

print(ca_fires[ca_fires['date']>='2000-02-18'].shape[0])
ca_2000_fires = ca_fires[ca_fires['date']>='2000-02-18']

189550
8324
121328


# augment with earth engine data

## single point test

In [74]:
gridmet = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET')
# maximumTemperature = dataset.select('tmmx')



row = ca_fires.iloc[0]
print(row)
print(julian_to_datetime(row['DISCOVERY_DATE']))
idx = 1
point = ee.Geometry.Point([row['LONGITUDE'], row['LATITUDE']])
feature = ee.Feature(point, {
            'id': str(idx),
            'date': julian_to_datetime(row['DISCOVERY_DATE'])
        })
date = ee.Date(feature.get('date'))

image = gridmet.filterDate(date, date.advance(1, 'day')).first()
max_temp = image.select('tmmx')
# print(image)
max_temp_value = max_temp.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=1000
    ).get('tmmx').getInfo()
print(max_temp_value)
# Print the elevation near Lyon, France.
# lst_urban_point = dataset.mean().sample(u_poi, scale).first().get('tmmx').getInfo()
# print(lst_urban_point - 273.15)
# print('Average daytime LST at urban point:', round(lst_urban_point*0.02 -273.15, 2), '°C')


FOD_ID                          1
FIRE_NAME                FOUNTAIN
FIRE_SIZE                     0.1
FIRE_SIZE_CLASS                 A
FIRE_YEAR                    2005
DISCOVERY_DATE          2453403.5
STAT_CAUSE_DESCR    Miscellaneous
STATE                          CA
LATITUDE                40.036944
LONGITUDE             -121.005833
Name: 0, dtype: object
2005-02-02 00:00:00
280.87799072265625


In [65]:
pop = ee.ImageCollection('CIESIN/GPWv411/GPW_Population_Density')

row = ca_fires.iloc[0]
print(row)
print(julian_to_datetime(row['DISCOVERY_DATE']))
idx = 1
point = ee.Geometry.Point([row['LONGITUDE'], row['LATITUDE']])
feature = ee.Feature(point, {
            'id': str(idx),
            'date': julian_to_datetime(row['DISCOVERY_DATE']),
            "year": 5 * round(julian_to_datetime(row['DISCOVERY_DATE']).year/5)
        })
year = feature.get('year')
date = ee.Date.fromYMD(year, 1, 1)

image = pop.filterDate(date, date.advance(1, 'year')).first()
# print(image)


population_density = image.select('population_density')

# Sample at the feature's location
population_density_value = population_density.reduceRegion(
    reducer=ee.Reducer.first(),
    geometry=feature.geometry(),
    scale=1000).get('population_density').getInfo()

print(population_density_value)


FOD_ID                                1
FIRE_NAME                      FOUNTAIN
FIRE_SIZE                           0.1
FIRE_SIZE_CLASS                       A
FIRE_YEAR                          2005
DISCOVERY_DATE                2453403.5
STAT_CAUSE_DESCR          Miscellaneous
STATE                                CA
LATITUDE                      40.036944
LONGITUDE                   -121.005833
date                2005-02-02 00:00:00
Name: 0, dtype: object
2005-02-02 00:00:00
4.473303318023682


In [84]:
veg = ee.ImageCollection('NASA/VIIRS/002/VNP13A1')


row = ca_fires.iloc[0]
print(row)
print(julian_to_datetime(row['DISCOVERY_DATE']))
idx = 1
point = ee.Geometry.Point([row['LONGITUDE'], row['LATITUDE']])
feature = ee.Feature(point, {
            'id': str(idx),
            'date': julian_to_datetime(row['DISCOVERY_DATE']),
            "year": 5 * round(julian_to_datetime(row['DISCOVERY_DATE']).year/5)
        })

date = ee.Date(feature.get('date'))
image = veg.filterDate(date, date.advance(1, 'month')).first()# print(image)
vegetation = image.select('EVI')

population_density = image.select('population_density')
# Sample at the feature's location
vegetation_value = vegetation.reduceRegion(
    reducer=ee.Reducer.first(),
    geometry=feature.geometry(),
    scale=10000
).get('EVI').getInfo()

print(vegetation_value)

FOD_ID                                1
FIRE_NAME                      FOUNTAIN
FIRE_SIZE                           0.1
FIRE_SIZE_CLASS                       A
FIRE_YEAR                          2005
DISCOVERY_DATE                2453403.5
STAT_CAUSE_DESCR          Miscellaneous
STATE                                CA
LATITUDE                      40.036944
LONGITUDE                   -121.005833
date                2005-02-02 00:00:00
Name: 0, dtype: object
2005-02-02 00:00:00


EEException: Image.select: Parameter 'input' is required and may not be null.

In [None]:
veg = ee.ImageCollection('NASA/VIIRS/002/VNP13A1')


row = ca_fires.iloc[0]
print(row)
print(julian_to_datetime(row['DISCOVERY_DATE']))
idx = 1
point = ee.Geometry.Point([row['LONGITUDE'], row['LATITUDE']])
feature = ee.Feature(point, {
            'id': str(idx),
            'date': julian_to_datetime(row['DISCOVERY_DATE']),
            "year": 5 * round(julian_to_datetime(row['DISCOVERY_DATE']).year/5)
        })

date = ee.Date(feature.get('date'))
image = veg.filterDate(date, date.advance(1, 'month')).first()# print(image)
vegetation = image.select('EVI')

population_density = image.select('population_density')
# Sample at the feature's location
vegetation_value = vegetation.reduceRegion(
    reducer=ee.Reducer.first(),
    geometry=feature.geometry(),
    scale=10000
).get('EVI').getInfo()

print(vegetation_value)


def sample_drought(feature):

    date = ee.Date(feature.get('date'))
    image = veg.filterDate(date, date.advance(1, 'week')).first()

    drought = image.select('pdsi')

    # Sample at the feature's location
    drought_value = drought.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=10000
    ).get('pdsi')


    return feature.set({
        'drought': drought_value,
    })

## batch processing

In [73]:
tqdm.pandas()

def df_to_fc(df):
    features = []
    for idx, row in tqdm(df.iterrows()):
        point = ee.Geometry.Point([row['LONGITUDE'], row['LATITUDE']])
        feature = ee.Feature(point, {
            'id': row['FOD_ID'],
            'date': julian_to_datetime(row['DISCOVERY_DATE']),
            "year": max(5 * round(julian_to_datetime(row['DISCOVERY_DATE']).year/5), 2000)
        })
        features.append(feature)
    return ee.FeatureCollection(features)


### gridmet weather data


In [28]:
gridmet = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET')

def sample_weather(feature):
    date = ee.Date(feature.get('date'))
    image = gridmet.filterDate(date, date.advance(1, 'day')).first()

    max_temp = image.select('tmmx')
    min_temp = image.select('tmmn')
    humidity = image.select('sph')
    wind_speed = image.select('vs')
    precipitation = image.select('pr')
    energy_release = image.select('erc')
    dead_fuel_100 = image.select('fm100')
    dead_fuel_1000 = image.select('fm1000')

    # Sample at the feature's location
    max_temp_value = max_temp.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=1000
    ).get('tmmx')

    min_temp_value = min_temp.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=1000
    ).get('tmmn')

    humidity_value = humidity.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=1000
    ).get('sph')

    wind_speed_value = wind_speed.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=1000
    ).get('vs')

    precipitation_value = precipitation.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=1000
    ).get('pr')

    energy_release_value = energy_release.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=1000
    ).get('erc')

    dead_fuel_100_value = dead_fuel_100.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=1000
    ).get('fm100')

    dead_fuel_1000_value = dead_fuel_1000.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=1000
    ).get('fm1000')



    return feature.set({
        'max_temp_K': max_temp_value,
        'min_temp_K': min_temp_value,
        'humidity': humidity_value,
        'wind_speed' : wind_speed_value,
        'precipitation' : precipitation_value,
        'energy_release' : energy_release_value,
        'dead_fuel_100' : dead_fuel_100_value,
        'dead_fuel_1000' : dead_fuel_1000_value
    })



In [33]:
chunk_size = 30000
chunks = np.array_split(ca_fires, np.ceil(len(ca_fires) / chunk_size))



for idx, chunk in enumerate(chunks):
    print(f'Processing chunk {idx+1}/{len(chunks)}...')

    # Convert this chunk into a FeatureCollection
    fc_chunk = df_to_fc(chunk)

    # Sample weather
    sampled_chunk = fc_chunk.map(sample_weather)

    # Export this chunk
    task = ee.batch.Export.table.toDrive(
        collection=sampled_chunk,
        description=f'export_weather_points_batch_{idx}',
        fileFormat='CSV',
        folder='earthengine_try2',
        fileNamePrefix=f'weather_points_batch_{idx}'
    )
    task.start()

  return bound(*args, **kwds)


Processing chunk 1/7...


27079it [00:03, 7898.11it/s]


Processing chunk 2/7...


27079it [00:02, 9795.21it/s] 


Processing chunk 3/7...


27079it [00:02, 9756.23it/s]


Processing chunk 4/7...


27079it [00:03, 7316.97it/s]


Processing chunk 5/7...


27078it [00:02, 9944.99it/s] 


Processing chunk 6/7...


27078it [00:03, 7858.32it/s] 


Processing chunk 7/7...


27078it [00:03, 7574.15it/s]


### population gpwv4 data

In [75]:
pop = ee.ImageCollection('CIESIN/GPWv411/GPW_Population_Density')

def sample_population(feature):

    year = feature.get('year')
    date = ee.Date.fromYMD(year, 1, 1)
    image = pop.filterDate(date, date.advance(1, 'year')).first()

    population_density = image.select('population_density')

    # Sample at the feature's location
    population_density_value = population_density.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=10000
    ).get('population_density')


    return feature.set({
        'population_density': population_density_value,
    })



In [76]:
chunk_size = 20000
chunks = np.array_split(ca_fires, np.ceil(len(ca_fires) / chunk_size))

for idx, chunk in enumerate(chunks):
    print(f'Processing chunk {idx+1}/{len(chunks)}...')

    # Convert this chunk into a FeatureCollection
    fc_chunk = df_to_fc(chunk)

    # Sample weather
    sampled_chunk = fc_chunk.map(sample_population)

    # Export this chunk
    task = ee.batch.Export.table.toDrive(
        collection=sampled_chunk,
        description=f'export_population_points_batch_{idx}',
        fileFormat='CSV',
        folder='earthengine_try2',
        fileNamePrefix=f'population_points_batch_{idx}'
    )
    task.start()

  return bound(*args, **kwds)


Processing chunk 1/10...


18955it [00:03, 4760.40it/s]


Processing chunk 2/10...


18955it [00:02, 6901.05it/s]


Processing chunk 3/10...


18955it [00:02, 9401.28it/s]


Processing chunk 4/10...


18955it [00:02, 6717.44it/s]


Processing chunk 5/10...


18955it [00:02, 9274.08it/s]


Processing chunk 6/10...


18955it [00:03, 5257.61it/s]


Processing chunk 7/10...


18955it [00:02, 9348.45it/s]


Processing chunk 8/10...


18955it [00:02, 9380.33it/s]


Processing chunk 9/10...


18955it [00:02, 8014.86it/s]


Processing chunk 10/10...


18955it [00:02, 9419.84it/s]


### vegetation modi3

In [102]:
veg = ee.ImageCollection('MODIS/061/MOD13A2')

def sample_vegetation(feature):

    date = ee.Date(feature.get('date'))
    image = veg.filterDate(date, date.advance(1, 'month')).first()

    vegetation = image.select('EVI')

    # Sample at the feature's location
    vegetation_value = vegetation.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=10000
    ).get('EVI')


    return feature.set({
        'vegetation': vegetation_value,
    })



In [103]:
chunk_size = 20000
chunks = np.array_split(ca_2000_fires, np.ceil(len(ca_2000_fires) / chunk_size))

for idx, chunk in enumerate(chunks):
    print(f'Processing chunk {idx+1}/{len(chunks)}...')

    # Convert this chunk into a FeatureCollection
    fc_chunk = df_to_fc(chunk)

    # Sample weather
    sampled_chunk = fc_chunk.map(sample_vegetation)

    # Export this chunk
    task = ee.batch.Export.table.toDrive(
        collection=sampled_chunk,
        description=f'export_vegetation_points_batch_{idx}',
        fileFormat='CSV',
        folder='earthengine_try2',
        fileNamePrefix=f'vegetation_points_batch_{idx}'
    )
    task.start()

  return bound(*args, **kwds)


Processing chunk 1/7...


17333it [00:01, 9014.52it/s]


Processing chunk 2/7...


17333it [00:02, 6419.96it/s]


Processing chunk 3/7...


17333it [00:01, 9433.15it/s]


Processing chunk 4/7...


17333it [00:02, 6952.48it/s]


Processing chunk 5/7...


17332it [00:01, 9503.88it/s]


Processing chunk 6/7...


17332it [00:02, 7229.68it/s]


Processing chunk 7/7...


17332it [00:01, 9612.82it/s]


### drought gridmet drought

In [92]:
drought = ee.ImageCollection('GRIDMET/DROUGHT')

def sample_drought(feature):

    date = ee.Date(feature.get('date'))
    image = drought.filterDate(date, date.advance(1, 'week')).first()

    drought_index = image.select('pdsi')

    # Sample at the feature's location
    drought_index_value = drought_index.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=feature.geometry(),
        scale=10000
    ).get('pdsi')


    return feature.set({
        'drought_index': drought_index_value,
    })


In [93]:
chunk_size = 20000
chunks = np.array_split(ca_fires, np.ceil(len(ca_fires) / chunk_size))

for idx, chunk in enumerate(chunks):
    print(f'Processing chunk {idx+1}/{len(chunks)}...')

    # Convert this chunk into a FeatureCollection
    fc_chunk = df_to_fc(chunk)

    # Sample weather
    sampled_chunk = fc_chunk.map(sample_drought)

    # Export this chunk
    task = ee.batch.Export.table.toDrive(
        collection=sampled_chunk,
        description=f'export_drought_points_batch_{idx}',
        fileFormat='CSV',
        folder='earthengine_try2',
        fileNamePrefix=f'drought_points_batch_{idx}'
    )
    task.start()

  return bound(*args, **kwds)


Processing chunk 1/10...


18955it [00:03, 6277.87it/s]


Processing chunk 2/10...


18955it [00:02, 6516.10it/s]


Processing chunk 3/10...


18955it [00:02, 7755.82it/s]


Processing chunk 4/10...


18955it [00:02, 7610.46it/s]


Processing chunk 5/10...


18955it [00:02, 6391.86it/s]


Processing chunk 6/10...


18955it [00:02, 6830.59it/s]


Processing chunk 7/10...


18955it [00:03, 6182.53it/s]


Processing chunk 8/10...


18955it [00:03, 5303.91it/s]


Processing chunk 9/10...


18955it [00:01, 9542.37it/s]


Processing chunk 10/10...


18955it [00:02, 9439.79it/s]


# merge data

In [37]:
batch_files = glob.glob('/content/drive/MyDrive/earthengine_try2/weather_points_batch_*.csv')

exported_df = pd.concat((pd.read_csv(f) for f in batch_files), ignore_index=True)
exported_df.rename(columns={'id': 'FOD_ID'}, inplace=True)
exported_df.drop_duplicates(subset=["FOD_ID"], inplace=True)
print(ca_fires['FOD_ID'].nunique())

print(exported_df.columns.tolist())



189550
['system:index', 'date', 'dead_fuel_100', 'dead_fuel_1000', 'energy_release', 'humidity', 'FOD_ID', 'max_temp_K', 'min_temp_K', 'precipitation', 'wind_speed', '.geo']


In [39]:

merged_df = ca_fires.merge(exported_df[['FOD_ID', 'dead_fuel_100', 'dead_fuel_1000', 'energy_release', 'humidity', 'max_temp_K', 'min_temp_K', 'precipitation', 'wind_speed']],
                           on='FOD_ID', how='left')

# 4. Convert from Kelvin to Celsius
merged_df['max_temp_C'] = merged_df['max_temp_K'] - 273.15
merged_df['min_temp_C'] = merged_df['min_temp_K'] - 273.15


In [77]:
batch_files = glob.glob('/content/drive/MyDrive/earthengine_try2/population_points_batch_*.csv')

exported_df = pd.concat((pd.read_csv(f) for f in batch_files), ignore_index=True)
exported_df.rename(columns={'id': 'FOD_ID'}, inplace=True)
exported_df.drop_duplicates(subset=["FOD_ID"], inplace=True)


In [78]:
merged_df = merged_df.merge(exported_df[['FOD_ID', 'population_density']],
                           on='FOD_ID', how='left')

In [104]:
batch_files = glob.glob('/content/drive/MyDrive/earthengine_try2/vegetation_points_batch_*.csv')

exported_df = pd.concat((pd.read_csv(f) for f in batch_files), ignore_index=True)
exported_df.rename(columns={'id': 'FOD_ID'}, inplace=True)
exported_df.drop_duplicates(subset=["FOD_ID"], inplace=True)


In [105]:
merged_df = merged_df.merge(exported_df[['FOD_ID', 'vegetation']],
                           on='FOD_ID', how='left')

In [106]:
batch_files = glob.glob('/content/drive/MyDrive/earthengine_try2/drought_points_batch_*.csv')

exported_df = pd.concat((pd.read_csv(f) for f in batch_files), ignore_index=True)
exported_df.rename(columns={'id': 'FOD_ID'}, inplace=True)
exported_df.drop_duplicates(subset=["FOD_ID"], inplace=True)


In [107]:
merged_df = merged_df.merge(exported_df[['FOD_ID', 'drought_index']],
                           on='FOD_ID', how='left')

In [115]:
print(merged_df.columns.tolist())


['FOD_ID', 'FIRE_NAME', 'FIRE_SIZE', 'FIRE_SIZE_CLASS', 'FIRE_YEAR', 'DISCOVERY_DATE', 'STAT_CAUSE_DESCR', 'STATE', 'LATITUDE', 'LONGITUDE', 'date', 'dead_fuel_100', 'dead_fuel_1000', 'energy_release', 'humidity', 'max_temp_K', 'min_temp_K', 'precipitation', 'wind_speed', 'max_temp_C', 'min_temp_C', 'population_density', 'vegetation', 'drought_index']


In [112]:
merged_df

Unnamed: 0,FOD_ID,FIRE_NAME,FIRE_SIZE,FIRE_SIZE_CLASS,FIRE_YEAR,DISCOVERY_DATE,STAT_CAUSE_DESCR,STATE,LATITUDE,LONGITUDE,...,humidity,max_temp_K,min_temp_K,precipitation,wind_speed,max_temp_C,min_temp_C,population_density,vegetation,drought_index
0,1,FOUNTAIN,0.10,A,2005,2453403.5,Miscellaneous,CA,40.036944,-121.005833,...,0.002854,280.877991,271.268524,0.0,6.929889,7.727991,-1.881476,1.478607,2929.0,-0.3500
1,2,PIGEON,0.25,A,2004,2453137.5,Lightning,CA,38.933056,-120.404444,...,0.003493,289.097748,273.432831,0.0,3.152111,15.947748,0.282831,0.004660,3590.0,-2.2075
2,3,SLACK,0.10,A,2004,2453156.5,Debris Burning,CA,38.984167,-120.735556,...,0.005902,301.037048,286.321106,0.0,1.806214,27.887048,13.171106,6.677308,4518.0,-2.7800
3,4,DEER,0.10,A,2004,2453184.5,Lightning,CA,38.559167,-119.913333,...,0.004578,294.351685,281.062378,0.0,2.183747,21.201685,7.912378,0.000000,2384.0,-2.6175
4,5,STEVENOT,0.10,A,2004,2453184.5,Lightning,CA,38.559167,-119.933056,...,0.004578,294.351685,281.062378,0.0,2.183747,21.201685,7.912378,0.000000,2384.0,-2.6175
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189545,300348363,ODESSA 2,0.01,A,2015,2457291.5,Missing/Undefined,CA,40.481637,-122.389375,...,0.006131,307.156189,288.919708,0.0,2.437995,34.006189,15.769708,40.920891,2216.0,-3.3700
189546,300348373,,0.20,A,2015,2457300.5,Miscellaneous,CA,37.617619,-120.938570,...,0.006831,299.812439,286.783722,0.0,1.300694,26.662439,13.633722,646.360901,2263.0,-2.6900
189547,300348375,,0.10,A,2015,2457144.5,Missing/Undefined,CA,37.617619,-120.938570,...,0.006538,305.998657,286.920319,0.0,3.041790,32.848657,13.770319,646.360901,2653.0,-3.1725
189548,300348377,,2.00,B,2015,2457309.5,Missing/Undefined,CA,37.672235,-120.898356,...,0.007374,305.406464,290.854279,0.0,3.567064,32.256464,17.704279,1565.474243,2068.0,-2.7975


In [114]:
merged_df.to_csv("/content/drive/MyDrive/Spring 2025/ML and Climate/wildfire_project/fire_earth_data.csv", index=False)