In [None]:
pip install geopandas

In [None]:
# Creation of initial fire and non-fire datapoints

import geopandas as gpd
import pandas as pd
import numpy as np
import random

# Seed both random generators used below so that a fresh "Restart & Run All"
# rebuilds exactly the same dataset.
SEED = 42
random.seed(SEED)
rng = np.random.default_rng(SEED)

# Number of fire rows to keep and of synthetic no-fire rows to generate (balanced classes)
npoints = 250

# Load the shapefile containing the Riverside County boundaries
# NOTE(review): the code below compares these coordinates directly with the
# spreadsheet's longitude/latitude columns, so the shapefile is assumed to be
# stored in lon/lat degrees - confirm its CRS.
riverside_county = gpd.read_file("/content/Riverside County/Riverside_County.shp")
minx, miny, maxx, maxy = riverside_county.total_bounds


def inside_county(longitudes, latitudes):
    """Return a boolean array that is True where (longitude, latitude) lies inside a county geometry.

    A bounding-box comparison alone also accepts points that sit inside the box
    but outside the county outline, so every point is tested against the shapes.
    """
    points = gpd.GeoSeries(gpd.points_from_xy(longitudes, latitudes))
    inside = np.zeros(len(points), dtype=bool)
    for shape in riverside_county.geometry:
        if shape is None or shape.is_empty:
            continue
        inside |= points.within(shape).to_numpy()
    return inside


# Filter out any points that fall outside of the Riverside County boundaries.
# The cheap bounding-box test runs first (it also discards rows with missing
# coordinates); the exact containment test then runs on the survivors.
fire_data = pd.read_excel("/content/Fire Data, 18-22.xlsx")
in_bbox = (fire_data['longitude'].between(minx, maxx) &
           fire_data['latitude'].between(miny, maxy))
fire_data = fire_data[in_bbox]
fire_data = fire_data[inside_county(fire_data['longitude'], fire_data['latitude'])]

# Randomly generate `npoints` locations inside the county for the no fire datapoints.
# Candidates are drawn uniformly in the bounding box and rejected when they fall
# outside the county geometries; drawing stops once enough were accepted.
lon_batches, lat_batches, n_found = [], [], 0
for _ in range(1000):
    cand_lon = rng.uniform(minx, maxx, npoints)
    cand_lat = rng.uniform(miny, maxy, npoints)
    keep = inside_county(cand_lon, cand_lat)
    lon_batches.append(cand_lon[keep])
    lat_batches.append(cand_lat[keep])
    n_found += int(keep.sum())
    if n_found >= npoints:
        break
else:
    raise RuntimeError(
        f"Could not draw {npoints} random points inside the county geometries; "
        "check the shapefile."
    )

# Generate random dates within a range of dates for the no fire datapoints.
# The candidate dates are built once instead of once per generated point.
# NOTE(review): the fire workbook's file name suggests it covers 2018-2022 while
# the no-fire dates are drawn from 2022 only - confirm this is intended.
start_date = pd.to_datetime('2022-01-01')
end_date = pd.to_datetime('2022-12-31')
candidate_dates = pd.date_range(start=start_date, end=end_date)
no_fire_dates = [random.choice(candidate_dates).date() for _ in range(npoints)]

no_fire_data = pd.DataFrame({
    'Date': no_fire_dates,
    'longitude': np.concatenate(lon_batches)[:npoints],
    'latitude': np.concatenate(lat_batches)[:npoints],
})

# Randomly select `npoints` fire datapoints from the filtered fire data
if len(fire_data) < npoints:
    raise ValueError(
        f"Only {len(fire_data)} fire records fall inside the county; "
        f"{npoints} are required."
    )
fire_data = fire_data.sample(n=npoints, random_state=SEED).copy()

# Add a new boolean column to indicate whether each point represents a fire or not
fire_data['fire'] = True
no_fire_data['fire'] = False

# Combine the no fire datapoints and fire datapoints into a single dataset
data = pd.concat([no_fire_data, fire_data], ignore_index=True)

In [None]:
# Leave the frame as the cell's last expression so the notebook renders it as a
# rich (truncated) table instead of plain text.
data

In [None]:
# Write to the same absolute path the shapefile cell reads from, so the two
# cells agree regardless of the kernel's working directory.
data.to_excel("/content/train_test.xlsx", index=False)

In [None]:
# Creation of Shapefile with initial points

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Load the xlsx file into a Pandas DataFrame
df = pd.read_excel('/content/train_test.xlsx')

# Convert the date field to a string field
df['Date'] = df['Date'].astype(str)

# Create a Point geometry column using the longitude and latitude columns
# (vectorised, x = longitude and y = latitude)
geometry = gpd.points_from_xy(df['longitude'], df['latitude'])

# Convert the DataFrame to a GeoDataFrame.
# A CRS is attached so the exported shapefile carries projection information.
# NOTE(review): assumes the coordinates are WGS84 lon/lat degrees - confirm.
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

# Save the GeoDataFrame as a shapefile with accompanying dbf and shx files
gdf.to_file('train_test_shape')

In [None]:
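# Google Earth Engine script (presumably the step that produces the NDVI_Values.csv
# file loaded in the next cell - confirm):
# https://code.earthengine.google.com/fb4d6517da6bb886cf946f77bc52800e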

In [None]:
# Assigning a Weather Station to each datapoint

import pandas as pd
from math import radians, cos, sin, asin, sqrt

# Read the weather-station table and the per-point NDVI table
# (the NDVI table is the "fire" dataset that gets a station assigned below)
weather_data = pd.read_excel(
    '/content/Weather Station Names.xlsx'
)
fire_data = pd.read_csv(
    '/content/NDVI_Values.csv'
)

# Define a function to calculate the distance between two sets of coordinates
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along a sphere of radius 6372.8 km. NaN inputs give NaN.
    """
    R = 6372.8  # sphere radius in kilometres

    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make asin() raise ValueError. `min(a, 1.0)` (in this argument
    # order) clamps that case while still letting NaN propagate.
    a = min(a, 1.0)
    c = 2 * asin(sqrt(a))

    distance_km = R * c
    return distance_km

# Read the station table once; the original re-iterated the weather DataFrame
# for every data point, rebuilding a row object per station each time.
stations = list(zip(weather_data['station_name'],
                    weather_data['Latitude'],
                    weather_data['Longitude']))

# Loop through each data point in the fire dataset
closest_stations = []
for lat, lon in zip(fire_data['latitude'], fire_data['longitude']):
    min_distance = float('inf')
    closest_station = None

    # Loop through each weather station coordinate in the weather dataset
    for station_name, station_lat, station_lon in stations:
        distance = haversine(lat, lon, station_lat, station_lon)

        # Keep track of the closest weather station. The strict '<' keeps the
        # first station on ties and skips NaN distances; a point with no valid
        # distance ends up with None.
        if distance < min_distance:
            min_distance = distance
            closest_station = station_name

    closest_stations.append(closest_station)

# Add the closest weather station name as a new column in the fire dataset
fire_data['closest_weather_station'] = closest_stations

# Save the updated fire dataset to a new xlsx file, at the absolute path the
# weather-feature cell reads it from.
fire_data.to_excel('/content/fire_data_with_closest_weather_station.xlsx', index=False)

In [None]:
# Assigning Weather features to each datapoint

import pandas as pd
import os
import datetime as dt

# read in fire dataset
fire_data = pd.read_excel('/content/fire_data_with_closest_weather_station.xlsx')

# columns the combined weather table is expected to expose
expected_columns = ['Date', 'Avg Temperature', 'Max Temperature', 'Min Temperature',
                    'Precipitation', 'Latitude', 'Longitude', 'closest_weather_station']

# loop through weather files, collecting one frame per station; they are
# concatenated once at the end instead of growing a DataFrame inside the loop
weather_dir = '/content/Individual Weather Stations'
station_frames = []
for filename in sorted(os.listdir(weather_dir)):
    if not filename.endswith('.xlsx'):
        continue
    # read in weather data
    weather = pd.read_excel(os.path.join(weather_dir, filename))
    # convert Date column to datetime format
    weather['Date'] = pd.to_datetime(weather['Date'], format='%m/%d/%Y')
    # the file name without its extension is the weather station name
    weather['closest_weather_station'] = os.path.splitext(filename)[0]
    station_frames.append(weather)

if not station_frames:
    raise FileNotFoundError(f"No .xlsx weather files found in {weather_dir!r}")

weather_data = pd.concat(station_frames, ignore_index=True)
# keep the expected columns present (NaN-filled when a file lacks one) and first
weather_data = weather_data.reindex(
    columns=list(dict.fromkeys(expected_columns + list(weather_data.columns)))
)

# convert Date column in fire_data to datetime format
fire_data['Date'] = pd.to_datetime(fire_data['Date'], format='%Y-%m-%d')

# Sort every station's records by date once, so that "first row on or after the
# event date" really is the nearest following day and does not depend on the
# row order inside the station's spreadsheet.
weather_by_station = {
    name: group.sort_values('Date', kind='mergesort')
    for name, group in weather_data.groupby('closest_weather_station')
}

# make sure the output columns exist even if no row finds a weather match
weather_columns = ['Avg Temperature', 'Max Temperature', 'Min Temperature', 'Precipitation']
for column in weather_columns:
    if column not in fire_data.columns:
        fire_data[column] = float('nan')

# loop through rows in fire_data and find corresponding weather data
for i, row in fire_data.iterrows():
    # extract latitude, longitude, and date from row
    lat = row['latitude']
    lon = row['longitude']
    date = row['Date']
    # calculate distances between fire location and weather stations
    # NOTE(review): this is a planar distance in degrees and its result
    # overwrites the 'closest_weather_station' value loaded from the input
    # file - confirm that recomputing the station here is intended.
    distances = ((weather_data['Latitude'] - lat)**2 + (weather_data['Longitude'] - lon)**2)**0.5
    # find closest weather station
    closest_station = weather_data.loc[distances.idxmin(), 'closest_weather_station']
    fire_data.loc[i, 'closest_weather_station'] = closest_station
    # find weather data for closest station: earliest record on or after the date
    station_weather = weather_by_station[closest_station]
    on_or_after = station_weather[station_weather['Date'] >= date]
    if on_or_after.empty:
        # No record on/after this date (or the date is missing): leave the
        # weather columns as NaN instead of failing with IndexError.
        continue
    closest_weather = on_or_after.iloc[0]
    # add weather data to fire_data
    for column in weather_columns:
        fire_data.loc[i, column] = closest_weather[column]

# write updated fire data to new file
fire_data.to_excel('/content/fire_data_with_weather.xlsx', index=False)

In [None]:
# Data Processing 

import pandas as pd

def _as_fire_flag(value):
    """Convert one 'fire' cell to a bool.

    Text such as "false" or "0" would become True under a plain bool cast
    (any non-empty string is truthy), so strings are matched explicitly.
    Non-string values keep the plain bool() conversion.
    """
    if isinstance(value, str):
        return value.strip().lower() in {'true', 't', 'yes', 'y', '1'}
    return bool(value)


# Load the xlsx file, extract the required columns and rename them
df = (
    pd.read_excel('/content/fire_data_with_weather.xlsx')
    [['fire', 'nd', 'Avg Temperature', 'Max Temperature', 'Min Temperature', 'Precipitation']]
    .rename(columns={'nd': 'NDVI', 'fire': 'Fire'})
)

# Convert the Fire column to boolean values
df['Fire'] = df['Fire'].map(_as_fire_flag).astype(bool)

# Save the new csv file at the absolute path the next cell reads it from
df.to_csv('/content/fire_dataset.csv', index=False)

In [None]:
# Data Processing

import pandas as pd

# Read the assembled dataset back from disk
data = pd.read_csv("/content/fire_dataset.csv")

# Keep only the rows in which every column has a value
data = data.dropna()

# Write the cleaned rows to a separate CSV file, without the index column
data.to_csv(
    "fire_dataset_normal.csv",
    index=False,
)