#Install Dependencies

In [6]:
import pandas as pd
import numpy as np
import requests
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [7]:
pip install geopandas shapely dask



#Define necesary functions & download important data

In [8]:
import requests
import zipfile
import io
import pandas as pd

def download_extract_read_csv(url):
    """Downloads a zip file, extracts it locally, and reads the first CSV into a DataFrame."""
    response = requests.get(url)
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
        zip_ref.extractall()
        csv_file = next((f for f in zip_ref.namelist() if f.endswith('.csv')), None)
        return pd.read_csv(csv_file) if csv_file else None

# Example usage:
url = "https://firms.modaps.eosdis.nasa.gov/data/country/zips/modis_2023_all_countries.zip"
df = download_extract_read_csv(url)
print(df.head() if df is not None else "No CSV found.")

KeyboardInterrupt: 

In [127]:
import pandas as pd
import os

def load_all_csvs_from_folder(folder_path):
    """
    Loads all CSV files from a specified folder into a single Pandas DataFrame.
    Assumes only the first CSV file has a header, and the rest do not.
    """
    all_dfs = []  # List to hold individual Pandas DataFrames
    column_names = None  # Variable to store column names from the first file

    # Get a list of all CSV files in the folder
    csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]

    if not csv_files:
        print("No CSV files found in the folder.")
        return None

    # Process each CSV file
    for i, filename in enumerate(csv_files):
        file_path = os.path.join(folder_path, filename)

        # Read the first file with header to get column names
        if i == 0:
            df = pd.read_csv(file_path, low_memory=False)
            column_names = df.columns.tolist()  # Save column names
        else:
            # Read subsequent files without header and skip the first row
            df = pd.read_csv(file_path, low_memory=False, header=None, names=column_names, skiprows=1)

        all_dfs.append(df)

    # Concatenate all Pandas DataFrames into one
    combined_df = pd.concat(all_dfs, axis=0)
    return combined_df

# Specify the folder path
folder_path = "modis/2023"

# Load all CSVs from the folder
df = load_all_csvs_from_folder(folder_path)

if df is not None:
    print("Data from all CSVs in the folder:")
    print(df.head())  # Display the first few rows
else:
    print("No CSV files found in the folder.")

Data from all CSVs in the folder:
   latitude  longitude  brightness  scan  track    acq_date  acq_time  \
0   16.7462   -62.1626       310.4   1.7    1.3  2023-06-19      1443   
1   16.7496   -62.1601       313.5   2.1    1.4  2023-08-02      1725   
2   16.7559   -62.1613       320.3   1.1    1.0  2023-09-07      1758   
0  -24.9183    24.5636       326.6   1.0    1.0  2023-01-07       815   
1  -24.9198    24.5735       347.9   1.0    1.0  2023-01-07       815   

  satellite instrument  confidence  version  bright_t31   frp daynight  type  
0     Terra      MODIS           0    61.03       295.8  13.0        D     1  
1      Aqua      MODIS           0    61.03       298.1  19.2        D     1  
2      Aqua      MODIS           0    61.03       306.6  10.0        D     1  
0     Terra      MODIS          76    61.03       304.4  16.5        D     0  
1     Terra      MODIS          94    61.03       308.1  49.8        D     0  


In [9]:
import requests

def get_historical_weather(date: str, lat: float, lon: float):
    """
    :param date: Date in format 'YYYY-MM-DD'
    :param lat: Latitude of the location
    :param lon: Longitude of the location
    :return: Dictionary with temperature, sunshine duration, precipitation, wind speed, and evapotranspiration
    """
    url = "https://archive-api.open-meteo.com/v1/archive"

    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": date,
        "end_date": date,
        "daily": [
            "temperature_2m_max",
            "temperature_2m_min",
            "temperature_2m_mean",
            "sunshine_duration",
            "precipitation_sum",
            "wind_speed_10m_max",
            "et0_fao_evapotranspiration"
        ],
        "timezone": "auto"
    }

    response = requests.get(url, params=params)

    if response.status_code == 200:
        data = response.json()
        if "daily" in data:
            daily = data["daily"]
            return {
                "date": date,
                "location": (lat, lon),
                "max_temp": daily["temperature_2m_max"][0],
                "min_temp": daily["temperature_2m_min"][0],
                "mean_temp": daily["temperature_2m_mean"][0],
                "sunshine_duration": daily["sunshine_duration"][0],  # in seconds
                "precipitation": daily["precipitation_sum"][0],  # in mm
                "max_wind_speed": daily["wind_speed_10m_max"][0],  # in m/s
                "evapotranspiration": daily["et0_fao_evapotranspiration"][0]  # in mm
            }
        else:
            return {"error": "No weather data found for this date/location"}
    else:
        return {"error": f"API request failed with status code {response.status_code}"}

# Example usage
weather_summary = get_historical_weather("2023-06-15", 40.7128, -74.0060)  # New York City
print(weather_summary)

{'date': '2023-06-15', 'location': (40.7128, -74.006), 'max_temp': 26.1, 'min_temp': 14.9, 'mean_temp': 20.4, 'sunshine_duration': 47244.36, 'precipitation': 0.0, 'max_wind_speed': 20.4, 'evapotranspiration': 5.06}


In [10]:
def calculate_start_and_end_date(date):
  # date is in format 2023-01-02, i want to return a dictionary of the start of the day and the end of the day
  start_date = pd.to_datetime(date)
  end_date = start_date + pd.Timedelta(days=1)
  return [start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")]

In [11]:
def calculate_start_week_and_end_week(date):
  # date is in format 2023-01-02, i want to return a dictionary of the start of the day 6 days earlier and the end of the day of that day
  end_date = pd.to_datetime(date)
  start_of_week = end_date - pd.Timedelta(days=6)  # 6 days before the given date
  return[start_of_week, end_date]

not needed, but kept anyways -- we have no great way of estimating brightness the way NASA does for non fire sample locations

In [12]:
def normalize_fire_brightness(brightness_values):
    """Normalize fire brightness to a value between 0 and 1."""
    min_brightness = min(brightness_values)
    max_brightness = max(brightness_values)
    normalized_values = [(value - min_brightness) / (max_brightness - min_brightness) for value in brightness_values]
    return normalized_values

def standardize_fire_brightness(brightness_values):
    mean_brightness = np.mean(brightness_values)
    std_brightness = np.std(brightness_values)
    standardized_values = [(value - mean_brightness) / std_brightness for value in brightness_values]
    return standardized_values

In [13]:
import random
import datetime
import geopandas as gpd
from shapely.geometry import Point

# Download the Natural Earth land dataset (if not available)
land_shapefile = "land_polygons.shp"

try:
    land = gpd.read_file(land_shapefile)
except:
    land = gpd.read_file("https://naciscdn.org/naturalearth/10m/physical/ne_10m_land.zip")

def random_date_2023():
    """Generate a random date in the year 2023."""
    start_date = datetime.date(2023, 1, 1)
    end_date = datetime.date(2023, 12, 31)
    return start_date + datetime.timedelta(days=random.randint(0, (end_date - start_date).days))

def random_land_coordinates(min_lat=-30, max_lat=30):
    """Generate random land coordinates with latitude within a given range."""
    while True:
        lat = random.uniform(min_lat, max_lat)
        lon = random.uniform(-180, 180)
        point = Point(lon, lat)
        if land.contains(point).any():
            return lat, lon

# Generate results
date = random_date_2023()
lat, lon = random_land_coordinates(min_lat=-60, max_lat=60)

print(f"Random Date in 2023: {date}")
print(f"Random Land Coordinates: ({lat:.6f}, {lon:.6f})")

Random Date in 2023: 2023-09-28
Random Land Coordinates: (7.061107, 93.810586)


#Data preprocessing

In [None]:
def prepare_dataset(fire_df):
    """Prepare training dataset with fire and non-fire samples"""
    fire_samples = []
    for _, row in fire_df.iterrows():
        try:
            #print(row)
            start_date, end_date = calculate_start_and_end_date(row['acq_date'])
            weather_data = get_historical_weather(row['acq_date'], row['latitude'], row['longitude'])
            sample = {
                'lat': row['latitude'],
                'lon': row['longitude'],
                'fire': 1,
                'max_temp': weather_data['max_temp'],
                'min_temp': weather_data['min_temp'],
                'mean_temp': weather_data['mean_temp'],
                'sunshine_duration': weather_data['sunshine_duration'],
                'precipitation': weather_data['precipitation'],
                'max_wind_speed': weather_data['max_wind_speed'],
                'evapotranspiration': weather_data['evapotranspiration']
            }
            fire_samples.append(sample)
        except KeyError:
            # If a KeyError occurs, discard the sample
            continue

    # generate non-fire samples (adjust ratio as needed)
    non_fire_samples = []
    for _ in range(len(fire_samples)):
        try:
            # generate a random date in 2023
            date = random_date_2023()
            # generate random coordinates on land
            lat, lon = random_land_coordinates(min_lat=-60, max_lat=60)
            weather_data = get_historical_weather(date, lat, lon)
            sample = {
                'lat': lat,
                'lon': lon,
                'fire': 0,
                'max_temp': weather_data['max_temp'],
                'min_temp': weather_data['min_temp'],
                'mean_temp': weather_data['mean_temp'],
                'sunshine_duration': weather_data['sunshine_duration'],
                'precipitation': weather_data['precipitation'],
                'max_wind_speed': weather_data['max_wind_speed'],
                'evapotranspiration': weather_data['evapotranspiration']
            }
            non_fire_samples.append(sample)
        except KeyError:
            continue

    return pd.DataFrame(fire_samples + non_fire_samples)

dataset = prepare_dataset(df)
print(dataset.head())

In [122]:
print(dataset.head())

Empty DataFrame
Columns: []
Index: []


In [None]:
import pandas as pd
dataset = prepare_dataset(df)
print(dataset.head())
print(f"Dataset shape: {dataset.shape}")
print(dataset.head())

dataset.to_parquet('fire_dataset.parquet')

# reloaded_dataset = dd.read_parquet('fire_dataset.parquet')

#Model training

In [102]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['lat', 'lon', 'max_temp', 'min_temp', 'mean_temp',
                                   'sunshine_duration', 'precipitation', 'max_wind_speed',
                                   'evapotranspiration'])
    ])

def create_model(input_shape):
    model = Sequential([
        Dense(128, activation='relu', input_shape=input_shape),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [104]:
X = preprocessor.fit_transform(dataset.drop('fire', axis=1))
y = dataset['fire'].values

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train model
model = create_model((X_train.shape[1],))
history = model.fit(X_train, y_train,
                    epochs=50,
                    batch_size=32,
                    validation_split=0.2)

# Evaluate
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy:.2f}")

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 91ms/step - accuracy: 0.6125 - loss: 0.6655 - val_accuracy: 0.7000 - val_loss: 0.6394
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.6548 - loss: 0.6362 - val_accuracy: 0.7000 - val_loss: 0.6003
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.6420 - loss: 0.6142 - val_accuracy: 0.6333 - val_loss: 0.5664
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.8127 - loss: 0.5534 - val_accuracy: 0.6333 - val_loss: 0.5384
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - accuracy: 0.7404 - loss: 0.5397 - val_accuracy: 0.6333 - val_loss: 0.5129
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.7987 - loss: 0.5204 - val_accuracy: 0.6333 - val_loss: 0.4890
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

#Model Testing

In [105]:
prediction = 0
while prediction < 0.5:
    try:
        date = random_date_2023()
        lat, lon = random_land_coordinates(min_lat=-30, max_lat=30)
        weather_data = get_historical_weather(date, lat, lon)

        sample_input = pd.DataFrame([{
            'lat': lat,
            'lon': lon,
            'max_temp': weather_data['max_temp'],
            'min_temp': weather_data['min_temp'],
            'mean_temp': weather_data['mean_temp'],
            'sunshine_duration': weather_data['sunshine_duration'],
            'precipitation': weather_data['precipitation'],
            'max_wind_speed': weather_data['max_wind_speed'],
            'evapotranspiration': weather_data['evapotranspiration']
        }])

        # transform input
        sample_input_transformed = preprocessor.transform(sample_input)

        # predict!!
        prediction = model.predict(sample_input_transformed)
        fire_probability = prediction[0][0]
        print(f"Fire probability: {fire_probability:.2f}")

        # check if the fire probability is greater than or equal to 0.5
        if fire_probability >= 0.5:
            print("Fire detected! Printing all relevant information:")
            print(f"Date: {date}")
            print(f"Latitude: {lat}, Longitude: {lon}")
            print(f"Max Temperature: {weather_data['max_temp']}")
            print(f"Min Temperature: {weather_data['min_temp']}")
            print(f"Mean Temperature: {weather_data['mean_temp']}")
            print(f"Sunshine Duration: {weather_data['sunshine_duration']}")
            print(f"Precipitation: {weather_data['precipitation']}")
            print(f"Max Wind Speed: {weather_data['max_wind_speed']}")
            print(f"Evapotranspiration: {weather_data['evapotranspiration']}")
            print(f"Fire Probability: {fire_probability:.2f}")
            break

    except KeyError as e:
        print(f"KeyError: Missing key {e}. Continuing...")
        continue

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
Fire probability: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Fire probability: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Fire probability: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Fire probability: 0.00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Fire probability: 0.95
Fire detected! Printing all relevant information:
Date: 2023-11-13
Latitude: 27.3341956806738, Longitude: 64.56249078550798
Max Temperature: 20.2
Min Temperature: 9.1
Mean Temperature: 15.0
Sunshine Duration: 34541.46
Precipitation: 0.0
Max Wind Speed: 26.7
Evapotranspiration: 4.94
Fire Probability: 0.95


In [46]:
import pandas as pd
# throw current_fires in to the files
try:
    # delimiter is semicolon I HATE APPLE REFORMATING
    df = pd.read_csv('current_fires.csv', delimiter=';', header=None)

    latitudes = df.iloc[:, 0].tolist()
    longitudes = df.iloc[:, 1].tolist()
    dates = df.iloc[:, 2].tolist()
    total_length = len(df)

    print("Latitudes:", latitudes[:5])
    print("Longitudes:", longitudes[:5])
    print("Dates:", dates[:5])
    print("Total length of the list:", total_length)

    latitudes = [float(lat.replace(',', '.')) for lat in latitudes]
    longitudes = [float(lon.replace(',', '.')) for lon in longitudes]

    predictions = []
    for lat, lon, date in zip(latitudes, longitudes, dates):
        try:
            weather_data = get_historical_weather(date, lat, lon)

            max_temp = weather_data.get('max_temp', weather_data['mean_temp'])

            sample_input = pd.DataFrame([{
                'lat': lat,
                'lon': lon,
                'max_temp': max_temp,
                'min_temp': weather_data['min_temp'],
                'mean_temp': weather_data['mean_temp'],
                'sunshine_duration': weather_data['sunshine_duration'],
                'precipitation': weather_data['precipitation'],
                'max_wind_speed': weather_data['max_wind_speed'],
                'evapotranspiration': weather_data['evapotranspiration']
            }])

            sample_input_transformed = preprocessor.transform(sample_input)

            prediction = model.predict(sample_input_transformed)
            fire_probability = prediction[0][0]
            predictions.append(fire_probability)
        except KeyError as e:
            print(f"KeyError: {e} occurred for date {date}, lat {lat}, lon {lon}. Skipping this entry.")
            continue
        except Exception as e:
            print(f"An unexpected error occurred: {e} for date {date}, lat {lat}, lon {lon}. Skipping this entry.")
            continue

    print("Predictions:", predictions)

except FileNotFoundError:
    print("Error: 'fires.csv' not found in the current directory.")
except pd.errors.EmptyDataError:
    print("Error: 'fires.csv' is empty.")
except IndexError:
    print("Error: 'fires.csv' does not have at least three columns.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Latitudes: ['66,65865', '66,65951', '66,66631', '66,93654', '67,27251']
Longitudes: ['80,46708', '80,45956', '80,4539', '80,73838', '83,2158']
Dates: ['2025-02-02', '2025-02-02', '2025-02-02', '2025-02-02', '2025-02-02']
Total length of the list: 326572
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━

KeyboardInterrupt: 

In [47]:
print(predictions)

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999798, 0.99944717, 0.9994489, 0.99945647, 0.9998124, 0.9997468, 0.9998026, 0.99980265, 0.9895467, 0.9687893, 0.9687927, 0.96586305, 0.96586657, 0.9913432, 0.9972852, 0.99728596, 0.9992069, 0.9992086, 0.99920917, 0.9992094, 0.9986143, 0.9986144, 0.9986141, 0.99861413, 0.9992301, 0.99934435, 0.99935126, 0.9989535, 0.9978998, 0.99790853, 0.99790806, 0.99791086, 0.99916744, 0.9982663, 0.99984825, 0.9998471, 0.99945694, 0.99834114, 0.9983437, 0.99834377, 0.99604744, 0.99598163, 0.9959839, 0.995991, 0.995992, 0.99932545, 0.99932534, 0.9993279, 0.9993259, 0.9993258, 0.9992518, 0.99925244, 0.9990856, 0.999343, 0.9991253, 0.9991404, 0.9991402, 0.9991263, 0.99912596, 0.99912626, 0.99914074, 0.9991261, 0.99914086, 0.9994378, 0.99943787, 0.99943197, 0.9994335, 0.99944925, 0.99943966, 0.99944025, 0.9994408, 0.9994389, 0.99943113, 0.9992945, 0.99925524, 0.9

In [48]:
percentage_thresholds = [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
for threshold in percentage_thresholds:
    count = 0
    for prediction in predictions:
        if prediction >= (threshold / 100.0):
            count += 1
    print(f"{threshold}%: {(count / len(predictions)) * 100}%")

0%: 100.0%
5%: 99.44444444444444%
10%: 99.44444444444444%
20%: 97.22222222222221%
30%: 82.22222222222221%
40%: 81.66666666666667%
50%: 77.77777777777779%
60%: 75.0%
70%: 75.0%
80%: 75.0%
90%: 75.0%
100%: 15.555555555555555%


#Model saving

In [91]:
# prompt: save the model for future use
import joblib
import os
from tensorflow.keras.models import save_model

joblib.dump(preprocessor, 'preprocessor.joblib')
save_model(model, 'my_model.h5')
print("Model and preprocessor saved successfully!")



Model and preprocessor saved successfully!
Model saved to: saved_models/fire_model.h5


In [1]:
import joblib
from tensorflow.keras.models import load_model
preprocessor = joblib.load('preprocessor.joblib')
model = load_model('my_model.h5')

