In [None]:
import zipfile
import os

# Archive to unpack and the folder that receives its contents.
zip_path = '/content/noaa_data_2023.zip'  # <-- change this
extract_dir = '/content/noaa'  # <-- or use '.' to extract in current folder

# Ensure the destination exists (no error if it is already there).
os.makedirs(extract_dir, exist_ok=True)

# Open the archive read-only and unpack every member into extract_dir;
# the context manager closes the archive afterwards.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_dir)

print(f"Extracted ZIP to: {extract_dir}")

Extracted ZIP to: /content/noaa


In [None]:
import pandas as pd
import glob
import os

# Folder that holds the per-station CSV files extracted from the archive.
folder_path = '/content/noaa/noaa_data_2023'  # <-- change this to your actual folder

# Collect the CSV paths. glob returns them in OS-dependent order, so sort
# them to make the row order of the merged file reproducible across runs.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))

# Fail early with a clear message instead of pandas' opaque
# "No objects to concatenate" when the folder path is wrong or empty.
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in '{folder_path}'")

# Read every file and stack them into one DataFrame with a fresh 0..n-1 index.
# STATION is read as text so identifiers are not type-inferred per file
# (which yields a mixed int/str column after concatenation).
merged_df = pd.concat(
    [pd.read_csv(csv_path, dtype={'STATION': str}) for csv_path in csv_files],
    ignore_index=True,
)

# Persist the merged table for the later cells that reload it from disk.
merged_df.to_csv('merged_output.csv', index=False)

print(f"Merged {len(csv_files)} files. Result saved as 'merged_output.csv'")

Merged 147 files. Result saved as 'merged_output.csv'


In [None]:
# Reload the merged weather table from disk and print its column summary
# (column names, non-null counts, dtypes).
merged_csv = "merged_output.csv"
df = pd.read_csv(merged_csv)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51593 entries, 0 to 51592
Data columns (total 28 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   STATION           51593 non-null  object 
 1   DATE              51593 non-null  object 
 2   LATITUDE          51593 non-null  float64
 3   LONGITUDE         51593 non-null  float64
 4   ELEVATION         51593 non-null  float64
 5   NAME              51593 non-null  object 
 6   TEMP              51593 non-null  float64
 7   TEMP_ATTRIBUTES   51593 non-null  int64  
 8   DEWP              51593 non-null  float64
 9   DEWP_ATTRIBUTES   51593 non-null  int64  
 10  SLP               51593 non-null  float64
 11  SLP_ATTRIBUTES    51593 non-null  int64  
 12  STP               51593 non-null  float64
 13  STP_ATTRIBUTES    51593 non-null  int64  
 14  VISIB             51593 non-null  float64
 15  VISIB_ATTRIBUTES  51593 non-null  int64  
 16  WDSP              51593 non-null  float6

In [None]:
# Show the first five rows of the merged weather table.
df.head(n=5)

Unnamed: 0,STATION,DATE,LATITUDE,LONGITUDE,ELEVATION,NAME,TEMP,TEMP_ATTRIBUTES,DEWP,DEWP_ATTRIBUTES,...,MXSPD,GUST,MAX,MAX_ATTRIBUTES,MIN,MIN_ATTRIBUTES,PRCP,PRCP_ATTRIBUTES,SNDP,FRSHTT
0,72289903179,2023-01-01,33.97563,-117.62487,193.9,"CHINO AIRPORT, CA US",55.9,24,50.2,24,...,15.9,28.9,61.0,,48.9,,1.05,G,999.9,10000
1,72289903179,2023-01-02,33.97563,-117.62487,193.9,"CHINO AIRPORT, CA US",48.3,24,41.8,24,...,9.9,999.9,61.0,,37.9,,0.01,G,999.9,100000
2,72289903179,2023-01-03,33.97563,-117.62487,193.9,"CHINO AIRPORT, CA US",51.8,24,47.2,24,...,8.9,999.9,59.0,,37.9,,0.13,G,999.9,10000
3,72289903179,2023-01-04,33.97563,-117.62487,193.9,"CHINO AIRPORT, CA US",56.6,24,53.3,24,...,8.0,999.9,63.0,,48.9,,0.06,G,999.9,110000
4,72289903179,2023-01-05,33.97563,-117.62487,193.9,"CHINO AIRPORT, CA US",57.0,24,52.7,24,...,14.0,18.1,63.0,,52.0,,0.19,G,999.9,10000


In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree
from datetime import timedelta

# --- Load Data ---
fire_df = pd.read_csv('viirs_snpp_2023_CA.csv')
weather_df = pd.read_csv('merged_output.csv')

# Build one timestamp per fire detection from the acquisition date and the
# HHMM acquisition time (zero-padded to four digits, e.g. 936 -> "0936").
fire_df['datetime'] = pd.to_datetime(
    fire_df['acq_date'] + fire_df['acq_time'].astype(str).str.zfill(4),
    format='%Y-%m-%d%H%M'
)
weather_df['datetime'] = pd.to_datetime(weather_df['DATE'])

# Drop rows with missing coordinates
weather_df = weather_df.dropna(subset=['LATITUDE', 'LONGITUDE'])

# Select weather features to keep
# NOTE(review): values such as 999.9 in GUST/SNDP look like missing-data
# placeholders rather than measurements - confirm against the dataset docs
# and decide whether they should be converted to NaN before modelling.
weather_features = ['TEMP', 'DEWP', 'SLP', 'WDSP', 'GUST', 'PRCP', 'MAX', 'MIN']

# --- Match each fire to the nearest weather record OF THE SAME DATE ---
# The weather table has one row per station per day. Searching all rows at once
# would return an arbitrary day's record for the nearest station, so the search
# is done separately for every calendar date.
# NOTE(review): fire and weather dates are compared as-is; confirm both sources
# use the same time basis for their calendar day.
fire_rows = fire_df.reset_index(drop=True)
weather_rows = weather_df.reset_index(drop=True)

# Coordinates in radians, as required by the haversine metric.
weather_coords = np.deg2rad(weather_rows[['LATITUDE', 'LONGITUDE']].to_numpy())
fire_coords = np.deg2rad(fire_rows[['latitude', 'longitude']].to_numpy())

# Plain 'YYYY-MM-DD' strings are used as the grouping key for both tables.
fire_day = fire_rows['datetime'].dt.strftime('%Y-%m-%d')
weather_day = weather_rows['datetime'].dt.strftime('%Y-%m-%d')

# date -> positional row numbers of that day's weather records
weather_pos_by_day = weather_rows.groupby(weather_day).indices

# Position in weather_rows of the record matched to each fire; -1 = no match.
nearest_pos = np.full(len(fire_rows), -1, dtype=np.int64)

for day, fire_pos in fire_rows.groupby(fire_day).indices.items():
    day_weather_pos = weather_pos_by_day.get(day)
    if day_weather_pos is None:
        # No station reported on this date; leave these fires unmatched.
        continue
    # haversine gives great-circle distance between (lat, lon) pairs
    tree = BallTree(weather_coords[day_weather_pos], metric='haversine')
    _, indices = tree.query(fire_coords[fire_pos], k=1)  # k=1 -> nearest neighbor
    # Translate per-day neighbour indices back to positions in weather_rows.
    nearest_pos[fire_pos] = day_weather_pos[indices.ravel()]

# Retrieve matched weather rows; the -1 placeholder is absent from the index,
# so unmatched fires receive NaN in every weather feature.
matched_weather = (
    weather_rows[weather_features]
    .reindex(nearest_pos)
    .reset_index(drop=True)
)

# Combine and save
fire_with_weather = pd.concat([fire_rows, matched_weather], axis=1)
fire_with_weather.to_csv('fire_with_quick_weather_match.csv', index=False)

unmatched_count = int((nearest_pos < 0).sum())
if unmatched_count:
    print(f"{unmatched_count} fire rows had no weather record on their date (weather columns left empty)")
print("✅ Saved fast-matched fire data to 'fire_with_quick_weather_match.csv'")

✅ Saved fast-matched fire data to 'fire_with_quick_weather_match.csv'


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Load the fire + weather table written by the matching step. The relative
# path matches the location that step saves to (the working directory).
df = pd.read_csv("fire_with_quick_weather_match.csv")  # Update filename if needed

# Drop rows with missing values in critical columns
required_columns = [
    'latitude', 'longitude', 'bright_ti4', 'bright_ti5', 'frp', 'scan', 'track',
    'TEMP', 'DEWP', 'SLP', 'WDSP', 'GUST', 'PRCP', 'MAX', 'MIN',
    'confidence', 'daynight'
]
df = df.dropna(subset=required_columns + ['type'])
if df.empty:
    raise ValueError("No rows left after dropping records with missing values")

# Encode categorical features. The fitted encoders are kept so the same
# category -> integer mapping can be inspected (`.classes_`) and reused on
# new data; LabelEncoder numbers categories in sorted order.
confidence_encoder = LabelEncoder()
daynight_encoder = LabelEncoder()
df['confidence_encoded'] = confidence_encoder.fit_transform(df['confidence'])
df['daynight_encoded'] = daynight_encoder.fit_transform(df['daynight'])

# Binary target: 1 where type != 0, otherwise 0.
# NOTE(review): every row in this file is a satellite fire detection, and the
# FIRMS attribute table appears to define type 0 as "presumed vegetation fire"
# (non-zero = volcano / static source / offshore). If so, class 0 here is NOT
# "no fire" - confirm the intended label before interpreting the scores.
df['fire_binary'] = (df['type'] != 0).astype(int)

# Define feature columns
features = [
    'latitude', 'longitude',
    'TEMP', 'DEWP', 'SLP', 'WDSP', 'GUST',
    'PRCP', 'MAX', 'MIN',
    'daynight_encoded',  # or 'hour_of_day' if you extract it
]

# Split data. The classes are imbalanced, so stratify to keep the same
# class ratio in the train and test portions.
X = df[features]
y = df['fire_binary']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train Random Forest. n_jobs=-1 builds the 1000 trees on all CPU cores;
# the fixed random_state keeps the fitted model reproducible.
model = RandomForestClassifier(n_estimators=1000, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]  # probability of class 1
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.98      0.99      0.98      6427
           1       0.88      0.82      0.85       809

    accuracy                           0.97      7236
   macro avg       0.93      0.90      0.92      7236
weighted avg       0.97      0.97      0.97      7236



In [None]:
import pandas as pd

# Hand-written scenarios used to sanity-check the trained model.
# daynight_encoded presumably follows the LabelEncoder order of the training
# data (0 = day, 1 = night) - verify against the fitted encoder.
test_cases = pd.DataFrame([
    # Hot, dry, windy, day — high risk
    {
        'latitude': 38.5,
        'longitude': -121.5,
        'TEMP': 97.0,
        'DEWP': 25.0,
        'SLP': 1008.0,
        'WDSP': 8.0,
        'GUST': 25.0,
        'PRCP': 0.0,
        'MAX': 101.0,
        'MIN': 72.0,
        'daynight_encoded': 0  # 0 = day
    },
    # Cool, moist, calm, night — low risk
    {
        'latitude': 39.0,
        'longitude': -122.0,
        'TEMP': 65.0,
        'DEWP': 50.0,
        'SLP': 1016.0,
        'WDSP': 2.0,
        'GUST': 5.0,
        'PRCP': 2.0,
        'MAX': 68.0,
        'MIN': 55.0,
        'daynight_encoded': 1  # 1 = night
    },
    # Dry but moderate temp, breezy — moderate risk
    {
        'latitude': 37.8,
        'longitude': -120.0,
        'TEMP': 85.0,
        'DEWP': 30.0,
        'SLP': 1012.0,
        'WDSP': 5.5,
        'GUST': 15.0,
        'PRCP': 0.1,
        'MAX': 90.0,
        'MIN': 65.0,
        'daynight_encoded': 0
    }
])

# Predict the class-1 probability for EVERY test case.
# `[:, 1]` selects the positive-class column for all rows; the previous
# `[:1]` returned only the first row (both class probabilities of case 1).
# Columns are ordered explicitly to match the order used during training.
fire_prob = model.predict_proba(test_cases[features])[:, 1]
fire_prob

array([[0.951, 0.049]])

In [None]:
import pandas as pd
import numpy as np
import folium
from folium.plugins import HeatMap

# -- 1. Create a grid of test points over California --
# Constant conditions applied to every grid point; only latitude/longitude vary.
# Only the keys listed in `features` are passed to the model further down.
base_sample = dict(
    bright_ti4=345.0,
    bright_ti5=310.0,
    frp=30.0,
    scan=0.4,
    track=0.4,
    TEMP=85.0,
    DEWP=55.0,
    SLP=1010.0,
    WDSP=5.0,
    GUST=18.0,
    PRCP=0.0,
    MAX=90.0,
    MIN=60.0,
    confidence_encoded=1,  # intended as "n" - verify against the encoder's class order
    daynight_encoded=0,    # intended as "D" - verify against the encoder's class order
)

# 40 x 40 lat/lon lattice over the bounding box (south->north, west->east)
lats = np.linspace(32.5, 42.0, 40)
lons = np.linspace(-124.4, -114.0, 40)

# One record per (lat, lon) pair: baseline values plus that point's coordinates.
grid_points = [
    {**base_sample, 'latitude': lat, 'longitude': lon}
    for lat in lats
    for lon in lons
]

grid_df = pd.DataFrame(grid_points)

# -- 2. Predict fire probabilities using trained model --
# `model` and `features` come from the training cell; column 1 of
# predict_proba is the probability of class 1.
class_probabilities = model.predict_proba(grid_df[features])
probs = class_probabilities[:, 1]

# -- 3. Generate heatmap data: (lat, lon, probability) --
heatmap_data = [
    (point_lat, point_lon, point_prob)
    for point_lat, point_lon, point_prob in zip(
        grid_df['latitude'], grid_df['longitude'], probs
    )
]

# -- 4. Create Folium map --
california_map = folium.Map(location=[37.5, -119.5], zoom_start=6)
heat_layer = HeatMap(heatmap_data, radius=8, blur=12)
heat_layer.add_to(california_map)

# -- 5. Save map --
california_map.save("california_fire_hotspot_map.html")
print("🔥 Hotspot map saved as 'california_fire_hotspot_map.html'")

🔥 Hotspot map saved as 'california_fire_hotspot_map.html'


In [None]:
import joblib

# Persist the trained classifier and the ordered feature list next to each
# other so a later session can reload both and rebuild inputs in the same
# column order.
model_path = 'fire_risk_model.joblib'
features_path = 'model_features.joblib'

joblib.dump(model, model_path)
joblib.dump(features, features_path)

['model_features.joblib']