In [54]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression

# Load both fire datasets
fire_data_1 = pd.read_csv('/content/fire_archive_V1_96617.csv')
fire_data_2 = pd.read_csv('/content/fire_archive_M6_96619.csv')

# Display the first few rows of both datasets to understand their structure
fire_data_1.head(), fire_data_2.head()


(   latitude  longitude  bright_ti4  scan  track    acq_date  acq_time  \
 0 -34.45902  150.88040       326.5  0.60   0.71  2019-08-01       246   
 1 -31.70724  151.07191       367.0  0.56   0.69  2019-08-01       247   
 2 -31.39626  149.65253       325.2  0.68   0.74  2019-08-01       247   
 3 -30.39774  152.06432       347.8  0.48   0.65  2019-08-01       247   
 4 -31.20248  151.89766       350.7  0.50   0.66  2019-08-01       247   
 
   satellite instrument confidence  version  bright_ti5   frp  type  
 0         N      VIIRS          n        1       290.7   5.8     3  
 1         N      VIIRS          h        1       291.9  10.4     0  
 2         N      VIIRS          n        1       292.7   5.6     0  
 3         N      VIIRS          n        1       287.6   9.1     0  
 4         N      VIIRS          n        1       292.3   9.4     0  ,
    latitude  longitude  brightness  scan  track    acq_date  acq_time  \
 0  -11.8070   142.0583       313.0   1.0    1.0  2019-08-0

In [55]:
fire_data_1 = pd.read_csv(file_path_1)
fire_data_2 = pd.read_csv(file_path_2)

# Combine the datasets (keeping relevant columns)
fire_data_1_cleaned = fire_data_1[['latitude', 'longitude', 'bright_ti4', 'frp', 'confidence', 'type']]
fire_data_2_cleaned = fire_data_2[['latitude', 'longitude', 'brightness', 'frp', 'confidence', 'type']]

# Rename brightness column to keep it consistent across datasets
fire_data_1_cleaned.rename(columns={'bright_ti4': 'brightness'}, inplace=True)

# Combine the two datasets
combined_fire_data = pd.concat([fire_data_1_cleaned, fire_data_2_cleaned])

# Convert 'confidence' to numeric, replacing 'n' with NaN and dropping rows with NaNs
combined_fire_data['confidence'] = pd.to_numeric(combined_fire_data['confidence'], errors='coerce')
combined_fire_data.dropna(inplace=True)

# Remap the classes in the 'type' column to be consecutive
class_mapping = {0: 0, 2: 1, 3: 2}
combined_fire_data['type'] = combined_fire_data['type'].map(class_mapping)

# Prepare the features (X) and target (y)
X = combined_fire_data[['latitude', 'longitude', 'brightness', 'frp', 'confidence']]
y = combined_fire_data['type']  # Remapped 'type' column is our target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Normalize the features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a Logistic Regression model
log_reg_model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
log_reg_model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = log_reg_model.predict(X_test_scaled)

# Evaluate the model accuracy
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f"Test Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:\n", classification_rep)

# Example input (latitude, longitude, brightness, frp, confidence)
example_input = np.array([[ -31.70724, 151.07191, 367.0, 10.4, 100 ]])  # Example data

# Scale the input data using the same scaler
example_input_scaled = scaler.transform(example_input)

# Make a prediction for the example
predicted_class = log_reg_model.predict(example_input_scaled)

# Output the predicted class for the example
print(f"Predicted class for the example: {predicted_class[0]}")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fire_data_1_cleaned.rename(columns={'bright_ti4': 'brightness'}, inplace=True)


Test Accuracy: 99.03%

Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      1.00     10699
           1       0.00      0.00      0.00       105

    accuracy                           0.99     10804
   macro avg       0.50      0.50      0.50     10804
weighted avg       0.98      0.99      0.99     10804

Predicted class for the example: 0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [56]:
import joblib

# Save the trained Logistic Regression model to a file
joblib.dump(log_reg_model, 'logistic_regression_fire_model.pkl')

# Save the scaler as well since we need it to scale future inputs before predictions
joblib.dump(scaler, 'scaler_fire_model.pkl')

print("Model and scaler saved successfully.")


Model and scaler saved successfully.


In [57]:
# Load the saved model and scaler
loaded_model = joblib.load('logistic_regression_fire_model.pkl')
loaded_scaler = joblib.load('scaler_fire_model.pkl')

# Example input (latitude, longitude, brightness, frp, confidence)
example_input = np.array([[ -31.70724, 151.07191, 367.0, 10.4, 100 ]])  # Example data

# Scale the input using the loaded scaler
example_input_scaled = loaded_scaler.transform(example_input)

# Make a prediction using the loaded model
predicted_class = loaded_model.predict(example_input_scaled)

# Output the predicted class for the example
print(f"Predicted class for the example: {predicted_class[0]}")


Predicted class for the example: 0


