In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import joblib

In [10]:
def load_data(path='model_training_dataset.csv'):
    """
    Read the training dataset from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to
            'model_training_dataset.csv' (a relative path), so calling
            ``load_data()`` with no arguments behaves as before.

    Returns:
        DataFrame holding the parsed contents of the file.
    """
    return pd.read_csv(path)

In [4]:
# Preprocess the data
def preprocess_data(df):
    """
    Split a raw training frame into model inputs and an integer-encoded target.

    The input frame is not modified: the encoded target is returned as a new
    Series instead of being written back into ``df['severity']``. This keeps
    the function idempotent, so calling it again on the same frame (e.g. when
    a notebook cell is re-run) still fits the encoder on the original labels
    rather than on already-encoded integers.

    Args:
        df: DataFrame containing a 'severity' column plus every column named
            in ``features`` below.

    Returns:
        Tuple ``(X, y, le, features)``:
            X: DataFrame of the feature columns, in ``features`` order.
            y: Series named 'severity' with the encoded class ids, sharing
               ``df``'s index.
            le: LabelEncoder fitted on the original severity values.
            features: List of the feature column names.

    Raises:
        KeyError: If 'severity' or any feature column is absent from ``df``.
    """
    # Select features
    features = ['temp', 'humidity', 'rain_1h', 'wind_speed', 'pressure', 'clouds']

    # Fail early with one message naming every missing column.
    missing = [col for col in ['severity', *features] if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Encode the target variable without touching the caller's frame
    le = LabelEncoder()
    y = pd.Series(le.fit_transform(df['severity']), index=df.index, name='severity')

    X = df[features]

    return X, y, le, features

In [5]:
# Train the model
def train_model(X, y, test_size=0.2, n_estimators=100, random_state=42):
    """
    Fit a random-forest classifier and print a hold-out classification report.

    The data is split once into train and test portions; the model is fitted
    on the train portion only and the test portion is used solely for the
    printed report.

    Args:
        X: Feature matrix.
        y: Target values aligned with ``X``.
        test_size: Passed to ``train_test_split`` as ``test_size``.
            Defaults to 0.2.
        n_estimators: Passed to ``RandomForestClassifier`` as
            ``n_estimators``. Defaults to 100.
        random_state: Seed passed to both the split and the classifier so a
            run is repeatable. Defaults to 42.

    Returns:
        The fitted RandomForestClassifier.
    """
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Create and train the model
    model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)

    # Evaluate the model on the held-out rows
    y_pred = model.predict(X_test)
    print("Model Performance:")
    print(classification_report(y_test, y_pred))

    return model

In [6]:
# Function to make predictions on live data
def predict_severity(model, le, features, live_data):
    """
    Predict the severity label for a single live weather observation.

    Args:
        model: Trained classifier exposing ``predict``
        le: Fitted label encoder exposing ``inverse_transform``
        features: Feature names, in the column order the model expects
        live_data: Dictionary mapping feature names to observed values

    Returns:
        The decoded severity label for the observation
    """
    # Build a one-row frame, then select the feature columns in order.
    sample = pd.DataFrame([live_data])[features]

    # Predict the encoded class, then map it back to its label.
    encoded = model.predict(sample)
    decoded = le.inverse_transform(encoded)

    return decoded[0]

In [7]:
def main():
    """
    Run the training pipeline end to end and persist its artifacts.

    Loads the dataset, preprocesses it, trains the classifier, then writes
    the model, the label encoder and the feature list to joblib files using
    relative file names.
    """
    raw_frame = load_data()
    X, y, le, features = preprocess_data(raw_frame)

    model = train_model(X, y)

    # File name -> object; insertion order is the order the files are written.
    artifacts = {
        'disaster_severity_model.joblib': model,
        'label_encoder.joblib': le,
        'feature_list.joblib': features,
    }
    for filename, obj in artifacts.items():
        joblib.dump(obj, filename)

    print("\nModel and supporting files saved successfully!")


In [11]:
# Execute the full pipeline defined in main(): load, preprocess, train, report, save.
main()

Model Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       127
           1       0.94      0.99      0.97       120
           2       0.99      0.94      0.96       113

    accuracy                           0.98       360
   macro avg       0.98      0.98      0.98       360
weighted avg       0.98      0.98      0.98       360


Model and supporting files saved successfully!
