Heatmap Generation Using the Folium Library

In [None]:
import pandas as pd
import numpy as np
import folium
import json
import matplotlib.pyplot as plt
import seaborn as sns
from folium.plugins import HeatMap, MeasureControl
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import (classification_report, confusion_matrix,
                             roc_auc_score, ConfusionMatrixDisplay)
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from lightgbm import LGBMClassifier
import joblib


# 1. Enhanced EDA Section

def perform_eda(X, y):
    """Print summary statistics and save three exploratory plots.

    The features and labels are combined into a single frame (``X`` itself is
    not modified), its ``describe()`` output is printed, and each figure is
    written as a PNG to the current working directory before being shown:

    * ``Class Distribution.png`` - number of samples per label value.
    * ``Feature Correlation Matrix.png`` - correlation heatmap of the numeric
      columns (label included, coordinates excluded) with the diagonal and
      upper triangle masked.
    * ``Geographical Distribution of Landslide Risk.png`` - longitude/latitude
      scatter coloured by label.

    Args:
        X: Feature DataFrame. Must contain numeric ``latitude`` and
            ``longitude`` columns.
        y: Labels exposing ``.values`` (e.g. a pandas Series), one per row of
            ``X``. The plot titles treat 0 as stable and 1 as landslide.

    Raises:
        KeyError: If ``latitude`` or ``longitude`` is not among the numeric
            columns of ``X``.
    """
    data = X.copy()
    data['label'] = y.values

    # Select only numeric features for correlation analysis; the coordinates
    # are kept out of the correlation matrix.
    numeric_features = data.select_dtypes(include=np.number).columns
    excluded_columns_for_corr = ['latitude', 'longitude']
    data_numeric = data[numeric_features].drop(columns=excluded_columns_for_corr)

    print("\nBasic Statistics:")
    print(data.describe())

    # Class distribution
    fig = plt.figure(figsize=(10, 6))
    sns.countplot(x='label', data=data, palette='viridis')
    plt.title('Class Distribution (0: Stable, 1: Landslide)')
    plt.xticks([0, 1], ['Stable', 'Landslide'])
    plt.savefig('Class Distribution.png', dpi=300, bbox_inches='tight')
    plt.show()
    # Close explicitly so repeated calls do not pile up open figures.
    plt.close(fig)

    # Feature correlations
    fig = plt.figure(figsize=(14, 10))
    corr_matrix = data_numeric.corr()
    # The matrix is symmetric, so the diagonal and upper triangle are hidden.
    sns.heatmap(corr_matrix, annot=False, cmap='coolwarm',
                mask=np.triu(np.ones_like(corr_matrix, dtype=bool)))
    plt.title('Feature Correlation Matrix')
    # Filename previously read "Correlartion"; spelling corrected.
    plt.savefig('Feature Correlation Matrix.png', dpi=300, bbox_inches='tight')
    plt.show()
    plt.close(fig)

    # Geographical distribution
    fig = plt.figure(figsize=(12, 8))
    plt.scatter(data['longitude'], data['latitude'],
                c=data['label'], cmap='viridis', alpha=0.6)
    plt.title('Geographical Distribution of Landslide Risk')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.colorbar(label='Landslide Occurrence')
    plt.savefig('Geographical Distribution of Landslide Risk.png', dpi=300, bbox_inches='tight')
    plt.show()
    plt.close(fig)



# 2. Model Training & Evaluation
def train_model(X_train, X_test, y_train, y_test):
    """Fit a LightGBM classifier with cross-validation and persist it.

    A ``GridSearchCV`` (5 folds, ROC AUC scoring, all cores) is run over a
    grid that holds exactly one value per hyperparameter, and the resulting
    best estimator is written to ``landslide_model.pkl`` with joblib.

    Args:
        X_train: Training features.
        X_test: Accepted for signature compatibility; not used here.
        y_train: Training labels.
        y_test: Accepted for signature compatibility; not used here.

    Returns:
        The best estimator found by the search, refit by ``GridSearchCV``.
    """
    # One candidate per hyperparameter: the search evaluates a single
    # configuration across the folds.
    search_space = {
        'learning_rate': [0.01],
        'max_depth': [4],
        'min_data_in_leaf': [200],
        'lambda_l1': [1.0],
        'lambda_l2': [1.0],
        'feature_fraction': [0.6],
    }

    searcher = GridSearchCV(
        LGBMClassifier(random_state=42),
        search_space,
        scoring='roc_auc',
        cv=5,
        n_jobs=-1,
    )
    searcher.fit(X_train, y_train)

    fitted_model = searcher.best_estimator_
    # Persist the winning estimator next to the script.
    joblib.dump(fitted_model, 'landslide_model.pkl')
    return fitted_model


# 3. Probability Enhancement
def enhance_probability(probs, new_min=0.1, new_max=0.95, power=1.5):
    """Stretch a set of probabilities over ``[new_min, new_max]``.

    The values are min-max scaled to ``[0, 1]``, raised to ``power``, and
    min-max scaled again to ``[new_min, new_max]``. Scaling is relative to
    the values passed in, so the output depends on the batch, not only on
    each individual probability.

    Args:
        probs: Array-like of probabilities (list, ndarray, pandas Series).
        new_min: Lower bound of the output range.
        new_max: Upper bound of the output range.
        power: Exponent applied to the ``[0, 1]``-scaled values; values
            above 1 push mid-range inputs towards ``new_min``.

    Returns:
        1-D numpy array with one rescaled value per input. An empty input
        yields an empty float array.
    """
    probs = np.asarray(probs).ravel()
    # MinMaxScaler rejects zero-sample input; with nothing to rescale, return
    # an empty result instead of surfacing that validation error.
    if probs.size == 0:
        return probs.astype(float)

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(probs.reshape(-1, 1)).flatten()
    powered = np.power(scaled, power)
    final_scaler = MinMaxScaler(feature_range=(new_min, new_max))
    enhanced_probs = final_scaler.fit_transform(powered.reshape(-1, 1)).flatten()

    return enhanced_probs


# 4. Heatmap Generation
def generate_heatmap(X_test, y_test, probabilities):
    """Build and save a folium heatmap of the landslide-labelled test points.

    Only rows whose label equals 1 are mapped. Their probabilities are
    rescaled with ``enhance_probability`` and used as heatmap weights.

    Files written to the current working directory:

    * ``probability_distributions.png`` - histograms of the probabilities
      before and after rescaling.
    * ``landslide_risk_map.html`` - the interactive map.
    * ``landslide_points.json`` - latitude, longitude and rescaled
      probability of every mapped point.

    Args:
        X_test: DataFrame containing ``latitude`` and ``longitude`` columns.
        y_test: Labels exposing ``.values``, one per row of ``X_test``.
        probabilities: Predicted probability for each row of ``X_test``.

    Returns:
        The ``folium.Map`` that was saved.

    Raises:
        ValueError: If no row is labelled 1, or if none of the labelled rows
            has a latitude in [-90, 90] and a longitude in [-180, 180].
    """
    # Create analysis dataframe with ORIGINAL coordinates
    analysis_df = X_test[['latitude', 'longitude']].copy()
    analysis_df['label'] = y_test.values
    analysis_df['probability'] = probabilities

    # Filter and enhance landslide points
    landslide_data = analysis_df[analysis_df['label'] == 1].copy()
    if landslide_data.empty:
        # Fail with a clear message instead of an opaque error from the
        # rescaling / min() calls below.
        raise ValueError("No landslide-labelled points (label == 1) found in the test data")

    original_probs = landslide_data['probability'].values
    landslide_data['probability'] = enhance_probability(landslide_data['probability'])
    enhanced_probs = landslide_data['probability'].values

    # Verify transformation
    print(f"Original range: {original_probs.min():.2f} - {original_probs.max():.2f}")
    print(f"Enhanced range: {enhanced_probs.min():.2f} - {enhanced_probs.max():.2f}")

    # Histogram Plot of probability distributions
    fig = plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.hist(original_probs, bins=20, color='blue', alpha=0.7)
    plt.title('Original Probabilities')
    plt.subplot(1, 2, 2)
    plt.hist(enhanced_probs, bins=20, color='red', alpha=0.7)
    plt.title('Enhanced Probabilities')
    plt.savefig('probability_distributions.png', dpi=300, bbox_inches='tight')
    plt.show()
    # Close explicitly so repeated calls do not pile up open figures.
    plt.close(fig)

    # Create map with proper coordinate validation
    valid_coords = landslide_data[
        (landslide_data['latitude'].between(-90, 90)) &
        (landslide_data['longitude'].between(-180, 180))
    ]

    if valid_coords.empty:
        raise ValueError("No valid geographic coordinates found after filtering")

    # The median keeps the initial view centred on the bulk of the points.
    map_center = [
        valid_coords['latitude'].median(),
        valid_coords['longitude'].median()
    ]

    m = folium.Map(location=map_center, zoom_start=10,
                  tiles='cartodbpositron', control_scale=True)

    # Heatmap layer with string-formatted gradient keys
    HeatMap(
        valid_coords[['latitude', 'longitude', 'probability']].values.tolist(),
        name='Landslide Risk Heatmap',
        radius=15,
        blur=20,
        min_opacity=0.5,
        gradient={'0.0': '#00ff00', '0.5': '#ffff00', '1.0': '#ff0000'},
        max_zoom=15
    ).add_to(m)

    # Add legend (the ranges are static text matching the default
    # 0.1-0.95 output range of enhance_probability)
    legend_html = '''
    <div style="
        position: fixed;
        bottom: 20px;
        left: 20px;
        width: 220px;
        background-color: white;
        border: 2px solid #ddd;
        border-radius: 8px;
        padding: 12px;
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
        z-index: 9999;
        font-family: Arial, sans-serif;
    ">
        <div style="
            text-align: center;
            font-weight: bold;
            font-size: 16px;
            color: #333;
            margin-bottom: 10px;
            padding-bottom: 10px;
            border-bottom: 1px solid #eee;
        ">
            Landslide Risk
        </div>
        <div style="display: flex; align-items: center; margin-bottom: 8px;">
            <div style="
                width: 25px;
                height: 25px;
                background-color: #00ff00;
                opacity: 0.7;
                margin-right: 10px;
                border-radius: 4px;
            "></div>
            <span style="color: #555;">Low (0.1-0.3)</span>
        </div>
        <div style="display: flex; align-items: center; margin-bottom: 8px;">
            <div style="
                width: 25px;
                height: 25px;
                background-color: #ffff00;
                opacity: 0.7;
                margin-right: 10px;
                border-radius: 4px;
            "></div>
            <span style="color: #555;">Medium (0.3-0.7)</span>
        </div>
        <div style="display: flex; align-items: center;">
            <div style="
                width: 25px;
                height: 25px;
                background-color: #ff0000;
                opacity: 0.7;
                margin-right: 10px;
                border-radius: 4px;
            "></div>
            <span style="color: #555;">High (0.7-0.95)</span>
        </div>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(legend_html))

    # Add satellite layer with explicit attribute
    folium.TileLayer(
        tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
        attr='Esri World Imagery',
        name='Satellite View',
        overlay=False,
        control=True
    ).add_to(m)

    # The satellite layer is registered with control=True, so a layer control
    # is required for the viewer to switch between the base maps. It is added
    # last so that it picks up every layer registered above.
    folium.LayerControl().add_to(m)

    # Save outputs
    output_file = 'landslide_risk_map.html'
    m.save(output_file)
    valid_coords[['latitude', 'longitude', 'probability']].to_json(
        'landslide_points.json', orient='records'
    )
    print(f"Heatmap saved to {output_file}")

    return m


if __name__ == "__main__":
    # Read the pre-split feature tables and label vectors; squeeze() is
    # applied to each label frame as read.
    X_train = pd.read_csv('final_X_train.csv')
    X_test = pd.read_csv('final_X_test.csv')
    y_train = pd.read_csv('y_train.csv').squeeze()
    y_test = pd.read_csv('y_test.csv').squeeze()

    # Exploratory plots are produced from the test split.
    perform_eda(X_test, y_test)

    # Set the raw coordinates aside so they bypass standardisation.
    train_lat_lon = X_train[['latitude', 'longitude']].copy()
    test_lat_lon = X_test[['latitude', 'longitude']].copy()

    # Partition the remaining columns by dtype (column lists are derived
    # from the training frame and reused for the test frame).
    numeric_index = X_train.select_dtypes(include=[np.number]).columns
    numeric_columns = numeric_index.drop(['latitude', 'longitude']).tolist()
    categorical_columns = X_train.select_dtypes(include=['category', 'object']).columns.tolist()

    # Fit the scaler on the training split only, then apply it to both splits.
    scaler = StandardScaler()
    scaler.fit(X_train[numeric_columns])
    X_train_numeric = scaler.transform(X_train[numeric_columns])
    X_test_numeric = scaler.transform(X_test[numeric_columns])

    # Rebuild each frame as: scaled numerics + raw coordinates + categoricals.
    X_train_scaled = pd.DataFrame(X_train_numeric, index=X_train.index, columns=numeric_columns)
    X_train_scaled = X_train_scaled.join([train_lat_lon, X_train[categorical_columns]])

    X_test_scaled = pd.DataFrame(X_test_numeric, index=X_test.index, columns=numeric_columns)
    X_test_scaled = X_test_scaled.join([test_lat_lon, X_test[categorical_columns]])

    # Cast the categorical columns to pandas 'category' dtype in both frames.
    category_dtypes = {name: 'category' for name in categorical_columns}
    X_train_scaled = X_train_scaled.astype(category_dtypes)
    X_test_scaled = X_test_scaled.astype(category_dtypes)

    # Fit (and persist) the classifier.
    model = train_model(X_train_scaled, X_test_scaled, y_train, y_test)

    # Hard labels for the reports, positive-class probabilities for the map.
    y_pred = model.predict(X_test_scaled)
    y_proba = model.predict_proba(X_test_scaled)[:, 1]

    # Text report
    print("\nClassification Report:")
    report_text = classification_report(
        y_test, y_pred, target_names=['Stable', 'Landslide']
    )
    print(report_text)

    # Confusion matrix figure, saved before being shown
    print("\nConfusion Matrix:")
    ConfusionMatrixDisplay.from_predictions(
        y_test,
        y_pred,
        display_labels=['Stable', 'Landslide'],
        cmap='Blues',
    )
    plt.savefig('Confusion Matrix.png', dpi=300, bbox_inches='tight')
    plt.show()

    # The map uses the unscaled test frame so it plots original coordinates.
    risk_map = generate_heatmap(X_test, y_test, y_proba)
    print("\nLandslide risk map generated successfully!")