## Importing Required Libraries

In [1]:
import numpy as np
import pandas as pd 
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

## Data Loading and Exploration
Load the dataset and explore its basic information.

In [2]:
nd = pd.read_csv('pend-gdis-1960-2018-disasterlocations.csv')

In [3]:
nd.head()

Unnamed: 0,id,country,iso3,gwno,year,geo_id,geolocation,level,adm1,adm2,adm3,location,historical,hist_country,disastertype,disasterno,latitude,longitude
0,109,Albania,ALB,339.0,2009,346,Ana E Malit,3,Shkoder,Shkodres,Ana E Malit,Ana E Malit,0,,flood,2009-0631,42.020948,19.418317
1,109,Albania,ALB,339.0,2009,351,Bushat,3,Shkoder,Shkodres,Bushat,Bushat,0,,flood,2009-0631,41.959294,19.514309
2,175,Angola,AGO,540.0,2001,760,Onjiva,3,Cunene,Cuanhama,Onjiva,Onjiva,0,,flood,2001-0146,-17.093484,15.665758
3,187,Angola,AGO,540.0,2009,710,Evale,3,Cunene,Cuanhama,Evale,Evale,0,,flood,2009-0092,-16.531533,15.773987
4,187,Angola,AGO,540.0,2009,749,Mupa,3,Cunene,Cuvelai,Mupa,Mupa,0,,flood,2009-0092,-16.200065,15.844189


In [4]:
print(nd.columns)

Index(['id', 'country', 'iso3', 'gwno', 'year', 'geo_id', 'geolocation',
       'level', 'adm1', 'adm2', 'adm3', 'location', 'historical',
       'hist_country', 'disastertype', 'disasterno', 'latitude', 'longitude'],
      dtype='object')


In [5]:
nd.shape

(39953, 18)

Combine 'latitude' and 'longitude' columns to create a new 'latlong' column.

In [6]:
nd['latlong'] = nd['latitude'] + nd['longitude']

In [7]:
nd.head()

Unnamed: 0,id,country,iso3,gwno,year,geo_id,geolocation,level,adm1,adm2,adm3,location,historical,hist_country,disastertype,disasterno,latitude,longitude,latlong
0,109,Albania,ALB,339.0,2009,346,Ana E Malit,3,Shkoder,Shkodres,Ana E Malit,Ana E Malit,0,,flood,2009-0631,42.020948,19.418317,61.439266
1,109,Albania,ALB,339.0,2009,351,Bushat,3,Shkoder,Shkodres,Bushat,Bushat,0,,flood,2009-0631,41.959294,19.514309,61.473603
2,175,Angola,AGO,540.0,2001,760,Onjiva,3,Cunene,Cuanhama,Onjiva,Onjiva,0,,flood,2001-0146,-17.093484,15.665758,-1.427727
3,187,Angola,AGO,540.0,2009,710,Evale,3,Cunene,Cuanhama,Evale,Evale,0,,flood,2009-0092,-16.531533,15.773987,-0.757546
4,187,Angola,AGO,540.0,2009,749,Mupa,3,Cunene,Cuvelai,Mupa,Mupa,0,,flood,2009-0092,-16.200065,15.844189,-0.355876


## Correlation Analysis for Categorical and Numeric Variables

This code calculates correlations between a specified column ('disastertype') and all other columns in the dataset 

In [8]:
from scipy.stats import pointbiserialr
from scipy.stats import pearsonr
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

def calculate_correlations(column_name):
    # Create a copy of the DataFrame
    nd_encoded = nd.copy()

    # Convert non-numeric columns to categorical variables using label encoding
    for column in nd_encoded.columns:
        if not pd.api.types.is_numeric_dtype(nd_encoded[column]):
            label_encoder = LabelEncoder()
            nd_encoded[column] = label_encoder.fit_transform(nd_encoded[column])

    # Create a dictionary to store the correlation values
    correlation_values = {}

    # Calculate the correlation between the specified column and all other columns
    for column in nd_encoded.columns.drop(column_name):
        # Calculate the correlation between the current column and the specified column
        if pd.api.types.is_numeric_dtype(nd_encoded[column]):
            # Handle missing values by replacing them with the column mean
            nd_encoded[column] = nd_encoded[column].fillna(nd_encoded[column].mean())
            correlation, _ = pearsonr(nd_encoded[column], nd_encoded[column_name])
        else:
            # Perform one-hot encoding on non-numeric columns
            one_hot_encoder = OneHotEncoder(sparse=False)
            encoded_data = one_hot_encoder.fit_transform(nd_encoded[[column]])
            correlation, _ = pointbiserialr(encoded_data, nd_encoded[column_name])

        # Store the correlation value in the dictionary
        correlation_values[column] = correlation

    # Convert the correlation values to a DataFrame
    correlation_df = pd.DataFrame(correlation_values.items(), columns=['Column', 'Correlation'])

    # Sort the correlation values in descending order
    sorted_correlations = correlation_df.sort_values(by='Correlation', ascending=False)

    # Print the correlations in descending order
    for index, row in sorted_correlations.iterrows():
        column = row['Column']
        correlation = row['Correlation']
        na_count = nd_encoded[column].isna().sum()
        print(f"{column}:")
        print(f"Correlation value: {correlation}")
        print()

        
calculate_correlations('disastertype')


location:
Correlation value: 0.2449281635069503

id:
Correlation value: 0.23705333751004032

iso3:
Correlation value: 0.17099747179357702

country:
Correlation value: 0.16461547889140457

latitude:
Correlation value: 0.04873389176815423

adm3:
Correlation value: 0.032243765224485084

adm2:
Correlation value: 0.027472865273895003

latlong:
Correlation value: 0.010811810902564358

adm1:
Correlation value: 0.008296835522776309

geolocation:
Correlation value: 0.0014511422826589168

longitude:
Correlation value: -0.0019004570526256273

historical:
Correlation value: -0.005911405812344851

gwno:
Correlation value: -0.007362310927801943

hist_country:
Correlation value: -0.01401785641579971

level:
Correlation value: -0.045553440722593874

disasterno:
Correlation value: -0.06368699096412464

year:
Correlation value: -0.06957563670834027

geo_id:
Correlation value: -0.22607307142913063



## Data Preparation and Analysis
Performing data preparation and analysis tasks to extract relevant columns, check for missing values, and display column names.

In [9]:
columns_to_keep = ['disastertype', 'latitude', 'longitude']
nd_ml = nd[columns_to_keep]

In [10]:
nd_ml.head()

Unnamed: 0,disastertype,latitude,longitude
0,flood,42.020948,19.418317
1,flood,41.959294,19.514309
2,flood,-17.093484,15.665758
3,flood,-16.531533,15.773987
4,flood,-16.200065,15.844189


In [11]:
# Check for missing values in nd_ml
missing_values_count = nd_ml.isnull().sum()

# Create a new DataFrame to display missing values count in a tabular format
missing_values_table = pd.DataFrame(missing_values_count, columns=['Missing Values'])

print(missing_values_table)

              Missing Values
disastertype               0
latitude                   0
longitude                  0


In [12]:
print(nd_ml.columns)

Index(['disastertype', 'latitude', 'longitude'], dtype='object')


## Data Splitting: Feature Matrix and Target Variable
Dividing the dataset into feature matrix 'X' and target variable 'y', followed by splitting the data into training and testing sets.

In [13]:
# Prepare the feature matrix X and the target variable y
X = nd_ml[['latitude', 'longitude']]
y = nd_ml['disastertype']

In [14]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Model Implementation and Evaluation 

### Linear Regression Model

Linear regression is a supervised machine learning algorithm used for predicting a continuous target variable based on one or more input features. It assumes a linear relationship between the input features and the target variable. The primary goal of linear regression is to find the best-fitting straight line that minimizes the difference between the predicted and actual values.

<strong>Formula</strong>
The simple linear regression model can be expressed with the formula:

$$ y = mx + b $$

Where:
<ul>
<li><em>y</em> is the predicted target variable.</li>
<li><em>x</em> is the input feature.</li>
<li><em>m</em> is the slope of the line (coefficient of the feature).</li>
<li><em>b</em> is the y-intercept (bias).</li>
</ul>

The goal of the algorithm is to find the values of <em>m</em> and <em>b</em> that minimize the sum of squared differences between the predicted and actual values.
</div>

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Encode categorical labels as integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Create the linear regression model
model = LinearRegression()

# Fit the model to the training data
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Since we're using linear regression, we can calculate metrics suitable for regression tasks
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', rmse)
print('Mean Absolute Error:', mae)

Mean Squared Error: 3.6563434478102255
Root Mean Squared Error: 1.9121567529390016
Mean Absolute Error: 1.6014526595652268


### Logistic Regression

Logistic Regression is a widely used classification algorithm in machine learning. It's particularly suited for binary classification tasks, where the goal is to predict one of two possible outcomes. The model estimates the probability that a given input instance belongs to a certain class, and based on a threshold, assigns it to one of the two classes.

<strong>Formal Notation</strong>

Given a dataset with input features \(X\) and corresponding binary labels \(y\), the logistic regression model computes the probability \(P(y=1|X)\) that an input instance belongs to class 1. This probability is modeled using the logistic function, also known as the sigmoid function:

$$
P(y=1|X) = \frac{1}{1 + e^{-\left(\mathbf{w}^T \mathbf{x} + b\right)}}
$$

Where:
<ul>
<li><em>\( \mathbf{w} \)</em> is the weight vector of the model.</li>
<li><em>\( \mathbf{x} \)</em> is the input feature vector.</li>
<li><em>\( b \)</em> is the bias term.</li>
</ul>

The logistic function maps the weighted sum of input features and bias to a value between 0 and 1, representing the estimated probability.


In [16]:
from sklearn.linear_model import LogisticRegression

# Create the logistic regression model
model = LogisticRegression()

# Fit the model to the training data
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the model accuracy
accuracy = accuracy_score(y_test, y_pred)

# Calculate precision, recall, and F1-score
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print('Model Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)

Model Accuracy: 0.4374921787010387
Precision: 0.3427089651440492
Recall: 0.4374921787010387
F1-score: 0.33799410310539035


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))


###  K-Nearest Neighbors
K-Nearest Neighbors (KNN) is a simple yet effective classification algorithm used for both binary and multi-class classification tasks. The core idea behind KNN is to predict the class of an input instance by considering the classes of its nearest neighbors in the feature space.

<strong>Formal Notation</strong>

Given a dataset with input features \(X\) and corresponding labels \(y\), the KNN algorithm operates as follows:

1. For a given test instance \(x\), the algorithm identifies the \(k\) training instances in the dataset that are closest to \(x\) based on a chosen distance metric (e.g., Euclidean distance).
2. The class label of the test instance \(x\) is determined by a majority vote among its \(k\) nearest neighbors' class labels. If \(k = 1\), the instance is assigned the class of its nearest neighbor.

<strong>Choosing \(k\)</strong>

The value of \(k\) is a hyperparameter that affects the KNN algorithm's performance. A smaller \(k\) value makes the model more sensitive to noise, while a larger \(k\) value makes it more robust but potentially less able to capture local patterns. The choice of \(k\) depends on the dataset's characteristics and can be determined through techniques like cross-validation.

<strong>Distance Metric</strong>

The choice of distance metric, such as Euclidean distance or Manhattan distance, depends on the nature of the data and the problem at hand. It determines how the algorithm measures the "closeness" between instances in the feature space.

<strong>Evaluation</strong>

The KNN classifier is evaluated using metrics like accuracy, precision, recall, F1-score, and confusion matrices. Additionally, the choice of \(k\) and the distance metric can significantly impact the model's performance.

KNN is a non-parametric algorithm, meaning it doesn't make any assumptions about the underlying data distribution. While it's easy to understand and implement, its efficiency can decrease for large datasets, as it requires calculating distances to all training instances.

KNN can be a powerful classifier, especially when the decision boundaries are irregular or when there's significant overlap between classes. However, it's important to preprocess the data, handle missing values, and consider the impact of outliers to achieve optimal results.
</div>

In [17]:
from sklearn.neighbors import KNeighborsClassifier

# Create the KNN classifier
knn = KNeighborsClassifier(n_neighbors=3)

# Fit the model to the training data
knn.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = knn.predict(X_test)

# Evaluate the model accuracy
accuracy = accuracy_score(y_test, y_pred)

# Calculate precision, recall, and F1-score
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print('Model Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)

Model Accuracy: 0.580903516456013
Precision: 0.5901008565550598
Recall: 0.580903516456013
F1-score: 0.5824126826748549


  _warn_prf(average, modifier, msg_start, len(result))


###  Decision Tree

A Decision Tree is a versatile supervised machine learning algorithm used for both classification and regression tasks. It creates a tree-like structure of decisions that splits the input feature space into subsets based on the values of features. Each internal node of the tree represents a decision based on a feature, and each leaf node represents a class label or a predicted value.

<strong>Formal Notation</strong>

Given a dataset with input features \(X\) and corresponding labels \(y\), the Decision Tree algorithm constructs a tree by recursively partitioning the feature space using binary decisions. At each internal node, a feature is chosen to make the best split, optimizing a criterion such as Gini impurity or information gain.

<strong>Splitting Criteria</strong>

Different criteria can be used for splitting nodes, such as:
- Gini impurity: Measures the probability of an incorrect classification if an item is randomly chosen from the node's distribution.
- Information gain: Measures the reduction in uncertainty after the split, based on the entropy of the class labels.
- Gain ratio: Adjusts information gain for the number of branches in the split.

<strong>Pruning</strong>

To avoid overfitting, Decision Trees can be pruned by removing branches that don't contribute significantly to improving the model's performance on validation data.

<strong>Evaluation</strong>

The Decision Tree classifier is evaluated using metrics like accuracy, precision, recall, F1-score, and confusion matrices. It's important to visualize the tree's structure to interpret its decision-making process and identify potential overfitting.

Decision Trees are intuitive, easy to interpret, and capable of handling both numerical and categorical data. However, they are prone to overfitting, especially with deep trees. Techniques like pruning, limiting the tree depth, and using ensemble methods like Random Forests can mitigate this issue.

Decision Trees can also handle multi-output problems, making them suitable for tasks with multiple correlated outputs. Their ability to capture non-linear relationships and handle interactions between features makes them useful in various domains.
</div>

In [18]:
from sklearn.tree import DecisionTreeClassifier

# Create the decision tree classifier
model = DecisionTreeClassifier()

# Fit the model to the training data
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)

# Calculate precision, recall, and F1-score
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print('Model Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)

Model Accuracy: 0.6115630083844324
Precision: 0.6076613324006951
Recall: 0.6115630083844324
F1-score: 0.6085562622111524


###  Random Forest
Random Forest is an ensemble learning algorithm that combines multiple Decision Trees to improve predictive performance and reduce overfitting. It's widely used for both classification and regression tasks. Each Decision Tree in the Random Forest is trained on a random subset of the data and features.

<strong>Formal Notation</strong>

Given a dataset with input features \(X\) and corresponding labels \(y\), the Random Forest algorithm builds a collection of Decision Trees. For classification, each tree predicts the class label, and the final class is determined by majority voting. For regression, the individual tree predictions are averaged to get the final prediction.

<strong>Ensemble Approach</strong>

Random Forest leverages the idea of the ensemble approach. By combining multiple weak learners (Decision Trees), it enhances predictive power and generalization. The randomness introduced during tree construction reduces the correlation between trees, which improves the overall model's performance.

<strong>Feature Selection</strong>

Random Forest randomly selects a subset of features for each Decision Tree, preventing certain features from dominating the predictions. This randomness improves the diversity of trees in the forest.

<strong>Out-of-Bag (OOB) Error</strong>

Each tree in the forest is trained on a subset of the data, leaving out some samples. These out-of-bag samples can be used to estimate the model's performance without the need for a separate validation set.

<strong>Advantages</strong>

Random Forests are robust, perform well on various types of data, and require minimal parameter tuning. They can handle high-dimensional data and capture complex relationships between features.

<strong>Evaluation</strong>

Random Forest is evaluated using metrics like accuracy, precision, recall, F1-score, and confusion matrices. Visualizing individual trees can help understand their decision-making process.

Random Forests are widely used in practice due to their versatility, ability to handle noise, and resistance to overfitting. They are suitable for tasks with complex feature interactions and noisy data.


In [19]:
from sklearn.ensemble import RandomForestClassifier

# Create the Random Forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the model to the training data
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the model accuracy
accuracy = accuracy_score(y_test, y_pred)

# Calculate precision, recall, and F1-score
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print('Model Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)

Model Accuracy: 0.6221999749718433
Precision: 0.6092781656990002
Recall: 0.6221999749718433
F1-score: 0.612898070825332


## Random Forest Classifier Implementation

<div class="alert alert-block alert-success">

Here's the implementation of the Random Forest classifier using Python and scikit-learn:

<strong>Model Creation and Fitting</strong>
We start by creating a Random Forest classifier with 100 estimators (decision trees) and a fixed random seed for reproducibility. The model is then fitted to the training data using the latitude and longitude features.

<strong>Prediction Function</strong>
We define a function called <code>predict_disaster_type</code> that takes latitude and longitude as input. This function prepares the input data, encodes it using one-hot encoding, and then predicts the probability distribution of disaster types using <code>predict_proba</code> from the trained Random Forest model. The disaster types are obtained from <code>model.classes_</code>.

<strong>Example Usage</strong>
For demonstration purposes, we provide an example latitude and longitude. The <code>predict_disaster_type</code> function is called to predict the disaster type based on the input location. The predicted ranking of disaster types along with their probabilities is then printed.

Feel free to replace the <code>input_latitude</code> and <code>input_longitude</code> with your own values for prediction.

</div>

In [20]:
# Create the Random Forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the model to the data
model.fit(X, y)

# Function to predict the disaster type based on latitude and longitude
def predict_disaster_type(latitude, longitude):
    # Create a dataframe with the input latitude and longitude
    input_data = pd.DataFrame({'latitude': [latitude], 'longitude': [longitude]})

    # Encode the input data
    input_encoded = pd.get_dummies(input_data)

    # Make the prediction
    prediction_proba = model.predict_proba(input_encoded)[0]
    disaster_types = model.classes_

    # Get the ranking based on probability
    ranking = sorted(zip(disaster_types, prediction_proba), key=lambda x: x[1], reverse=True)
    
    return ranking

# Example usage
input_latitude = 30.316496
input_longitude = 78.032188

# Get the ranking of predicted disaster types
predicted_ranking = predict_disaster_type(input_latitude, input_longitude)

# Print the ranking
for rank, (disaster_type, probability) in enumerate(predicted_ranking, 1):
    print(f'Rank {rank}: {disaster_type} (Probability: {probability})')


Rank 1: flood (Probability: 0.7205454545454546)
Rank 2: earthquake (Probability: 0.17)
Rank 3: drought (Probability: 0.06033333333333333)
Rank 4: extreme temperature  (Probability: 0.024803030303030306)
Rank 5: storm (Probability: 0.02)
Rank 6: landslide (Probability: 0.004318181818181818)
Rank 7: mass movement (dry) (Probability: 0.0)
Rank 8: volcanic activity (Probability: 0.0)


## Predict and Visualize Disaster Function

<div class="alert alert-block alert-info">

This code demonstrates how to predict and visualize potential disaster types using the trained Random Forest classifier and visualize the predictions on an interactive map using the Folium library. 

Feel free to replace <code>input_latitude</code> and <code>input_longitude</code> with your own values for prediction and visualization.

Note: Make sure you have the required libraries installed and the Random Forest classifier trained before running this code.
</div>

In [21]:
import folium
from folium import Marker
from folium.plugins import HeatMap

# Load your dataset and prepare the feature matrix X and the target variable y
# (Assuming you have already loaded the dataset and prepared X_encoded and y)

# Create the Random Forest classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit the model to the data
model.fit(X, y)

def predict_and_visualize_disaster(latitude, longitude):
    # Function to predict the disaster type based on latitude and longitude
    def predict_disaster_type(latitude, longitude):
        # Create a dataframe with the input latitude and longitude
        input_data = pd.DataFrame({'latitude': [latitude], 'longitude': [longitude]})

        # Encode the input data
        input_encoded = pd.get_dummies(input_data)

        # Make the prediction
        prediction_proba = model.predict_proba(input_encoded)[0]
        disaster_types = model.classes_

        # Get the ranking based on probability
        ranking = sorted(zip(disaster_types, prediction_proba), key=lambda x: x[1], reverse=True)

        return ranking

    # Get the ranking of predicted disaster types
    predicted_ranking = predict_disaster_type(latitude, longitude)

    # Print the ranking
    for rank, (disaster_type, probability) in enumerate(predicted_ranking, 1):
        print(f'Rank {rank}: {disaster_type} (Probability: {probability})')

    # Function to zoom to the specified location on an interactive map
    def zoom_to_location(latitude, longitude):
        
        # Get the top 3 predicted disaster types and their probabilities
        predicted_ranking_2 = predict_disaster_type(latitude, longitude)[:2]

        # Create a color map based on the top 3 disaster types
        color_map = {
            'flood': 'skyblue',
            'earthquake': 'brown',
            'storm': 'gray',
            'extreme temperature ': 'red',
            'landslide': 'darkbrown',
            'volcanic activity': 'orange',
            'drought': 'yellow',
            'mass movement (dry)': 'lightbrown'
        }

        # Create a Folium map centered at the specified location
        map_location = folium.Map(location=[latitude, longitude], zoom_start=8)

        # Create a HeatMap-like overlay covering the district of the specified location with more random points
        heat_data = [
            [random.uniform(latitude - 0.1, latitude + 0.1), random.uniform(longitude - 0.1, longitude + 0.1)]
            for _ in range(500)
        ]
        HeatMap(
            heat_data,
            radius=10,
            gradient={0.03: color_map[predicted_ranking_2[1][0]], 0.05: color_map[predicted_ranking_2[0][0]]}
        ).add_to(map_location)

        # Display the map
        display(map_location)

    # Call the function to zoom to the specified location
    zoom_to_location(latitude, longitude)

# Example usage
input_latitude = 28.644800
input_longitude = 77.216721

# Call the combined function to predict and visualize disaster for the specified location
predict_and_visualize_disaster(input_latitude, input_longitude)

Rank 1: extreme temperature  (Probability: 0.6955179220914242)
Rank 2: flood (Probability: 0.16019697574618735)
Rank 3: drought (Probability: 0.10036188819734207)
Rank 4: earthquake (Probability: 0.043923213965046067)
Rank 5: landslide (Probability: 0.0)
Rank 6: mass movement (dry) (Probability: 0.0)
Rank 7: storm (Probability: 0.0)
Rank 8: volcanic activity (Probability: 0.0)
