**Project Title**

ResQRelief: Integrated Disaster Impact Prediction and Response Management System


> An integrated, data-driven system for predicting disaster impact and supporting response management.



**Problem Statement**

Disasters such as floods, earthquakes, and cyclones cause large-scale damage to lives, infrastructure, and the economy. The major challenge during these events is not only predicting when they will occur and how severe they will be, but also managing the response effectively to reduce losses and support faster recovery.  

Most existing projects focus on a single type of disaster, which limits their usefulness.  
There is a need for an integrated system that can analyze different disaster datasets, predict potential impacts, and provide insights to support timely, data-driven decision-making for response and resource management.


**Description**

ResQRelief uses disaster-related datasets (starting with flood impact data) to build predictive models that estimate the severity and consequences of disasters.  
By analyzing historical patterns and contextual factors, the system can guide emergency preparedness, resource allocation, and response planning.  
While the initial focus is on floods, the framework is designed to extend to other disasters such as earthquakes and cyclones.


In [None]:
#Importing Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#ML related imports

#Model Selection
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score

#Preprocessing
from sklearn.preprocessing import StandardScaler, LabelEncoder

#Models
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression

#Metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, mean_squared_error, r2_score

#Save Models
import joblib

In [None]:
# Load the flood, category, message, and census CSVs (paths are relative
# to the current working directory)
flood_data = pd.read_csv("flood.csv")
categories_data = pd.read_csv("disaster_categories.csv")
messages_data = pd.read_csv("disaster_messages.csv")
# The census file name must match the file on disk exactly
census_data = pd.read_csv("india-districts-census-2011.csv")

# Preview the first few rows of every dataset under its own heading
previews = (
    ("Flood Data:", flood_data),
    ("Disaster Categories Data:", categories_data),
    ("Disaster Messages Data:", messages_data),
    ("Census/Population Data:", census_data),
)
for heading, frame in previews:
    print(heading)
    print(frame.head(), "\n")

In [None]:
# Show the directory the relative CSV paths are resolved against
import os

working_dir = os.getcwd()
print(working_dir)


In [None]:
# Flood data overview: schema, summary statistics, and missing-value counts
print("Flood Data Info:")
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
flood_data.info()
print()
print("Flood Data Description:")
print(flood_data.describe(), "\n")
print("Flood Data Missing Values:")
print(flood_data.isnull().sum(), "\n")


In [None]:
# Disaster categories overview: schema, summary statistics, missing values
print("Disaster Categories Data Info:")
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
categories_data.info()
print()
print("Disaster Categories Data Description:")
print(categories_data.describe(), "\n")
print("Disaster Categories Data Missing Values:")
print(categories_data.isnull().sum(), "\n")


In [None]:
# --- Disaster Messages Data: schema, summary statistics, missing values ---
print("Disaster Messages Data Info:")
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
messages_data.info()
print()
print("Disaster Messages Data Description:")
print(messages_data.describe(), "\n")
print("Disaster Messages Data Missing Values:")
print(messages_data.isnull().sum(), "\n")

In [None]:
# --- Census/Population Data: schema, summary statistics, missing values ---
print("Census Data Info:")
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
census_data.info()
print()
print("Census Data Description:")
print(census_data.describe(), "\n")
print("Census Data Missing Values:")
print(census_data.isnull().sum(), "\n")

## Week 2: Exploratory Data Analysis (EDA), Data Transformation, and Feature Selection

### Messages Data Transformation

In [None]:
# --- Merge messages and categories datasets ---
# NOTE(review): this is an inner join on 'id'; if an id is repeated in either
# frame the merge multiplies those rows -- confirm ids are unique upstream.
df_messages_combined = messages_data.merge(categories_data, on='id')
print("Combined Messages & Categories Data Info:")
df_messages_combined.info()
print("\nCombined Messages & Categories Data Head:")
print(df_messages_combined.head())

# --- Transform the 'categories' column into one integer column per category ---
# Each cell holds ';'-separated tokens of the form <name><separator><value>.
categories_split = df_messages_combined['categories'].str.split(';', expand=True)

# Column names come from the first row: strip the trailing separator character
# plus the numeric value. Using a pattern instead of a fixed two-character slice
# keeps the names intact even if a value has more than one digit.
row = categories_split.iloc[0]
category_colnames = row.str.replace(r'.\d+$', '', regex=True)
categories_split.columns = category_colnames

# Keep only the trailing number of every token and cast it to int.
# NOTE(review): nothing here forces the result into {0, 1}; if any category
# carries another value it is preserved as-is -- verify before treating these
# columns as binary labels.
categories_split = categories_split.apply(
    lambda col: col.str.extract(r'(\d+)$', expand=False).astype(int)
)

# Replace the raw 'categories' text column with the parsed per-category columns
# (concat aligns on the shared row index of the two frames)
df_messages_combined = df_messages_combined.drop(columns='categories')
df_messages_final = pd.concat([df_messages_combined, categories_split], axis=1)

print("\nFinal Transformed Messages Data Info:")
df_messages_final.info()
print("\nFinal Transformed Messages Data Head:")
print(df_messages_final.head())

In [None]:
# --- EDA for Transformed Message Data ---
# Count the number of messages flagged for each category. The category columns
# are selected by name (taken from the parsed categories frame) rather than by
# a fixed positional offset, so the counts stay correct if the number of
# leading non-category columns ever changes.
category_cols = list(categories_split.columns)
category_counts = df_messages_final[category_cols].sum().sort_values(ascending=False)

# Bar chart of the per-category totals, largest first
plt.figure(figsize=(15, 7))
sns.barplot(x=category_counts.index, y=category_counts.values)
plt.title('Distribution of Disaster Message Categories')
plt.xlabel('Category')
plt.ylabel('Number of Messages')
plt.xticks(rotation=90)  # category names are long; rotate so they do not overlap
plt.tight_layout()
plt.show()

### Flood Data

In [None]:
# Flood data shape and column names
print("Flood Data Shape:", flood_data.shape)
print("Flood Data Columns:", flood_data.columns.tolist())

# --- Univariate analysis for Flood Data ---

target_col = "FloodProbability"  # column treated as the prediction target

# One bar per distinct target value.
# NOTE(review): if FloodProbability is continuous this draws a bar for every
# unique value; a histogram may read better -- confirm the intended view.
sns.countplot(data=flood_data, x=target_col)
plt.title(f"Distribution of {target_col}")
plt.show()

# Histograms (with a KDE overlay) for all numeric columns, one subplot each
num_cols = flood_data.select_dtypes(include=np.number).columns

# squeeze=False always yields a 2-D axes array, so flattening it gives an
# iterable even when there is a single numeric column (plt.subplots would
# otherwise return a bare Axes object that zip() cannot iterate).
fig, axes = plt.subplots(
    len(num_cols), 1, figsize=(8, len(num_cols) * 3), squeeze=False
)
axes = axes.ravel()

for ax, col in zip(axes, num_cols):
    sns.histplot(flood_data[col], kde=True, ax=ax)
    ax.set_title(f"Distribution of {col}")

plt.tight_layout()
plt.show()


In [None]:
# --- Bivariate analysis for Flood Data ---

# Boxplots: each numeric feature against FloodProbability.
# The target is removed BEFORE the axes are created; skipping it inside the
# loop would leave the axis paired with it as an empty panel in the figure.
feature_cols = [col for col in num_cols if col != "FloodProbability"]

# squeeze=False + ravel() keeps the axes iterable even with a single feature
fig, axes = plt.subplots(
    len(feature_cols), 1, figsize=(8, len(feature_cols) * 3), squeeze=False
)
axes = axes.ravel()

for ax, col in zip(axes, feature_cols):
    sns.boxplot(data=flood_data, x="FloodProbability", y=col, ax=ax)
    ax.set_title(f"{col} vs FloodProbability")

plt.tight_layout()
plt.show()

# Scatter plots: two individual features against the target
plt.figure(figsize=(8,6))
sns.scatterplot(data=flood_data, x="MonsoonIntensity", y="FloodProbability")
plt.title("Monsoon Intensity vs Flood Probability")
plt.show()

plt.figure(figsize=(8,6))
sns.scatterplot(data=flood_data, x="Urbanization", y="FloodProbability")
plt.title("Urbanization vs Flood Probability")
plt.show()


In [None]:
# Pairwise correlations across the flood dataset, drawn as an annotated heatmap
corr = flood_data.corr()
plt.figure(figsize=(12, 8))
sns.heatmap(data=corr, cmap="coolwarm", annot=True, fmt=".2f", cbar=True)
plt.title("Correlation Heatmap of Flood Data Features")
plt.show()

# Pairplot limited to a handful of features, coloured by the target column
selected_cols = [
    "MonsoonIntensity",
    "Urbanization",
    "Deforestation",
    "RiverManagement",
    "FloodProbability",
]
key_features = flood_data[selected_cols]

sns.pairplot(key_features, diag_kind="kde", hue="FloodProbability")
plt.suptitle("Pairplot of Key Features Colored by Flood Probability", y=1.02)
plt.show()


In [None]:
# Pairwise scatter matrix over the whole flood dataset, KDE curves on the diagonal
sns.pairplot(data=flood_data, diag_kind="kde")
plt.show()


In [None]:
# Census data shape and column names (as a plain list so long column sets are
# printed in full, matching how the flood columns are reported)
print("Census Data Shape:", census_data.shape)
print("Census Data Columns:", census_data.columns.tolist())

# Histograms (with a KDE overlay) for the numeric census features
num_cols_census = census_data.select_dtypes(include=np.number).columns

# squeeze=False + ravel() keeps the axes iterable even with a single column
fig, axes = plt.subplots(
    len(num_cols_census), 1, figsize=(8, len(num_cols_census) * 3), squeeze=False
)
axes = axes.ravel()
for ax, col in zip(axes, num_cols_census):
    sns.histplot(census_data[col], kde=True, ax=ax)
    ax.set_title(f"Distribution of {col}")
plt.tight_layout()
plt.show()

# Correlation heatmap over the numeric columns only: from pandas 2.0 on,
# DataFrame.corr() no longer drops non-numeric columns by default and raises
# if the frame contains any, so the selection is made explicitly here.
plt.figure(figsize=(10,6))
sns.heatmap(census_data[num_cols_census].corr(), annot=False, cmap="YlGnBu")
plt.title("Correlation Heatmap - Census Data")
plt.show()


In [None]:
# Separate the target column from the predictor columns
y = flood_data[target_col]
X = flood_data.drop(columns=[target_col])

# Rank every column by its correlation with the target, strongest first
corr_matrix = flood_data.corr()
corr_with_target = corr_matrix[target_col].sort_values(ascending=False)
print("Correlation with Flood Probability:\n", corr_with_target)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

# Standardise features: the scaler learns its statistics from the training
# split only and then applies them to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("Scaling done. X_train_scaled shape:", X_train_scaled.shape)
