



## Targets for Each Dataset

| Dataset       | Target Column              |
|---------------|----------------------------|
| Landslide     | `Landslide_Occurred`       |
| GLOF          | `GLOF_Occurred`            |
| Earthquake    | `Quake_Occurred`           |
| Liquefaction  | `Liquefaction_Risk`        |
| Sinkhole      | `Sinkhole_Reported`        |
| Tsunami       | `Alert_Issued`             |
| Lahar         | `Lahar_Triggered`          |





## Landslide

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the cleaned landslide table from the working directory and preview
# its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="cleaned_landslide_dataset.csv")
df.head(n=5)


Unnamed: 0,Date,Latitude,Longitude,Rainfall_mm,Slope_deg,Soil_Moisture_%,Landslide_Occurred,Risk_Score
0,2024-02-22,0.729087,0.286249,0.748475,0.702628,0.066064,0.0,0.74396
1,2024-02-20,0.760593,0.354387,0.546692,0.966397,0.992464,0.0,0.610419
2,2024-02-01,0.700868,0.797128,0.608098,0.067206,0.182869,0.0,0.526354
3,2024-01-20,0.692912,0.477937,0.711671,0.996463,0.067571,1.0,0.757021
4,2024-01-26,0.303237,0.113547,0.302809,0.473219,0.854057,0.0,0.325369


In [2]:
# Label column first, then every remaining column except Date as features.
# NOTE(review): Risk_Score stays in X — confirm it is not derived from the
# label, otherwise it leaks the target into the features.
y = df.loc[:, "Landslide_Occurred"]
X = df.drop(["Landslide_Occurred", "Date"], axis="columns")


In [3]:
# Hold out 20% of the rows for testing. The split is stratified on the label
# and seeded, so re-running the cell reproduces the same partition.
# NOTE(review): later sections assign to these same four names, so this
# split is only available until the next section's split cell runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Report the split sizes and the class counts of the full (unsplit) target.
for caption, value in (
    ("Train set shape:", X_train.shape),
    ("Test set shape:", X_test.shape),
    ("Target distribution:", y.value_counts()),
):
    print(caption, value)


Train set shape: (280, 6)
Test set shape: (70, 6)
Target distribution: Landslide_Occurred
0.0    176
1.0    174
Name: count, dtype: int64


## GLOF

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the cleaned GLOF table (this rebinds `df`) and preview its first
# five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="cleaned_glof_dataset.csv")
df.head(n=5)

Unnamed: 0,Date,Latitude,Longitude,Glacier_Volume_million_m3,Lake_Level_m,Temperature_C,GLOF_Occurred,Melt_Risk
0,2024-01-07,0.429345,0.215513,0.38427,0.105926,0.17274,0.0,0.087366
1,2024-02-02,0.128226,0.735562,0.258427,0.185496,0.990553,1.0,0.324564
2,2024-02-11,0.264364,0.151522,0.224719,0.78412,0.038462,0.0,0.627743
3,2024-02-09,0.731197,0.253567,0.959551,0.292933,0.698381,0.0,0.353686
4,2024-01-31,0.368678,0.320414,0.179775,0.289407,0.923752,1.0,0.397721


In [6]:
# Label column first, then every remaining column except Date as features.
# NOTE(review): Melt_Risk stays in X — confirm it is not derived from the
# label, otherwise it leaks the target into the features.
y = df.loc[:, "GLOF_Occurred"]
X = df.drop(["GLOF_Occurred", "Date"], axis="columns")


In [7]:
# Stratified, seeded 80/20 hold-out split of the GLOF data.
# Rebinds X_train / X_test / y_train / y_test, replacing whatever split the
# previous section left in the kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)


In [8]:
# Report the split sizes and the class counts of the full (unsplit) target.
for caption, value in (
    ("Train shape:", X_train.shape),
    ("Test shape:", X_test.shape),
    ("Target value counts:", y.value_counts()),
):
    print(caption, value)


Train shape: (280, 6)
Test shape: (70, 6)
Target value counts: GLOF_Occurred
1.0    182
0.0    168
Name: count, dtype: int64


## Earthquake

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the cleaned earthquake table (this rebinds `df`) and preview its
# first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="cleaned_earthquake_dataset.csv")
df.head(n=5)

Unnamed: 0,Date,Latitude,Longitude,Magnitude,Depth_km,Ground_Acceleration_g,Quake_Occurred,Energy_Index
0,2024-01-04,0.089176,0.074861,0.525,0.437452,0.967213,1.0,0.091418
1,2024-03-29,0.306089,0.057281,0.525,0.897061,0.540984,1.0,0.026684
2,2024-03-10,0.198002,0.380455,0.375,0.931374,0.098361,1.0,0.016457
3,2024-03-02,0.911789,0.231177,0.6,0.461609,0.344262,0.0,0.092481
4,2024-03-11,0.086917,0.869152,0.125,0.520388,0.442623,1.0,0.038131


In [10]:
# Label column first, then every remaining column except Date as features.
# NOTE(review): Energy_Index stays in X — confirm it is not derived from the
# label, otherwise it leaks the target into the features.
y = df.loc[:, "Quake_Occurred"]
X = df.drop(["Quake_Occurred", "Date"], axis="columns")


In [11]:
# Stratified, seeded 80/20 hold-out split of the earthquake data.
# Rebinds X_train / X_test / y_train / y_test, replacing whatever split the
# previous section left in the kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)


In [12]:
# Report the split sizes and the class counts of the full (unsplit) target.
for caption, value in (
    ("Train set:", X_train.shape),
    ("Test set:", X_test.shape),
    ("Target distribution:", y.value_counts()),
):
    print(caption, value)


Train set: (280, 6)
Test set: (70, 6)
Target distribution: Quake_Occurred
1.0    184
0.0    166
Name: count, dtype: int64


## Soil Liquefaction

In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the cleaned liquefaction table from the working directory; this
# rebinds `df`, so the previous section's frame is no longer reachable.
df = pd.read_csv(filepath_or_buffer="cleaned_liquefaction_dataset.csv")

In [14]:
# Label column first, then every remaining column except Date as features.
# NOTE(review): the recorded output for this section shows three float
# levels (0.0, 0.5, 1.0). scikit-learn classifiers treat non-integer float
# labels as a continuous target and refuse to fit — confirm whether the
# label should be encoded as integer classes before modelling.
y = df.loc[:, "Liquefaction_Risk"]
X = df.drop(["Liquefaction_Risk", "Date"], axis="columns")

In [15]:
# Stratified, seeded 80/20 hold-out split of the liquefaction data.
# Rebinds X_train / X_test / y_train / y_test, replacing whatever split the
# previous section left in the kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [16]:
# Report the split sizes and the per-level counts of the full (unsplit) target.
for caption, value in (
    ("Train shape:", X_train.shape),
    ("Test shape:", X_test.shape),
    ("Risk class distribution:", y.value_counts()),
):
    print(caption, value)

Train shape: (280, 8)
Test shape: (70, 8)
Risk class distribution: Liquefaction_Risk
1.0    143
0.0    107
0.5    100
Name: count, dtype: int64


## Sinkhole

In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the cleaned sinkhole table from the working directory; this rebinds
# `df`, so the previous section's frame is no longer reachable.
df = pd.read_csv(filepath_or_buffer="cleaned_sinkhole_dataset.csv")


In [18]:
# Label column first, then every remaining column except Date as features.
y = df.loc[:, "Sinkhole_Reported"]
X = df.drop(["Sinkhole_Reported", "Date"], axis="columns")


In [19]:
# Stratified, seeded 80/20 hold-out split of the sinkhole data.
# Rebinds X_train / X_test / y_train / y_test, replacing whatever split the
# previous section left in the kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)


In [20]:
# Report the split sizes and the class counts of the full (unsplit) target.
# The last caption ends in a newline so the counts start on their own line.
for caption, value in (
    ("Train shape:", X_train.shape),
    ("Test shape:", X_test.shape),
    ("Sinkhole class balance:\n", y.value_counts()),
):
    print(caption, value)


Train shape: (280, 5)
Test shape: (70, 5)
Sinkhole class balance:
 Sinkhole_Reported
0.0    181
1.0    169
Name: count, dtype: int64


## Tsunami

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the cleaned tsunami table from the working directory; this rebinds
# `df`, so the previous section's frame is no longer reachable.
df = pd.read_csv(filepath_or_buffer="cleaned_tsunami_dataset.csv")


In [22]:
# Label column first, then every remaining column except Date as features.
y = df.loc[:, "Alert_Issued"]
X = df.drop(["Alert_Issued", "Date"], axis="columns")


In [23]:
# Stratified, seeded 80/20 hold-out split of the tsunami data.
# Rebinds X_train / X_test / y_train / y_test, replacing whatever split the
# previous section left in the kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)


In [24]:
# Report the split sizes and the class counts of the full (unsplit) target.
# The last caption ends in a newline so the counts start on their own line.
for caption, value in (
    ("Train set:", X_train.shape),
    ("Test set:", X_test.shape),
    ("Alerts distribution:\n", y.value_counts()),
):
    print(caption, value)


Train set: (280, 6)
Test set: (70, 6)
Alerts distribution:
 Alert_Issued
0.0    181
1.0    169
Name: count, dtype: int64


## Lahar

In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the cleaned lahar table from the working directory; this rebinds
# `df`, so the previous section's frame is no longer reachable.
df = pd.read_csv(filepath_or_buffer="cleaned_lahar_dataset.csv")


In [26]:
# Label column first, then every remaining column except Date as features.
y = df.loc[:, "Lahar_Triggered"]
X = df.drop(["Lahar_Triggered", "Date"], axis="columns")


In [27]:
# Stratified, seeded 80/20 hold-out split of the lahar data.
# Rebinds X_train / X_test / y_train / y_test, replacing whatever split the
# previous section left in the kernel.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)


In [28]:
# Report the split sizes and the class counts of the full (unsplit) target.
# The last caption ends in a newline so the counts start on their own line.
for caption, value in (
    ("Train shape:", X_train.shape),
    ("Test shape:", X_test.shape),
    ("Trigger count:\n", y.value_counts()),
):
    print(caption, value)


Train shape: (280, 6)
Test shape: (70, 6)
Trigger count:
 Lahar_Triggered
1.0    193
0.0    157
Name: count, dtype: int64
