In [36]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle
import os

In [37]:
# Load the cloudburst dataset from a CSV located in the working directory.
df = pd.read_csv(filepath_or_buffer='cloudburst.csv')

In [38]:
# Quick structural overview: (rows, columns) followed by the column labels.
frame_shape = df.shape
column_labels = df.columns.tolist()

print("\nDataset shape:", frame_shape)
print("\nColumns:")
print(column_labels)


Dataset shape: (1224, 7)

Columns:
['TEMPERATURE', 'RELATIVE HUMIDITY', 'DEWPOINT', 'SEALEVEL PRESSURE', 'CLOUD COVER', 'WIND SPEED ', 'output']


In [39]:
# Preview the first five rows (rich display as the cell's last expression).
df.head(n=5)

Unnamed: 0,TEMPERATURE,RELATIVE HUMIDITY,DEWPOINT,SEALEVEL PRESSURE,CLOUD COVER,WIND SPEED,output
0,-6.9,93,-7.8,1010.0,100,3.0,0
1,-6.5,92,-7.6,1010.6,92,3.5,0
2,-5.3,75,-9.1,1010.2,89,2.0,0
3,-4.9,82,-7.5,1010.3,82,1.8,0
4,-4.3,88,-6.0,1010.2,78,4.2,0


In [40]:
# Label column in the loaded frame.
target_col = "output"

# Model inputs, listed in the order they are fed to the scaler and models.
# NOTE: 'WIND SPEED ' keeps its trailing space on purpose -- that is exactly
# how the header appears in df.columns, so stripping it here would break the
# column selection.
feature_cols = [
    'TEMPERATURE', 'RELATIVE HUMIDITY', 'DEWPOINT',
    'SEALEVEL PRESSURE', 'CLOUD COVER', 'WIND SPEED ',
]

In [41]:
# Separate inputs from the label; .copy() decouples both from `df` so later
# changes to either side do not propagate to the other.
X, y = df[feature_cols].copy(), df[target_col].copy()

In [42]:
# Hold out 20% of the rows for testing. `stratify=y` keeps the class
# proportions of `y` in both partitions; the fixed seed makes the split
# repeatable.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [43]:
# Learn the standardisation statistics from the training rows only, then
# apply that same fitted transform to both partitions so no test-set
# information reaches the scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [44]:
# RBF-kernel support vector classifier trained on the standardised features.
# probability=True fits an additional calibration step using internal
# cross-validation whose data shuffling is random; pinning random_state
# (same seed as the train/test split) makes the fitted -- and later pickled --
# model reproducible across Restart & Run All.
svm = SVC(
    kernel='rbf',
    C=2.0,
    gamma='scale',
    probability=True,
    random_state=42,
)
svm.fit(X_train_scaled, y_train)

In [61]:
# Elastic-net regularised logistic regression on the standardised features.
# - solver="saga" is required for the elastic-net penalty; l1_ratio=0.3 mixes
#   the L1 and L2 terms.
# - class_weight='balanced' reweights classes inversely to their frequency.
# - saga shuffles the data stochastically, so random_state is pinned (same
#   seed as the train/test split) to make the fitted coefficients repeatable.
logreg = LogisticRegression(
    penalty="elasticnet",
    l1_ratio=0.3,
    solver="saga",
    class_weight='balanced',
    max_iter=1200,
    C=1.0,
    random_state=42,
)

logreg.fit(X_train_scaled, y_train)

In [58]:
def evaluate(name, model, X_t, y_t):
    """Print an evaluation summary for a fitted classifier.

    Args:
        name: Label printed between the two banner rules.
        model: Fitted estimator; only its ``predict`` method is called.
        X_t: Feature matrix handed to ``model.predict``.
        y_t: True labels the predictions are scored against.

    Returns:
        None. The accuracy, classification report and confusion matrix are
        written to stdout, in that order.
    """
    print("\n====================", name, "====================", sep="\n")

    y_hat = model.predict(X_t)

    # Each metric is computed immediately before it is printed.
    for heading, metric in (
        ("Accuracy:", accuracy_score),
        ("\nClassification Report:\n", classification_report),
        ("Confusion Matrix:\n", confusion_matrix),
    ):
        print(heading, metric(y_t, y_hat))

In [59]:
# Score both fitted classifiers on the same held-out, standardised test set.
for label, fitted_model in (
    ("Logistic Regression", logreg),
    ("SVM (RBF)", svm),
):
    evaluate(label, fitted_model, X_test_scaled, y_test)


Logistic Regression
Accuracy: 0.8367346938775511

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.96      0.91       204
           1       0.53      0.22      0.31        41

    accuracy                           0.84       245
   macro avg       0.69      0.59      0.61       245
weighted avg       0.80      0.84      0.81       245

Confusion Matrix:
 [[196   8]
 [ 32   9]]

SVM (RBF)
Accuracy: 0.8081632653061225

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.93      0.89       204
           1       0.38      0.22      0.28        41

    accuracy                           0.81       245
   macro avg       0.62      0.57      0.58       245
weighted avg       0.77      0.81      0.79       245

Confusion Matrix:
 [[189  15]
 [ 32   9]]


In [62]:
# Persist the fitted models together with the scaler they were trained
# behind, one pickle file per object, written to the working directory.
artifacts = {
    'cloudburst_logreg.pkl': logreg,
    'svm_cloudburst.pkl': svm,
    'scaler_cloudburst.pkl': scaler,
}

for filename, fitted_obj in artifacts.items():
    with open(filename, 'wb') as f:
        pickle.dump(fitted_obj, f)
