In [1]:
import pandas as pd

df = pd.read_csv("../data/processed/engineered_features.csv")

print("Dataset loaded")
print("Rows:", len(df))
df.head()


Dataset loaded
Rows: 18684


Unnamed: 0,State_x,Year,Annual_Rainfall,Avg_Temperature,State_y,Total_Production,Temp_Level,Avg_Rainfall_State,Rainfall_Deviation,Log_Production
0,ANDAMAN & NICOBAR ISLANDS,1997,2755.1,29.179167,Andhra Pradesh,21093500.0,High,2890.44605,-135.34605,16.864476
1,ANDAMAN & NICOBAR ISLANDS,1997,2755.1,29.179167,Arunachal Pradesh,267148.0,High,2890.44605,-135.34605,12.495562
2,ANDAMAN & NICOBAR ISLANDS,1997,2755.1,29.179167,Assam,5778334.0,High,2890.44605,-135.34605,15.569626
3,ANDAMAN & NICOBAR ISLANDS,1997,2755.1,29.179167,Bihar,19031372.0,High,2890.44605,-135.34605,16.761599
4,ANDAMAN & NICOBAR ISLANDS,1997,2755.1,29.179167,Goa,72538.0,High,2890.44605,-135.34605,11.19188


In [2]:
def climate_risk(row):
    if row["Rainfall_Deviation"] < -200 and row["Avg_Temperature"] > 28:
        return "High"
    elif row["Rainfall_Deviation"] < 0:
        return "Medium"
    else:
        return "Low"

df["Climate_Risk"] = df.apply(climate_risk, axis=1)

df["Climate_Risk"].value_counts()


Climate_Risk
Low       9167
Medium    6142
High      3375
Name: count, dtype: int64

In [3]:

X = df[[
    "Annual_Rainfall",
    "Avg_Temperature",
    "Rainfall_Deviation",
    "Log_Production"
]]

y = df["Climate_Risk"]

print(X.head())
print(y.head())


   Annual_Rainfall  Avg_Temperature  Rainfall_Deviation  Log_Production
0           2755.1        29.179167          -135.34605       16.864476
1           2755.1        29.179167          -135.34605       12.495562
2           2755.1        29.179167          -135.34605       15.569626
3           2755.1        29.179167          -135.34605       16.761599
4           2755.1        29.179167          -135.34605       11.191880
0    Medium
1    Medium
2    Medium
3    Medium
4    Medium
Name: Climate_Risk, dtype: object


In [4]:
from sklearn.model_selection import train_test_split
#train-test-split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

print("Training samples:", len(X_train))
print("Testing samples:", len(X_test))


Training samples: 13078
Testing samples: 5606


In [5]:
#standardize features
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [6]:
#naive bayes classification
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix

nb_model = GaussianNB()
nb_model.fit(X_train_scaled, y_train)

nb_pred = nb_model.predict(X_test_scaled)

print("Naive Bayes Accuracy:", accuracy_score(y_test, nb_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, nb_pred))


Naive Bayes Accuracy: 0.9627185158758473
Confusion Matrix:
 [[ 962    0   57]
 [   0 2683   46]
 [   7   99 1752]]


In [7]:
#svm linear kernel
from sklearn.svm import SVC

svm_linear = SVC(kernel="linear")
svm_linear.fit(X_train_scaled, y_train)

svm_linear_pred = svm_linear.predict(X_test_scaled)

print("SVM Linear Accuracy:", accuracy_score(y_test, svm_linear_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, svm_linear_pred))


SVM Linear Accuracy: 0.9932215483410631
Confusion Matrix:
 [[1019    0    0]
 [   0 2722    7]
 [  31    0 1827]]


In [8]:
#svm rbf kernel
svm_rbf = SVC(kernel="rbf")
svm_rbf.fit(X_train_scaled, y_train)

svm_rbf_pred = svm_rbf.predict(X_test_scaled)

print("SVM RBF Accuracy:", accuracy_score(y_test, svm_rbf_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, svm_rbf_pred))


SVM RBF Accuracy: 0.9825187299322155
Confusion Matrix:
 [[ 999    0   20]
 [   0 2728    1]
 [  30   47 1781]]


In [9]:
print("Naive Bayes Accuracy:", accuracy_score(y_test, nb_pred))
print("SVM Linear Accuracy:", accuracy_score(y_test, svm_linear_pred))
print("SVM RBF Accuracy:", accuracy_score(y_test, svm_rbf_pred))

Naive Bayes Accuracy: 0.9627185158758473
SVM Linear Accuracy: 0.9932215483410631
SVM RBF Accuracy: 0.9825187299322155
