In [66]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

## 🔧 1. Preprocessing

In [67]:
# Read the weather observations into a DataFrame.
# The path is relative to the kernel's working directory.
DATA_PATH = 'weather_forecast_data.csv'
df = pd.read_csv(DATA_PATH)

In [68]:
# Preview the first five rows to sanity-check the columns and the raw labels.
df.head(n=5)

Unnamed: 0,Temperature,Humidity,Wind_Speed,Cloud_Cover,Pressure,Rain
0,23.720338,89.592641,7.335604,50.501694,1032.378759,rain
1,27.879734,46.489704,5.952484,4.990053,992.61419,no rain
2,25.069084,83.072843,1.371992,14.855784,1007.23162,no rain
3,23.62208,74.367758,7.050551,67.255282,982.632013,rain
4,20.59137,96.858822,4.643921,47.676444,980.825142,no rain


In [69]:
# Encode the target as integers: 1 = rain, 0 = no rain.
RAIN_LABELS = {'rain': 1, 'no rain': 0}

# Guard on dtype so that re-running this cell is a no-op. Applying `.map` a
# second time to the already-encoded column would turn every label into NaN,
# because 0 and 1 are not keys of RAIN_LABELS.
if not pd.api.types.is_numeric_dtype(df['Rain']):
    rain_encoded = df['Rain'].map(RAIN_LABELS)

    # `.map` yields NaN for any value that is not a key of RAIN_LABELS.
    # Fail loudly here instead of letting NaN labels reach the model.
    unmapped = rain_encoded.isna()
    if unmapped.any():
        raise ValueError(
            f"Unexpected values in 'Rain': {df.loc[unmapped, 'Rain'].unique().tolist()}"
        )

    df['Rain'] = rain_encoded

In [70]:
# Separate the predictors from the 'Rain' target and convert both to NumPy arrays.
feature_frame = df.drop('Rain', axis='columns')
target_series = df['Rain']

X = feature_frame.to_numpy()
y = target_series.to_numpy()

In [71]:
# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split reproducible across runs.
# NOTE(review): the test report shows an imbalanced target (440 vs 60);
# consider whether the split should be stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=8,
)

In [72]:
# Confirm that neither split contains missing feature values before scaling.
train_has_nan = np.isnan(X_train).any()
test_has_nan = np.isnan(X_test).any()

print("Any NaNs in X_train:", train_has_nan)
print("Any NaNs in X_test:", test_has_nan)

Any NaNs in X_train: False
Any NaNs in X_test: False


In [73]:
# Standardize the features. The scaler is fit on the training split only and
# the same fitted scaler is then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## 🤖 2. Modeling

In [74]:
# Logistic regression with default constructor arguments, trained on the
# standardized training features.
model = LogisticRegression()

model.fit(X=X_train_scaled, y=y_train)

## 🔁 3. Cross-Validation

In [75]:
# Cross-validate scaler + classifier as a single pipeline on the *unscaled*
# training data, so the scaler is re-fit inside every fold on that fold's
# training part only. Scoring on X_train_scaled would leak each validation
# fold's mean/std into preprocessing, because that scaler was fit on all of
# X_train.
# cross_val_score fits clones of the estimator it is given, so `model` itself
# keeps the fit from the modeling cell.
cv_pipeline = make_pipeline(StandardScaler(), model)

cv_scores = cross_val_score(cv_pipeline, X_train, y_train, cv=5, scoring='accuracy')
print("Cross-validated accuracy scores:", cv_scores)
print("Mean accuracy:", cv_scores.mean())

Cross-validated accuracy scores: [0.92   0.935  0.9475 0.915  0.9325]
Mean accuracy: 0.93


## 📊 4. Evaluation & Prediction

In [76]:
# Predict on the held-out test split and print the per-class
# precision / recall / F1 summary.
y_pred = model.predict(X_test_scaled)

report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.94      0.97      0.96       440
           1       0.73      0.58      0.65        60

    accuracy                           0.92       500
   macro avg       0.84      0.78      0.80       500
weighted avg       0.92      0.92      0.92       500

