In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

# Load dataset.
# Candidate CSV locations are tried in order: the absolute path this notebook
# was originally developed against, then a copy named 'weatherAUS.csv' in the
# current working directory (so the notebook also runs on other machines).
# Only a missing file moves on to the next candidate; any other read error
# (bad encoding, malformed CSV, ...) is allowed to surface.
csv_candidates = [
    'C:\\Users\\DELL\\OneDrive\\Desktop\\aiml\\weatherAUS.csv\\weatherAUS.csv',
    'weatherAUS.csv',
]

data = None
for csv_path in csv_candidates:
    try:
        data = pd.read_csv(csv_path)
        break
    except FileNotFoundError:
        continue

if data is None:
    # No CSV could be found: fall back to a small in-memory sample that has
    # the same columns the rest of the notebook reads.
    print("weatherAUS.csv not found. Using sample dataset.")
    sample_data = {
        'MinTemp': [12.0, 15.5, 10.2, 8.9, 14.3, 13.2, 11.8, 9.5, 16.0, 14.7],
        'MaxTemp': [22.1, 25.3, 19.8, 17.5, 23.0, 21.4, 20.0, 18.2, 24.5, 22.8],
        'Rainfall': [0.0, 2.4, 0.0, 1.2, 0.0, 0.8, 0.0, 3.0, 0.0, 1.5],
        'Humidity3pm': [45, 78, 52, 65, 50, 60, 48, 80, 55, 70],
        'WindSpeed3pm': [20, 15, 25, 10, 18, 22, 17, 12, 19, 14],
        'RainTomorrow': ['No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes']
    }
    data = pd.DataFrame(sample_data)

# Choose the predictor columns and build the binary target
features = ['MinTemp', 'MaxTemp', 'Rainfall', 'Humidity3pm', 'WindSpeed3pm']

# Rows with a missing value in any predictor are discarded first.
complete_rows = data[features].dropna()

# Encode the label for the surviving rows (Yes -> 1, No -> 0). Values that are
# neither 'Yes' nor 'No' (including missing labels) map to NaN and are dropped.
encoded_target = data.loc[complete_rows.index, 'RainTomorrow'].map({'Yes': 1, 'No': 0})
y = encoded_target.dropna()

# Keep only the predictor rows that still have a label, so X and y share one index
X = complete_rows.loc[y.index]

# Preview the modelling inputs: feature names, first rows of X, first labels of y
first_feature_rows = X.head()
first_targets = y.iloc[:5].values
print("Selected Features:", features)
print("Sample Data (first 5 rows):")
print(first_feature_rows)
print("Sample Targets (first 5):", first_targets)

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training rows only,
# and that same fitted transformation is then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a logistic-regression classifier on the standardised training data
model = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

# Evaluate model on the held-out rows
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print("\nModel Performance:")
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
# labels is pinned to both classes so the two target_names always line up with
# them. Without it, classification_report raises ValueError whenever only one
# class appears in y_test/y_pred (easy to hit with a very small test split).
print(
    classification_report(
        y_test,
        y_pred,
        labels=[0, 1],
        target_names=['No Rain', 'Rain'],
    )
)

# Predict for a sample
# Select the first test row as a one-row DataFrame (note the double brackets)
# rather than a bare NumPy array: the scaler was fitted on a DataFrame, and
# passing it an array without column names makes scikit-learn warn that
# "X does not have valid feature names".
sample = X_test.iloc[[0]]
sample_scaled = scaler.transform(sample)
prediction = model.predict(sample_scaled)[0]
print("\nSample Prediction:")
print(f"Features: {X_test.iloc[0].to_dict()}")
print(f"Predicted: {'Rain' if prediction == 1 else 'No Rain'}")
print(f"Actual: {'Rain' if y_test.iloc[0] == 1 else 'No Rain'}")

# Plot actual and predicted labels against 3pm humidity and save the figure to disk
fig, ax = plt.subplots(figsize=(8, 6))
humidity_3pm = X_test['Humidity3pm']
ax.scatter(humidity_3pm, y_test, color='blue', label='Actual', alpha=0.5)
ax.scatter(humidity_3pm, y_pred, color='red', label='Predicted', marker='x', alpha=0.5)
ax.set_xlabel('Humidity at 3pm (%)')
ax.set_ylabel('Rain Tomorrow (1=Rain, 0=No Rain)')
ax.set_title('Actual vs. Predicted Rain Tomorrow')
ax.legend()
ax.grid(True)
# The figure is written to a PNG and then closed, so nothing is rendered inline.
fig.savefig('weather_predictions.png')
plt.close(fig)

# Feature importance (based on coefficients)
# Rank by absolute magnitude: a large negative coefficient influences the
# prediction as strongly as a large positive one, so sorting on the signed
# value would push it to the bottom. The printed values keep their sign so
# the direction of each effect stays visible.
coefficients = pd.Series(model.coef_[0], index=features)
importance_order = coefficients.abs().sort_values(ascending=False).index
feature_importance = coefficients.loc[importance_order]
print("\nFeature Importance (Coefficients):")
print(feature_importance)

Selected Features: ['MinTemp', 'MaxTemp', 'Rainfall', 'Humidity3pm', 'WindSpeed3pm']
Sample Data (first 5 rows):
   MinTemp  MaxTemp  Rainfall  Humidity3pm  WindSpeed3pm
0     13.4     22.9       0.6         22.0          24.0
1      7.4     25.1       0.0         25.0          22.0
2     12.9     25.7       0.0         30.0          26.0
3      9.2     28.0       0.0         16.0           9.0
4     17.5     32.3       1.0         33.0          20.0
Sample Targets (first 5): [0. 0. 0. 0. 0.]

Model Performance:
Accuracy: 0.83
Classification Report:
              precision    recall  f1-score   support

     No Rain       0.85      0.95      0.90     21285
        Rain       0.71      0.39      0.51      5955

    accuracy                           0.83     27240
   macro avg       0.78      0.67      0.70     27240
weighted avg       0.82      0.83      0.81     27240






Sample Prediction:
Features: {'MinTemp': 16.4, 'MaxTemp': 25.3, 'Rainfall': 0.2, 'Humidity3pm': 72.0, 'WindSpeed3pm': 20.0}
Predicted: No Rain
Actual: No Rain

Feature Importance (Coefficients):
Humidity3pm     1.301129
Rainfall        0.296799
WindSpeed3pm    0.196800
MinTemp         0.143294
MaxTemp         0.004621
dtype: float64
