# In[None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

def prepare_weather_frame(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a chronologically sorted copy of *raw* with a rain target.

    Adds two columns:

    * ``'Date'`` - ``'Formatted Date'`` parsed to timezone-aware UTC
      timestamps.
    * ``'Rain Tomorrow'`` - 1 if the *next row's* ``'Precip Type'`` is
      ``'rain'``, else 0.

    Rows whose date cannot be parsed are dropped, and so is the final row,
    because it has no following observation to take a label from.

    NOTE(review): the label is taken from the next row, not the next calendar
    day. If the CSV holds more than one observation per day, the target is
    "rain at the next observation" - confirm against the data.

    Args:
        raw: Frame with at least 'Formatted Date' and 'Precip Type' columns.
            It is not modified.

    Returns:
        A new frame sorted by 'Date' with the two extra columns.
    """
    frame = raw.copy()

    # utc=True converts timestamps that carry differing UTC offsets to one
    # comparable datetime dtype; without it pandas may fall back to an object
    # column, which makes the sort below unreliable.
    frame['Date'] = pd.to_datetime(
        frame['Formatted Date'], errors='coerce', utc=True
    )

    # errors='coerce' turns unparseable dates into NaT. Such rows have no
    # position in time, so keeping them would corrupt the next-row labels.
    frame = frame.dropna(subset=['Date'])

    # A stable sort keeps rows with identical timestamps in file order.
    frame = frame.sort_values('Date', kind='mergesort')

    # A missing precipitation type on the next row counts as "no rain",
    # exactly as the equality test treats any non-'rain' value.
    next_precip = frame['Precip Type'].shift(-1)
    frame['Rain Tomorrow'] = (next_precip == 'rain').astype(int)

    # The last row has no successor; drop it instead of inventing a 0 label.
    return frame.iloc[:-1]


def rain_probability(clf, samples) -> np.ndarray:
    """Return the predicted probability of class 1 (rain) for each sample.

    The probability column is located through ``clf.classes_`` rather than a
    hard-coded index, so a model fitted on data containing a single class
    does not raise ``IndexError``.

    Args:
        clf: Fitted classifier exposing ``predict_proba`` and ``classes_``.
        samples: Feature rows accepted by ``clf.predict_proba``.

    Returns:
        One probability per sample; all zeros if the model never saw class 1.
    """
    proba = clf.predict_proba(samples)
    labels = list(clf.classes_)
    if 1 not in labels:
        return np.zeros(len(proba))
    return proba[:, labels.index(1)]


if __name__ == "__main__":
    # Load the dataset and derive the 'Date' / 'Rain Tomorrow' columns
    data = prepare_weather_frame(pd.read_csv('weatherHistory.csv'))

    # Define features and target variable
    features = ['Temperature (C)', 'Humidity', 'Wind Speed (km/h)', 'Pressure (millibars)']

    # Drop rows with missing feature values only after labelling, so every
    # label still refers to the true next observation.
    data = data.dropna(subset=features)
    X = data[features]
    y = data['Rain Tomorrow']

    # Split chronologically: the first 80% of rows train, the last 20% test.
    # Shuffling would put neighbouring rows on both sides of the split and
    # leak temporal information into the test metrics.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    # Initialize and train the model
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predict on the test set
    y_pred = model.predict(X_test)
    y_pred_prob = rain_probability(model, X_test)  # Probability of rain

    # Print evaluation metrics
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    # Example of predicting for a new data point. A DataFrame with the
    # training column names keeps the features aligned and avoids sklearn's
    # "X does not have valid feature names" warning.
    # NOTE(review): 80 assumes Humidity is stored as a percentage; if the CSV
    # stores a 0-1 fraction this should be 0.80 - confirm against the data.
    new_data = pd.DataFrame([[20, 80, 10, 1010]], columns=features)
    predicted_prob = rain_probability(model, new_data)[0] * 100
    rain_prediction = "Rain" if predicted_prob > 50 else "No Rain"
    print(f"Predicted Rain Probability for the new data point: {predicted_prob:.2f}%")
    print("-#-++++++++++++++++++++++++OUTPUT+++++++++++++++++++++++++++")
    print(f"Prediction: {rain_prediction}")
    print("=====================================================================")