# 🌦️ Weather Forecasting using Machine Learning
Predict the next day's temperature using regression-based machine learning models.

**Dataset:** Synthetic historical weather data
**Tools:** Python, Pandas, Scikit-learn, Matplotlib, Seaborn

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

In [None]:
# Load the sample weather dataset from the working directory, then preview
# the first five rows to confirm the file parsed as expected.
csv_path = 'weather_forecasting_sample.csv'
df = pd.read_csv(csv_path)
df.head(n=5)

In [None]:
# Per-column summary statistics, using pandas' default describe() settings.
summary_stats = df.describe()
summary_stats

In [None]:
# Plot the pairwise correlations between columns as an annotated heatmap.
# Restrict to numeric/boolean columns first: pandas >= 2.0 raises from
# DataFrame.corr() when a non-numeric column (e.g. a date string) is present.
# When every column is already numeric this selects the whole frame.
numeric_df = df.select_dtypes(include=['number', 'bool'])

plt.figure(figsize=(8, 6))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
plt.title('Feature Correlation Heatmap')
plt.show()

In [None]:
# Separate the prediction target from the remaining columns, which are all
# used as features.
y = df['Next Day Temp']
X = df.drop('Next Day Temp', axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): train_test_split shuffles rows by default — if the rows are
# time-ordered, confirm that a random split is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Baseline model: linear regression fitted on the training split.
lr = LinearRegression().fit(X_train, y_train)
lr_preds = lr.predict(X_test)

# Score the held-out predictions with mean absolute error and R^2.
lr_mae = mean_absolute_error(y_test, lr_preds)
lr_r2 = r2_score(y_test, lr_preds)
print('Linear Regression MAE:', lr_mae)
print('Linear Regression R2:', lr_r2)

In [None]:
# Candidate hyperparameters: forest size and maximum tree depth
# (None leaves depth unrestricted).
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5, None],
}

# Exhaustive search over the grid with 3-fold cross-validation on the
# training split. `rf` is the fitted search object, not a bare forest.
base_forest = RandomForestRegressor(random_state=42)
rf = GridSearchCV(base_forest, param_grid, cv=3)
rf.fit(X_train, y_train)

# predict() on the search object delegates to the best estimator found.
rf_preds = rf.predict(X_test)

# Score the held-out predictions and report the winning configuration.
rf_mae = mean_absolute_error(y_test, rf_preds)
rf_r2 = r2_score(y_test, rf_preds)
print('Random Forest MAE:', rf_mae)
print('Random Forest R2:', rf_r2)
print('Best Params:', rf.best_params_)

In [None]:
# Overlay the held-out actual values and the random-forest predictions,
# plotted in test-set row order so each x position is one test sample.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(y_test.values, label='Actual', marker='o')
ax.plot(rf_preds, label='Predicted', marker='x')

ax.set_title('Random Forest Predictions vs Actual')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Next Day Temperature')
ax.legend()

fig.tight_layout()
plt.show()