# In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler

# Read the raw weather table from disk
df = pd.read_csv("Pune.csv")

# Parse the 'date' strings into pandas timestamps
df['date'] = pd.to_datetime(df['date'])

# Remember the most recent timestamp present in the data
last_date = df['date'].max()

# Window size: each training sample spans 30 consecutive rows
# (30 days if the file holds one row per day)
sequence_length = 30

# Swap the timestamp for numeric calendar features; the new columns are
# appended in the order year, month, day, hour and 'date' itself is removed
df = df.assign(
    year=df['date'].dt.year,
    month=df['date'].dt.month,
    day=df['date'].dt.day,
    hour=df['date'].dt.hour,
).drop(columns=['date'])

# Min-max scale every column into the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df)

# Prepare dataset
def create_sequences(data, seq_length):
    """Slice a time-ordered array into sliding windows and next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is the row that
    immediately follows it, ``data[i + seq_length]``.

    Args:
        data: Array-like whose first axis indexes the time step.
        seq_length: Number of consecutive rows in each input window. Must not
            be negative.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_windows, seq_length, *data.shape[1:])`` and ``y`` has shape
        ``(n_windows, *data.shape[1:])``, where
        ``n_windows = max(len(data) - seq_length, 0)``.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    data = np.asarray(data)
    n_windows = len(data) - seq_length
    trailing_shape = data.shape[1:]

    if n_windows <= 0:
        # Too few rows for even one window. Return empty arrays that still
        # carry the window and feature dimensions, so callers can read
        # X.shape[1] / X.shape[2] instead of hitting an IndexError on a
        # 1-D empty array.
        return (
            np.empty((0, seq_length) + trailing_shape, dtype=data.dtype),
            np.empty((0,) + trailing_shape, dtype=data.dtype),
        )

    X = np.stack([data[start:start + seq_length] for start in range(n_windows)])
    # The targets are simply every row from seq_length onwards; copy so the
    # result does not alias the caller's array.
    y = data[seq_length:].copy()
    return X, y

# Turn the scaled matrix into (input window, next row) training pairs
X, y = create_sequences(scaled_data, sequence_length)

# Positional index of the 'temperature_2m' column
temperature_idx = df.columns.get_loc("temperature_2m")

# Assemble the network one layer at a time: two stacked LSTMs, each followed
# by dropout, then a small dense head
model = Sequential()
model.add(
    LSTM(
        100,
        return_sequences=True,  # hand the full sequence to the second LSTM
        input_shape=(X.shape[1], X.shape[2]),
    )
)
model.add(Dropout(0.2))
model.add(LSTM(100, return_sequences=False))  # keep only the final state
model.add(Dropout(0.2))
model.add(Dense(50, activation='relu'))
# One linear output per target column, so outputs are not bounded in sign
model.add(Dense(y.shape[1], activation='linear'))

# Mean-squared-error regression, optimised with Adam
model.compile(loss='mse', optimizer='adam')

# Fit for 20 epochs, holding out 10% of the samples for validation
model.fit(
    X,
    y,
    batch_size=32,
    epochs=20,
    validation_split=0.1,
)

# Function to predict future weather for extended period (2025 & 2026)
def predict_future_weather_extended(start_date, days=730):
    """Roll the trained model forward and return its forecasts as a DataFrame.

    The forecast is autoregressive: each predicted row is appended to the
    input window and the oldest row is dropped before the next prediction.
    Relies on the module-level ``model``, ``scaler``, ``X``, ``df``,
    ``sequence_length`` and ``temperature_idx``.

    Args:
        start_date: Timestamp assigned to the first predicted row; each later
            row is dated one day after the previous one.
        days: Number of steps to predict. Zero or a negative value yields an
            empty frame.

    Returns:
        DataFrame with a leading ``date`` column followed by one column per
        column of ``df``, converted back to the original scale. Every column
        except ``temperature_2m`` is floored at 0.
    """
    feature_columns = df.columns

    if days <= 0:
        # Nothing to predict. Return an empty frame with the usual columns
        # instead of letting scaler.inverse_transform fail on an empty
        # 1-D array.
        empty_df = pd.DataFrame(
            np.empty((0, len(feature_columns))), columns=feature_columns
        )
        empty_df.insert(0, 'date', pd.to_datetime([]))
        return empty_df

    n_features = X.shape[2]
    # True for every output position except temperature_2m.
    clamp_scaled = np.arange(n_features) != temperature_idx

    # NOTE(review): X[-1] is the last training window, whose target is the
    # final row of the scaled data. The first forecast step therefore
    # re-predicts that final observed row rather than the step after it.
    # Confirm this is intended.
    window = X[-1].reshape(1, sequence_length, n_features)

    scaled_forecasts = []
    for _ in range(days):
        # verbose=0 avoids printing one progress bar per forecast step.
        step = model.predict(window, verbose=0)[0]

        # Floor the non-temperature outputs at 0 in scaled space. With the
        # (0, 1) min-max scaling, 0 is each column's fitted minimum.
        step[clamp_scaled] = np.maximum(step[clamp_scaled], 0)

        scaled_forecasts.append(step)
        # np.roll returns a new array, so the training data in X is never
        # modified; the wrapped-around oldest row is overwritten below.
        window = np.roll(window, shift=-1, axis=1)
        window[0, -1, :] = step

    # Convert predictions back to the original scale
    forecasts = scaler.inverse_transform(np.array(scaled_forecasts))

    # Only 'temperature_2m' may go below zero in original units
    clamp_original = feature_columns != "temperature_2m"
    forecasts[:, clamp_original] = np.maximum(forecasts[:, clamp_original], 0)

    # Label the rows with consecutive daily dates beginning at start_date
    future_dates = [start_date + pd.Timedelta(days=offset) for offset in range(days)]
    future_weather_df = pd.DataFrame(forecasts, columns=feature_columns)
    future_weather_df.insert(0, 'date', future_dates)

    return future_weather_df

# Forecast 730 daily steps (intended to cover 2025 and 2026), with the first
# row dated at the last date found in the data
future_weather_2025_2026 = predict_future_weather_extended(start_date=last_date, days=730)

# Destination file for the forecast table
output_path = 'predictions.csv'

# Write the forecasts to CSV without the row index
future_weather_2025_2026.to_csv(output_path, index=False)