<a href="https://colab.research.google.com/github/dengathitu/Climate_Data_Time_Series/blob/main/Climate_Data_Time_Series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **1. Load the Dataset**

You've already loaded the dataset using Pandas in your notebook.

In [14]:
import pandas as pd
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense


from zipfile import ZipFile

uri = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip"
zip_path = keras.utils.get_file(origin=uri, fname="jena_climate_2009_2016.csv.zip")
zip_file = ZipFile(zip_path)
zip_file.extractall()
csv_path = "jena_climate_2009_2016.csv"

df = pd.read_csv(csv_path)

# **2. Data Preprocessing**

Time series data often requires specific preprocessing steps.

Feature Scaling: Scale the numerical features to a similar range.
Creating Sequences: Since RNNs process sequential data, you need to create input sequences and corresponding target values.

In [9]:
# Select relevant features
features = ['p (mbar)', 'T (degC)', 'rho (g/m**3)'] # Example features, choose based on your analysis
df_features = df[features]

# Scale the features
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_features)

# Create sequences
def create_sequences(data, sequence_length):
    sequences = []
    for i in range(len(data) - sequence_length):
        sequences.append(data[i:i + sequence_length])
    return np.array(sequences)

sequence_length = 24 # Example sequence length (e.g., 24 hours)
sequences = create_sequences(scaled_features, sequence_length)

# Define target variable (e.g., predicting temperature one step ahead)
target_index = features.index('T (degC)') # Index of the target feature
targets = scaled_features[sequence_length:, target_index]

# **3. Split the Dataset**

For time series data, you typically split chronologically to avoid data leakage.

In [5]:
# Split into training, validation, and test sets
train_split = int(0.7 * len(sequences))
val_split = int(0.85 * len(sequences))

x_train, x_val, x_test = sequences[:train_split], sequences[train_split:val_split], sequences[val_split:]
y_train, y_val, y_test = targets[:train_split], targets[train_split:val_split], targets[val_split:]

print(f"Training data shape: {x_train.shape}")
print(f"Validation data shape: {x_val.shape}")
print(f"Test data shape: {x_test.shape}")

Training data shape: (294368, 24, 3)
Validation data shape: (63079, 24, 3)
Test data shape: (63080, 24, 3)


# **4. Build the RNN Model**

You can use Keras to build your RNN model.

In [11]:
model = Sequential()
model.add(SimpleRNN(units=32, activation='relu', input_shape=(sequence_length, len(features))))
model.add(Dense(units=1)) # Output layer for predicting a single value

model.compile(optimizer='adam', loss='mse')

model.summary()

  super().__init__(**kwargs)


# **5. Train the Model**

Train the model on the training data and use the validation data for monitoring performance.

In [7]:
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_val, y_val))

Epoch 1/10
[1m9199/9199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 6ms/step - loss: 0.0074 - val_loss: 2.1656e-05
Epoch 2/10
[1m9199/9199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 6ms/step - loss: 1.8415e-05 - val_loss: 1.2957e-05
Epoch 3/10
[1m9199/9199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 6ms/step - loss: 1.6072e-05 - val_loss: 1.2750e-05
Epoch 4/10
[1m9199/9199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 6ms/step - loss: 1.4504e-05 - val_loss: 1.3700e-05
Epoch 5/10
[1m9199/9199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 6ms/step - loss: 1.3780e-05 - val_loss: 1.5125e-05
Epoch 6/10
[1m9199/9199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 6ms/step - loss: 1.3872e-05 - val_loss: 1.3754e-05
Epoch 7/10
[1m9199/9199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 6ms/step - loss: 1.3200e-05 - val_loss: 1.9211e-05
Epoch 8/10
[1m9199/9199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 6ms/ste

# **6. Optimize and Evaluate**

Optimization: You can optimize your pipeline by experimenting with different hyperparameters (e.g., number of units, activation functions, optimizer, learning rate), sequence lengths, features, and even different types of RNN layers (like LSTM or GRU). Use the validation set to guide your decisions.
Evaluation: After finding the best pipeline, evaluate it on the test set.

In [12]:
loss = model.evaluate(x_test, y_test)
print(f"Test loss: {loss}")

# You can also calculate other metrics like R-squared or Mean Absolute Error (MAE)
from sklearn.metrics import mean_absolute_error, r2_score

predictions = model.predict(x_test)

mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print(f"Test MAE: {mae}")
print(f"Test R2: {r2}")

[1m1972/1972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - loss: 0.9861
Test loss: 1.059733510017395
[1m1972/1972[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step
Test MAE: 1.0211098215691017
Test R2: -62.63765025148336


# **7. Use Model Attributes and Methods**

You can access model attributes and use methods for further analysis and deployment.

In [13]:
# Get model layers
print(model.layers)

# Get weights of a layer
print(model.layers[0].get_weights())

# Make predictions
sample_sequence = x_test[0].reshape(1, sequence_length, len(features))
predicted_value = model.predict(sample_sequence)
print(f"Predicted value: {predicted_value}")

[<SimpleRNN name=simple_rnn_1, built=True>, <Dense name=dense_1, built=True>]
[array([[-0.07776594, -0.37373573,  0.0630379 , -0.0490762 , -0.34550768,
         0.20495346,  0.11320016, -0.3524858 , -0.24686407, -0.06491673,
        -0.05401865,  0.19419464,  0.07123339, -0.33866635,  0.36392525,
         0.3533021 ,  0.20772097, -0.10102913, -0.28132161,  0.16052547,
        -0.34671456,  0.08844224,  0.03499475,  0.26180926, -0.02822104,
         0.15702632,  0.3018411 ,  0.35485306,  0.34840927,  0.18994907,
        -0.01518282, -0.02538961],
       [ 0.14107385, -0.33482468, -0.07361153,  0.05269152, -0.03859329,
         0.31170532,  0.06218708, -0.05230317,  0.27822623,  0.28114858,
         0.3161395 , -0.10014701,  0.2343277 ,  0.39168963,  0.37417647,
         0.01176816, -0.31296223,  0.06911892, -0.13593218,  0.33429596,
        -0.39971566, -0.4123844 ,  0.1616526 , -0.34361294, -0.08346364,
         0.30285487,  0.02804887,  0.01104361,  0.1212745 , -0.13387614,
        -0