Epoch 0, Loss: 0.2511
Epoch 1000, Loss: 0.2499
Epoch 2000, Loss: 0.2493
Epoch 3000, Loss: 0.2439
Epoch 4000, Loss: 0.2094

Predictions after training:
[0 0] -> 0.217 (Target: 0)
[0 1] -> 0.661 (Target: 1)
[1 0] -> 0.618 (Target: 1)
[1 1] -> 0.541 (Target: 0)


In [None]:
import os

# To rename a file within the Colab environment using Python:

# 1. Create a dummy file for demonstration (if it doesn't exist)
dummy_old_filename = 'my_old_file.txt'
dummy_new_filename = 'my_new_file.txt'

if not os.path.exists(dummy_old_filename):
    with open(dummy_old_filename, 'w') as f:
        f.write('This is a temporary test file for renaming.')
    print(f"Created '{dummy_old_filename}' for demonstration.")
else:
    print(f"'{dummy_old_filename}' already exists, skipping creation.")

# 2. Rename the file
# Be careful with this operation, as it will overwrite the target file
# if it already exists without warning on some systems, or raise an error on others.
if os.path.exists(dummy_old_filename):
    try:
        os.rename(dummy_old_filename, dummy_new_filename)
        print(f"Successfully renamed '{dummy_old_filename}' to '{dummy_new_filename}'.")
    except FileNotFoundError:
        print(f"Error: '{dummy_old_filename}' not found.")
    except Exception as e:
        print(f"An error occurred while renaming: {e}")
else:
    print(f"Cannot rename: '{dummy_old_filename}' does not exist.")

# 3. Verify the renaming
print(f"Does '{dummy_old_filename}' still exist? {os.path.exists(dummy_old_filename)}")
print(f"Does '{dummy_new_filename}' now exist? {os.path.exists(dummy_new_filename)}")

# You can check the file browser on the left sidebar to see the changes.


Created 'my_old_file.txt' for demonstration.
Successfully renamed 'my_old_file.txt' to 'my_new_file.txt'.
Does 'my_old_file.txt' still exist? False
Does 'my_new_file.txt' now exist? True


In [1]:
# Here is a “student exam pass/fail” scenario with full code and simple explanations.

# Scenario: Will a student pass the exam?
# Inputs (2 features):

# Study hours (0 = didn’t study, 1 = studied enough)

# Class attendance (0 = poor, 1 = good)

# Output:

# 1 = passes exam

# 0 = fails exam

# Rule (hidden from model, but intuitive):

# Student usually passes if both study and attendance are good.

import numpy as np

# 1. Activation functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)


# 2. Dataset: [study_hours, attendance]
# 0 = low, 1 = high/good
X = np.array([
    [0, 0],   # didn't study, didn't attend
    [0, 1],   # didn't study, attended class
    [1, 0],   # studied, but didn't attend
    [1, 1]    # studied and attended
])

# y: did the student pass? (0 = fail, 1 = pass)
y = np.array([
    [0],  # low study, low attendance -> fail
    [0],  # low study, good attendance -> likely fail
    [0],  # good study, low attendance -> risky, mark fail
    [1]   # good study, good attendance -> pass
])


# 3. Initialize weights and biases
np.random.seed(1)
weights_input_hidden = np.random.rand(2, 2)   # 2 inputs -> 2 hidden neurons
weights_hidden_output = np.random.rand(2, 1)  # 2 hidden -> 1 output neuron
bias_hidden = np.zeros((1, 2))
bias_output = np.zeros((1, 1))

# Training settings
learning_rate = 0.1
epochs = 5000


# 4. Training loop
for epoch in range(epochs):

    # ---- Forward pass ----
    # Hidden layer
    hidden_input = np.dot(X, weights_input_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)

    # Output layer
    final_input = np.dot(hidden_output, weights_hidden_output) + bias_output
    final_output = sigmoid(final_input)

    # ---- Error ----
    error = y - final_output

    # ---- Backpropagation ----
    d_output = error * sigmoid_derivative(final_output)
    d_hidden = d_output.dot(weights_hidden_output.T) * sigmoid_derivative(hidden_output)

    # ---- Update weights and biases ----
    weights_hidden_output += hidden_output.T.dot(d_output) * learning_rate
    bias_output += np.sum(d_output, axis=0, keepdims=True) * learning_rate

    weights_input_hidden += X.T.dot(d_hidden) * learning_rate
    bias_hidden += np.sum(d_hidden, axis=0, keepdims=True) * learning_rate

    # Print loss sometimes
    if epoch % 1000 == 0:
        loss = np.mean(np.square(error))
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


# 5. Predictions after training
print("\nPredictions after training:")
for i, inputs in enumerate(X):
    hidden_output = sigmoid(np.dot(inputs, weights_input_hidden) + bias_hidden)
    final_output = sigmoid(np.dot(hidden_output, weights_hidden_output) + bias_output)
    print(f"{inputs} -> {final_output[0][0]:.3f} (Target: {y[i][0]})")


Epoch 0, Loss: 0.2662
Epoch 1000, Loss: 0.1434
Epoch 2000, Loss: 0.0228
Epoch 3000, Loss: 0.0069
Epoch 4000, Loss: 0.0036

Predictions after training:
[0 0] -> 0.004 (Target: 0)
[0 1] -> 0.050 (Target: 0)
[1 0] -> 0.047 (Target: 0)
[1 1] -> 0.933 (Target: 1)


In [5]:
sigmoid(0.523)

0.6278489986434628

In [6]:
sigmoid(0.8415)

0.698781039260481

In [None]:
# Here is a Multilayer neural network example in TensorFlow that shows:

# Multilayer architecture (hidden layers)

# Gradient descent via optimizers

# Effect of learning rate (SGD vs Adam)

# Scenario:

# To Predict whether a customer will churn (leave a subscription) or stay, using simple numeric features.

# 1. Setup and toy churn dataset

import tensorflow as tf
import numpy as np

tf.random.set_seed(1)
np.random.seed(1)

# -----------------------------
# 1. Toy "customer churn" data
# -----------------------------
# Features:
# [months_with_company, support_calls]
X = np.array([
    [1, 5],   # new, many issues
    [2, 4],
    [3, 3],
    [12, 1],  # loyal, few issues
    [24, 0],
    [18, 1],
    [4, 4],
    [6, 3],
    [10, 2],
    [30, 0]
], dtype=np.float32)

# Labels: 1 = churn, 0 = stays
y = np.array([
    [1],  # likely to churn
    [1],
    [1],
    [0],  # likely to stay
    [0],
    [0],
    [1],
    [1],
    [0],
    [0]
], dtype=np.float32)

# Scale features roughly (simple normalization)
X_mean = X.mean(axis=0, keepdims=True)
X_std = X.std(axis=0, keepdims=True)
X_norm = (X - X_mean) / X_std

# Customers with few months + many support calls tend to churn.

# Long-term, low-issue customers tend to stay.

# -----------------------------
# 2. Multilayer neural network
# -----------------------------
def build_mlp():
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(2,)),          # 2 input features
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(4, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')  # churn probability
    ])
    return model

# Two hidden layers (8 and 4 neurons) make it a multilayer network (MLP).

# relu helps nonlinear decision boundaries; sigmoid outputs probability.

# -----------------------------
# 3. Train with SGD (small LR)
# -----------------------------
model_sgd = build_mlp()

sgd_small = tf.keras.optimizers.SGD(learning_rate=0.01)  # gradient descent step size [web:49]

model_sgd.compile(optimizer=sgd_small,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

history_sgd = model_sgd.fit(X_norm, y,
                            epochs=200,
                            verbose=0)

print("Final loss (SGD, lr=0.01):", history_sgd.history['loss'][-1])
print("Final acc  (SGD, lr=0.01):", history_sgd.history['accuracy'][-1])

Final loss (SGD, lr=0.01): 0.6096780896186829
Final acc  (SGD, lr=0.01): 0.8999999761581421


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
import numpy as np

# Generate some dummy data for demonstration
# Assuming input_shape=(10, 1) means 10 time steps, 1 feature per step
num_samples = 100
X_train = np.random.rand(num_samples, 10, 1)
y_train = np.random.rand(num_samples, 1)

model = Sequential([
    SimpleRNN(32, activation='tanh', input_shape=(10, 1)),
    Dense(1)
])

model.compile(
    optimizer='adam',
    loss='mse'
)

model.fit(X_train, y_train, epochs=20)

Epoch 1/20


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.6274
Epoch 2/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.2381
Epoch 3/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.1197
Epoch 4/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.1290
Epoch 5/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.1258
Epoch 6/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.1036
Epoch 7/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0958
Epoch 8/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0969
Epoch 9/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0945
Epoch 10/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0916
Epoch 11/20
[1m4/4[0m [32m━

<keras.src.callbacks.history.History at 0x7a6b18def740>

In [None]:
# Scenario:
# A factory measures daily average machine temperature and wants to predict tomorrow’s temperature from the last 5 days to spot overheating trends.

# Typical RNN use cases:

# Time-series forecasting (stock prices, weather, sensor data).

# Text sequences (next word prediction, sentiment).

# Event sequences (clickstreams, logs, medical signals).

# Vanishing and exploding gradients (intuitive)
# Vanishing gradients: during backpropagation through many time steps, gradients get multiplied by small numbers repeatedly and become almost zero, so early time steps learn almost nothing.

# Exploding gradients: the opposite; gradients keep growing and become huge, causing unstable updates and sometimes NaNs.

# RNNs are especially prone because the same recurrent weights are applied at each time step, so gradients are multiplied many times.

# Common fixes: use gated units (LSTM/GRU), gradient clipping, better initialization, or skip connections.

# 1. Data preparation for Simple RNN

import numpy as np
import tensorflow as tf

tf.random.set_seed(1)
np.random.seed(1)

# -----------------------------
# 1. Create toy temperature data
# -----------------------------
# 60 days of temperatures (in °C) with a mild upward trend
days = np.arange(60)
temps = 30 + 0.05 * days + np.sin(days / 3)  # base + trend + small oscillation

# Turn it into supervised sequences:
# use last 5 days -> predict next day
window_size = 5
X_list, y_list = [], []

for i in range(len(temps) - window_size):
    X_list.append(temps[i:i+window_size])     # 5 consecutive days
    y_list.append(temps[i+window_size])       # next day's temp

X = np.array(X_list)  # shape: (num_samples, 5)
y = np.array(y_list)  # shape: (num_samples,)

# Reshape for RNN: (batch, time_steps, features)
X = X[..., np.newaxis]   # (num_samples, 5, 1)
y = y[..., np.newaxis]   # (num_samples, 1)

# Simple train/val split
train_size = int(0.8 * len(X))
X_train, X_val = X[:train_size], X[train_size:]
y_train, y_val = y[:train_size], y[train_size:]

# Sequence length = 5 time steps, 1 feature (temperature).

# RNN will learn a pattern: “given last 5 days, what is tomorrow?”.

# 2. Simple RNN model with gradient clipping

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

# -----------------------------
# 2. Build Simple RNN model
# -----------------------------
model = Sequential([
    SimpleRNN(
        units=16,
        activation='tanh',          # default, keeps hidden state in [-1,1] [web:72][web:78]
        input_shape=(window_size, 1)
    ),
    Dense(1)                        # predict next temperature
])

model.summary()

# SimpleRNN processes the sequence day-by-day, keeping a hidden state.

# Final Dense layer outputs a single number (next day’s temperature).

# Training with gradient descent (optimizer + learning rate)

# -----------------------------
# 3. Compile with optimizer & learning rate
# -----------------------------
# SGD with gradient clipping to reduce exploding gradients
optimizer = tf.keras.optimizers.SGD(
    learning_rate=0.01,         # step size for gradient descent [web:49]
    clipnorm=1.0                # clip gradient L2 norm to prevent explosion [web:80]
)

model.compile(optimizer=optimizer,
              loss='mse',       # regression
              metrics=['mae'])

# -----------------------------
# 4. Train the model
# -----------------------------
history = model.fit(
    X_train, y_train,
    epochs=200,
    batch_size=8,
    validation_data=(X_val, y_val),
    verbose=0
)

print("Final train loss:", history.history['loss'][-1])
print("Final val loss  :", history.history['val_loss'][-1])

# Optimizer implements gradient descent: compute gradients, step in negative gradient direction.

# learning_rate controls how big each step is; too large can worsen exploding gradients.

# clipnorm clips gradient norms, a standard defence against exploding gradients in RNNs.

# 4. Comparing with Adam (optional variation)

# -----------------------------
# 5. Same RNN with Adam
# -----------------------------
model_adam = Sequential([
    SimpleRNN(16, activation='tanh', input_shape=(window_size, 1)),
    Dense(1)
])

adam_opt = tf.keras.optimizers.Adam(
    learning_rate=0.01,   # adaptive gradient descent [web:51]
    clipnorm=1.0
)

model_adam.compile(optimizer=adam_opt,
                   loss='mse',
                   metrics=['mae'])

history_adam = model_adam.fit(
    X_train, y_train,
    epochs=200,
    batch_size=8,
    validation_data=(X_val, y_val),
    verbose=0
)

print("Final train loss (Adam):", history_adam.history['loss'][-1])
print("Final val loss  (Adam):", history_adam.history['val_loss'][-1])

# Adam adapts learning rates per parameter and usually trains faster and more stably on sequence tasks.

# Same clipnorm again to prevent exploding gradients.

# 5. Making a prediction

# -----------------------------
# 6. Predict next temperature
# -----------------------------
last_5_days = temps[-window_size:]            # real last 5 days
input_seq = last_5_days.reshape(1, window_size, 1)

next_temp_sgd = model.predict(input_seq, verbose=0)[0, 0]
next_temp_adam = model_adam.predict(input_seq, verbose=0)[0, 0]

print("Last 5 days:", np.round(last_5_days, 2))
print(f"Predicted next temp (SGD) : {next_temp_sgd:.2f} °C")
print(f"Predicted next temp (Adam): {next_temp_adam:.2f} °C")

# This simulates the real-world use: feed last few measurements, get next-step forecast.

# This example gives you:

# A concrete time-series scenario for RNN.

# A SimpleRNN model using Keras/TensorFlow.

# Discussion hooks for vanishing/exploding gradients, with gradient clipping and optimizer choice as mitigations.

  super().__init__(**kwargs)


Final train loss: 0.9931224584579468
Final val loss  : 1.458232045173645
Final train loss (Adam): 0.9896753430366516
Final val loss  (Adam): 1.4385826587677002
Last 5 days: [32.26 32.62 33.   33.37 33.68]
Predicted next temp (SGD) : 31.33 °C
Predicted next temp (Adam): 31.34 °C


In [None]:
# LSTM Model

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

model = Sequential([
    LSTM(50, return_sequences=False, input_shape=(10, 1)),
    Dense(1)
])

model.compile(
    optimizer='adam',
    loss='mse'
)

model.fit(X_train, y_train, epochs=20)


Epoch 1/20


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - loss: 1011.2144
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 1003.2014
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 995.1638
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 987.1111
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 979.0270
Epoch 6/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 970.8619
Epoch 7/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 962.5461
Epoch 8/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 954.0143
Epoch 9/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 945.2170
Epoch 10/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 936.1152
Epoch 11

<keras.src.callbacks.history.History at 0x7a6aff0e8260>

In [None]:
# Scenario and use case
# Scenario:
# Predict next day’s website traffic (visits) from the last 7 days to help plan server capacity.

# Typical LSTM/GRU uses:

# Time series: stock prices, traffic, energy demand.


# NLP: next-word prediction, ftranslation, sentiment.


# Sequences: clickstreams, sensor readings, speech.


# 1. Data preparation

import numpy as np
import tensorflow as tf

tf.random.set_seed(1)
np.random.seed(1)

# -----------------------------
# 1. Create synthetic traffic data
# -----------------------------
days = np.arange(200)
# baseline 1000 visits, weekly seasonality + small upward trend
traffic = 1000 + 30 * np.sin(2 * np.pi * days / 7) + 0.8 * days

window_size = 7   # use last 7 days -> predict next day

X_list, y_list = [], []
for i in range(len(traffic) - window_size):
    X_list.append(traffic[i:i + window_size])
    y_list.append(traffic[i + window_size])

X = np.array(X_list)            # (samples, 7)
y = np.array(y_list)            # (samples,)

# Normalize (simple)
mean = X.mean()
std = X.std()
X_norm = (X - mean) / std
y_norm = (y - mean) / std

# Reshape for RNN: (batch, time_steps, features)
X_norm = X_norm[..., np.newaxis]   # (samples, 7, 1)
y_norm = y_norm[..., np.newaxis]   # (samples, 1)

# Train/validation split
split = int(0.8 * len(X_norm))
X_train, X_val = X_norm[:split], X_norm[split:]
y_train, y_val = y_norm[:split], y_norm[split:]

# This creates a realistic-looking daily traffic series with weekly seasonality and a trend.

# 2. LSTM architecture and working model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# -----------------------------
# 2. Build LSTM model
# -----------------------------
def build_lstm_model():
    model = Sequential([
        LSTM(
            units=32,
            activation='tanh',
            recurrent_activation='sigmoid',
            input_shape=(window_size, 1)
        ),
        Dense(1)  # predict normalized next-day traffic
    ])
    return model

lstm_model = build_lstm_model()
lstm_model.summary()

# LSTM cell behaviour (intuitive):

# Maintains a cell state that flows along time steps, plus a hidden state.


# Uses input, forget, and output gates to decide

# what new information to add,

# what to erase,

# what to output at each time step.


# This design helps keep important gradients from vanishing across many steps.

# 3. Train LSTM (optimizer + learning rate)
# -----------------------------
# 3. Compile with Adam optimizer
# -----------------------------
optimizer = tf.keras.optimizers.Adam(learning_rate=0.005)  # adaptive GD [web:51]

lstm_model.compile(
    optimizer=optimizer,
    loss='mse',
    metrics=['mae']
)

# -----------------------------
# 4. Train the model
# -----------------------------
history_lstm = lstm_model.fit(
    X_train, y_train,
    epochs=40,
    batch_size=16,
    validation_data=(X_val, y_val),
    verbose=0
)

print("LSTM final train loss:", history_lstm.history['loss'][-1])
print("LSTM final val loss  :", history_lstm.history['val_loss'][-1])
# Adam performs gradient descent with adaptive learning rates and momentum, well suited for LSTM training.


# Learning rate 0.005 balances stability and speed; adjusting it changes convergence behaviour.

# 4. GRU model on the same data

from tensorflow.keras.layers import GRU

# -----------------------------
# 5. Build GRU model
# -----------------------------
def build_gru_model():
    model = Sequential([
        GRU(
            units=32,
            activation='tanh',
            recurrent_activation='sigmoid',
            input_shape=(window_size, 1)
        ),
        Dense(1)
    ])
    return model

gru_model = build_gru_model()

gru_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss='mse',
    metrics=['mae']
)

history_gru = gru_model.fit(
    X_train, y_train,
    epochs=40,
    batch_size=16,
    validation_data=(X_val, y_val),
    verbose=0
)

print("GRU final train loss:", history_gru.history['loss'][-1])
print("GRU final val loss  :", history_gru.history['val_loss'][-1])

# GRU uses update and reset gates with a single hidden state, merging some LSTM gates.


# For the same units, GRU has fewer parameters and is usually faster, often with similar accuracy.


# 5. Prediction and LSTM vs GRU comparison

# -----------------------------
# 6. Predict next day's traffic
# -----------------------------
last_week = traffic[-window_size:]
last_week_norm = ((last_week - mean) / std).reshape(1, window_size, 1)

pred_lstm_norm = lstm_model.predict(last_week_norm, verbose=0)[0, 0]
pred_gru_norm = gru_model.predict(last_week_norm, verbose=0)[0, 0]

pred_lstm = pred_lstm_norm * std + mean
pred_gru = pred_gru_norm * std + mean

print("Last 7 days traffic:", np.round(last_week, 1))
print(f"LSTM predicted next day traffic: {pred_lstm:.1f} visits")
print(f"GRU  predicted next day traffic: {pred_gru:.1f} visits")

# In practice:

# Start with GRU when you want speed and similar performance.


# Use LSTM when very long-term dependencies and fine memory control matter

  super().__init__(**kwargs)


LSTM final train loss: 0.0004889776464551687
LSTM final val loss  : 0.020924631506204605
GRU final train loss: 0.00027973001124337316
GRU final val loss  : 0.011794696561992168
Last 7 days traffic: [1141.4 1126.  1132.5 1156.8 1181.1 1187.6 1172.2]
LSTM predicted next day traffic: 1144.7 visits
GRU  predicted next day traffic: 1142.6 visits


#### A cold storage warehouse stores vaccines and medicines.
#### The storage temperature must remain stable to avoid spoilage.
#### The system records the daily average temperature of the cold room.
#### The goal is to predict tomorrow’s temperature using the last 7 days of data 
#### so that alerts can be generated before temperatures go out of safe limits.

In [8]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

temps = np.array([4.1,4.0,3.9,4.2,4.1,4.0,3.8,4.0,3.9,4.1])

scaler = MinMaxScaler()
temps_scaled = scaler.fit_transform(temps.reshape(-1,1))

X, y = [], []
for i in range(len(temps_scaled)-7):
    X.append(temps_scaled[i:i+7])
    y.append(temps_scaled[i+7])

X, y = np.array(X), np.array(y)

In [9]:
y

array([[0.5 ],
       [0.25],
       [0.75]])

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

lstm_model = Sequential([
    LSTM(50, activation='tanh', input_shape=(7,1)),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.fit(X, y, epochs=50, batch_size=1, verbose=0)


2025-12-29 16:16:16.297588: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-12-29 16:16:17.007842: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-12-29 16:16:17.410114: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-12-29 16:16:18.552009: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-29 16:16:31.909653: E external/local_xla/xla/

<keras.src.callbacks.history.History at 0x7fd55d2b5510>

In [3]:
from tensorflow.keras.layers import GRU

gru_model = Sequential([
    GRU(50, activation='tanh', input_shape=(7,1)),
    Dense(1)
])

gru_model.compile(optimizer='adam', loss='mse')
gru_model.fit(X, y, epochs=50, batch_size=1, verbose=0)


<keras.src.callbacks.history.History at 0x7fd53c7b6b00>

In [4]:
last_7_days = temps_scaled[-7:].reshape(1,7,1)

lstm_pred = scaler.inverse_transform(lstm_model.predict(last_7_days))
gru_pred = scaler.inverse_transform(gru_model.predict(last_7_days))

print("LSTM Prediction:", lstm_pred[0][0])
print("GRU Prediction :", gru_pred[0][0])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 628ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step
LSTM Prediction: 3.9976902
GRU Prediction : 3.9829915


In [5]:
SAFE_MIN = 2.0
SAFE_MAX = 8.0

predicted_temp = lstm_pred[0][0]   # or gru_pred[0][0]

if predicted_temp < SAFE_MIN or predicted_temp > SAFE_MAX:
    print("⚠️ ALERT: Temperature may go out of safe range tomorrow!")
else:
    print("✅ Temperature expected to remain safe.")

✅ Temperature expected to remain safe.
