# This project is about predicting the temperature 12 hours from now, given a set of past features. The data is from Germany and covers about 7 years at hourly granularity.

In [None]:
import keras
from keras import layers
from keras.datasets import imdb
from keras.utils import timeseries_dataset_from_array
import numpy as np
from matplotlib import pyplot as plt
import os

# Read the data from the csv file (do not worry); the header gives you the meaning of the features

In [None]:
from google.colab import files

# Open the Colab upload dialog; the keys of `uploaded` are used as filenames below.
uploaded = files.upload()

fname = list(uploaded.keys())[0]  # Get the uploaded filename
with open(fname) as f:
    data = f.read()
# splitlines() handles both "\n" and "\r\n" line endings, and dropping blank
# rows keeps a trailing newline at the end of the file from producing an empty
# record that would later fail to parse as floats.
lines = [row for row in data.splitlines() if row.strip()]
header = lines[0].split(",")  # first row holds the column names
lines = lines[1:]  # remaining rows are the data records
print(header)

# These lines compute the temperature (the target) and the raw_data (the features). The raw_data also contains the temperature since past temperature will be used to predict the next temperature

In [None]:
# Parse every data row into floats, skipping the first column of each row.
# `temperature` holds the prediction target (second parsed value of each row);
# `raw_data` holds all parsed values of each row, temperature included.
num_rows = len(lines)
num_features = len(header) - 1  # the first column is not parsed as a feature
temperature = np.zeros((num_rows,))
raw_data = np.zeros((num_rows, num_features))
for row_idx, row in enumerate(lines):
    fields = row.split(",")[1:]
    parsed = [float(field) for field in fields]
    temperature[row_idx] = parsed[1]
    raw_data[row_idx] = parsed

# Normalize the raw data

In [None]:
# The normalization statistics must come from the training rows only, but this
# cell runs before the cell that defines the split sizes. Compute the training
# row count (first 70% of the rows) here so the notebook runs top to bottom.
num_train_samples = len(raw_data) * 7 // 10

# Center each feature column on its training-set mean.
mean = raw_data[:num_train_samples].mean(axis=0)
raw_data -= mean
# Scale each feature column by its training-set standard deviation
# (computed after centering, which does not change the standard deviation).
std = raw_data[:num_train_samples].std(axis=0)
raw_data /= std

# Define the proper number of samples for the training, testing, and validation sets; do not create new sets

In [None]:
# Use the list 'lines' to size the 3 datasets: the first 70% of the rows are the
# training dataset, the next 20% the validation dataset, and the remaining rows
# (about 10%) the testing dataset.
# Integer arithmetic avoids floating-point rounding surprises in the sizes.
num_train_samples = len(lines) * 7 // 10
num_val_samples = len(lines) * 2 // 10
# The test split takes whatever is left, so the three sizes always sum to len(lines).
num_test_samples = len(lines) - num_train_samples - num_val_samples
print("num_train_samples:", num_train_samples)
print("num_val_samples:", num_val_samples)
print("num_test_samples:", num_test_samples)
print("total:",num_train_samples + num_val_samples + num_test_samples)

# This is the utility code to generate the data as discussed in the videos

In [None]:
sequence_length = 120  # number of consecutive timesteps in each input window
lookahead = 12 # predict 12 hours in the future
# Offset from the first timestep of a window to its target: the target lies
# `lookahead` steps after the window's last timestep.
delay = (sequence_length + lookahead - 1)
batch_size = 256

# In all three datasets the window starting at index i is paired with
# temperature[i + delay]; trimming the last `delay` rows from the inputs keeps
# inputs and targets the same length.
# `end_index` is exclusive, so each split ends exactly where the next one
# starts and no timestep is dropped between splits.
train_dataset = keras.utils.timeseries_dataset_from_array(
    raw_data[:-delay],
    targets=temperature[delay:],
    sequence_length=sequence_length,
    batch_size=batch_size,
    start_index=0,
    end_index=num_train_samples)

val_dataset = keras.utils.timeseries_dataset_from_array(
    raw_data[:-delay],
    targets=temperature[delay:],
    sequence_length=sequence_length,
    batch_size=batch_size,
    start_index=num_train_samples,
    end_index=num_train_samples + num_val_samples)

# The test range stops `delay` rows before the end of the split because the
# input array itself was trimmed by `delay` rows.
test_dataset = keras.utils.timeseries_dataset_from_array(
    raw_data[:-delay],
    targets=temperature[delay:],
    sequence_length=sequence_length,
    batch_size=batch_size,
    start_index=num_train_samples + num_val_samples,
    end_index=num_train_samples + num_val_samples + num_test_samples - delay)


# Now define the LSTM model and plot the results.

In [None]:
# Evaluate the model on the test dataset
test_loss, test_mae = ...
print(f"Test Loss: {test_loss:.4f}, Test MAE: {test_mae:.4f}")

In [None]:
# Generate predictions for the test dataset
predictions = ...

In [None]:
# Plot actual vs. predicted temperature values
import matplotlib.pyplot as plt

In [None]:
actual_temperatures = temperature[num_train_samples + num_val_samples + delay : num_train_samples + num_val_samples + num_test_samples]

plt.figure(figsize=(10, 5))
plt.plot(actual_temperatures, label="Actual Temperature", alpha=0.7)
plt.plot(predictions, label="Predicted Temperature", alpha=0.7)
plt.xlabel("Time")
plt.ylabel("Temperature (°C)")
plt.legend()
plt.title("Test Dataset: Actual vs. Predicted Temperature")
plt.show()