# Traffic forecasting using LSTM

**Author:** Carnot Braun & Allan M. Sousa<br>
**Date created:** 2024/05/08<br>
**Last modified:** 2024/05/08<br>
**Description:** This example demonstrates how to forecast per-RSU CO2 emission time series with a Conv1D + LSTM model.

## Setup

In [None]:
!pip install pandas numpy matplotlib tensorflow keras scikit-learn seaborn

In [1]:
import pandas as pd
import numpy as np
import os
import typing
import matplotlib.pyplot as plt
import scipy
import time 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
import seaborn as sns

2024-07-17 15:36:23.756519: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-17 15:36:23.761737: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-17 15:36:23.802376: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-17 15:36:23.850270: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-17 15:36:23.898494: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registe

## Agregar as emissões por RSU, criando uma série temporal para cada uma

In [2]:
def load_and_aggregate_data(folder_path):
    """Load every CSV file in a folder and aggregate emissions per second.

    Each ``*.csv`` file is read positionally: the first row is skipped,
    column 0 is the time step (interpreted as seconds), column 3 is the
    emission value. Emission values that cannot be parsed as numbers count
    as 0. Rows are summed into 1-second bins and tagged with the file name
    (without extension) as ``road_id``.

    Args:
        folder_path: Directory containing the CSV files (one per RSU).

    Returns:
        A DataFrame with columns ``step``, ``c02_emission`` and ``road_id``,
        with the files concatenated in sorted file-name order.

    Raises:
        ValueError: If the folder contains no ``.csv`` file.
    """
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # row order of the result reproducible across machines and runs.
    csv_names = sorted(name for name in os.listdir(folder_path) if name.endswith('.csv'))
    if not csv_names:
        raise ValueError(f"no .csv files found in {folder_path!r}")

    all_data = []
    for filename in csv_names:
        file_path = os.path.join(folder_path, filename)
        road = os.path.splitext(filename)[0]
        # low_memory=False infers each column's dtype from the whole file
        # instead of chunk by chunk, which avoids mixed-type columns.
        a = pd.read_csv(file_path, sep=',', header=None, usecols=[0, 1, 3],
                        names=['step', 'road_id', 'c02_emission'], skiprows=[0],
                        low_memory=False)

        # Convert the time step (seconds) to datetime.
        a['step'] = pd.to_datetime(a['step'], unit='s')

        # Coerce unparsable emission values to NaN, then count them as 0.
        a['c02_emission'] = pd.to_numeric(a['c02_emission'], errors='coerce').fillna(0)

        # Sum emissions per 1-second bin; only the numeric column is kept.
        a = a.groupby([pd.Grouper(key='step', freq='1s')])[['c02_emission']].sum().reset_index()
        # The per-row road_id read from the file is replaced by the file name.
        a['road_id'] = road

        all_data.append(a)

    # Stack the per-file frames into a single DataFrame.
    return pd.concat(all_data, axis=0).reset_index(drop=True)

## Pré-processamento dos dados

In [3]:
def create_time_series(data, time_col, value_col, window=5):
    """Build a smoothed, time-indexed series from a DataFrame.

    Rows are sorted by ``time_col`` and ``value_col`` is replaced by its
    rolling mean over ``window`` rows. Rows whose time or smoothed value is
    missing (including the first ``window - 1`` rows) are dropped. The input
    DataFrame is not modified.

    Args:
        data: DataFrame containing at least ``time_col`` and ``value_col``.
        time_col: Name of the column used for ordering and as the index.
        value_col: Name of the column to smooth.
        window: Number of rows in the rolling-mean window (default 5).

    Returns:
        A single-column DataFrame (``value_col``) indexed by ``time_col``.
    """
    # Keep only the two columns that are returned, so NaNs in unrelated
    # columns cannot cause valid samples to be dropped below.
    time_series = data.sort_values(by=time_col)[[time_col, value_col]].copy()
    time_series[value_col] = time_series[value_col].rolling(window).mean()
    time_series = time_series.dropna()
    return time_series.set_index(time_col)

## Preparar os dados do modelo

In [4]:
def prepare_lstm_data(series, n_steps, horizon=15, smoothing_window=60):
    """Turn a 1-D series into supervised (window, target) pairs.

    The series is first smoothed with a rolling mean over
    ``smoothing_window`` values (leading values without a full window are
    dropped). For every start position ``i`` the input is
    ``smoothed[i:i + n_steps]`` and the target is
    ``smoothed[i + n_steps + horizon]``.

    Args:
        series: 1-D sequence of values.
        n_steps: Number of consecutive values in each input window.
        horizon: Offset of the target past the end of the window (default 15).
        smoothing_window: Size of the rolling-mean window (default 60).

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_samples, n_steps)`` and
        ``y`` has shape ``(n_samples,)``. When the series is too short to
        produce a sample, ``X`` has shape ``(0, n_steps)`` so that callers
        can still reshape or concatenate it.
    """
    smoothed = pd.Series(series).rolling(smoothing_window).mean().dropna().to_numpy()

    # The last usable start index i satisfies i + n_steps + horizon <= len - 1.
    n_samples = len(smoothed) - n_steps - horizon
    if n_samples <= 0:
        return np.empty((0, n_steps)), np.empty((0,))

    # Row i of window_idx holds the indices i, i + 1, ..., i + n_steps - 1.
    window_idx = np.arange(n_samples)[:, None] + np.arange(n_steps)
    X = smoothed[window_idx]
    y = smoothed[n_steps + horizon:n_steps + horizon + n_samples]
    return X, y

def create_lstm_model(input_shape):
    """Build and compile the Conv1D + LSTM regression model.

    Args:
        input_shape: Shape of one input sample, without the batch dimension
            (the calling code in this notebook passes ``(n_steps, 1)``).

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss) ending in
        a single-unit ``Dense`` layer.
    """
    model = Sequential()
    # Declare the input once with an explicit Input object instead of passing
    # `input_shape` to the layers: Keras recommends this for Sequential
    # models, and the argument has no purpose on a non-first layer.
    model.add(tf.keras.Input(shape=input_shape))
    model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=2, activation='relu'))
    model.add(LSTM(50, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

# Load and aggregate every CSV file found in the RSU data folder.
folder_path = '/home/carnot/hiaac/data/rsus_cologne_csv/rsus/'  # Change to the correct path
rsu_data = load_and_aggregate_data(folder_path)


## Treinamento da LSTM

In [5]:
# Number of consecutive values in each input window.
n_steps = 10
scaler = MinMaxScaler()

# One (X, y) pair per RSU for each split.
train_data = []
test_data = []

for rsu, group in rsu_data.groupby('road_id'):
    time_series = create_time_series(group, 'step', 'c02_emission')
    if time_series.empty:
        # Too few rows to survive smoothing; fit_transform would fail on it.
        continue

    # NOTE(review): the scaler is refit for every RSU on the full series,
    # i.e. before the train/test split, so test-period min/max leak into the
    # training inputs — confirm whether this is acceptable.
    series = scaler.fit_transform(time_series.values).flatten()
    X, y = prepare_lstm_data(series, n_steps)

    if len(X) == 0:
        # No complete window for this RSU; an empty array would break the
        # concatenation below.
        continue

    split_index = int(len(X) * 0.65)  # first 65% of the windows for training
    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]

    train_data.append((X_train, y_train))
    test_data.append((X_test, y_test))

if not train_data:
    raise ValueError("no RSU produced any training window; check the input data")

# Stack the per-RSU splits into single arrays.
X_train_agg = np.concatenate([data[0] for data in train_data], axis=0)
y_train_agg = np.concatenate([data[1] for data in train_data], axis=0)
X_test_agg = np.concatenate([data[0] for data in test_data], axis=0)
y_test_agg = np.concatenate([data[1] for data in test_data], axis=0)

# Add a trailing feature axis: (samples, n_steps) -> (samples, n_steps, 1).
X_train_agg = X_train_agg.reshape((X_train_agg.shape[0], X_train_agg.shape[1], 1))
X_test_agg = X_test_agg.reshape((X_test_agg.shape[0], X_test_agg.shape[1], 1))


In [6]:
# Build the network for inputs of shape (window length, 1).
input_shape = (X_train_agg.shape[1], 1)
model = create_lstm_model(input_shape)

# time.process_time() reports CPU time used by this process, not wall-clock time.
start_time = time.process_time()
model.fit(X_train_agg, y_train_agg, epochs=50, verbose=0)
end_time = time.process_time()
elapsed = end_time - start_time

# Evaluate on the held-out windows.
y_pred = model.predict(X_test_agg, verbose=0)
residuals = y_test_agg - y_pred.ravel()
mse = np.mean(np.square(residuals))

print(f"Mean Squared Error: {mse}")
print(f"Execution Time: {elapsed} seconds")

# Persist the training time and the test error, one row each.
times = pd.DataFrame({'steps': [n_steps], 'time': [elapsed]})
times.to_csv('time_h15.csv', index=False)

errors = pd.DataFrame({'steps': [n_steps], 'MSE': [mse]})
errors.to_csv('lstm_errors_h15.csv', index=False)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-07-17 15:37:13.763271: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-07-17 15:37:13.763573: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
  super().__init__(**kwargs)


Mean Squared Error: 3.354700312293626e-05
Execution Time: 4851.958208575 seconds


In [7]:
# Write the current weights of `model` to disk.
model.save_weights('model_h15.weights.h5')

In [22]:
# Replace the weights of `model` with those stored in the given file.
# NOTE(review): this loads the "h30" file while the cell above saves "h15" —
# presumably weights from a separate run with a different horizon; confirm.
model.load_weights('model_h30.weights.h5')

## Load weights and compare across RSUs

In [27]:
# Trained weights to evaluate.
# NOTE(review): the file names say "h30", while prepare_lstm_data is called
# below with its built-in horizon — confirm the two match for this run.
weights_file = '/home/carnot/Downloads/model_h30.weights.h5'

# Folder with the RSU data.
folder_path = '/home/carnot/hiaac/data/most/rsus_most_csv/'

# Load the RSU data.
rsu_data = load_and_aggregate_data(folder_path)

n_steps = 10
scaler = MinMaxScaler()
results = []

# Every input window has shape (n_steps, 1), so the model and its weights do
# not depend on the RSU: build and load them once instead of per iteration.
model = create_lstm_model((n_steps, 1))
model.load_weights(weights_file)

for rsu, group in rsu_data.groupby('road_id'):
    time_series = create_time_series(group, 'step', 'c02_emission')
    if time_series.empty:
        print(f"RSU: {rsu}, skipped (not enough data)")
        continue

    series = scaler.fit_transform(time_series.values).flatten()
    X, y = prepare_lstm_data(series, n_steps)

    if len(X) == 0:
        # No complete window; the reshape below would fail on an empty array.
        print(f"RSU: {rsu}, skipped (not enough data)")
        continue

    # Same 65/35 split as in training; only the test part is evaluated here.
    split_index = int(len(X) * 0.65)
    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]

    # Add the trailing feature axis expected by the model.
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    # Evaluate the model on this RSU's test windows.
    # NOTE(review): the recorded run printed the same MSE for every RSU —
    # worth checking that the test windows are not constant (e.g. all zeros).
    y_pred = model.predict(X_test, verbose=0)
    mse = np.mean((y_test - y_pred.flatten())**2)

    results.append({'RSU': rsu, 'MSE': mse})
    print(f"RSU: {rsu}, Mean Squared Error: {mse}")

# Save the per-RSU results.
results_df = pd.DataFrame(results)
results_df.to_csv('lstm_errors_h30.csv', index=False)

  a = pd.read_csv(file_path, sep=',', header=None, usecols=[0, 1, 3],
  a = pd.read_csv(file_path, sep=',', header=None, usecols=[0, 1, 3],
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


RSU: RSU_0, Mean Squared Error: 7.270263999401529e-08


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


RSU: RSU_1, Mean Squared Error: 7.270263999401529e-08


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


RSU: RSU_2, Mean Squared Error: 7.270263999401529e-08


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


RSU: RSU_3, Mean Squared Error: 7.270263999401529e-08


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


RSU: RSU_4, Mean Squared Error: 7.270263999401529e-08


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


RSU: RSU_5, Mean Squared Error: 7.270263999401529e-08
