# In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn

# Constants
# Number of time steps in each window that is fed to the model.
SEQUENCE_LENGTH = 600
# NOTE(review): not referenced by the code visible in this file — confirm it is still needed.
PLOT_SAVE_DIR = 'predicted_vs_actual_plots'
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Define the PyTorch model structure (same as the one used for training)
class FuelConsumptionModel(nn.Module):
    """Two stacked bidirectional LSTMs followed by a per-time-step linear head.

    The submodule names (``lstm1``, ``dropout1``, ``lstm2``, ``dropout2``,
    ``dense``) and their creation order are part of the checkpoint format:
    ``load_state_dict`` matches parameters by these names, so they must not
    be renamed.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction. Defaults to 32, the
            value the layers were originally hard-coded to.
        dropout: Dropout probability applied after each LSTM. Defaults to
            0.2, the originally hard-coded value.
    """

    def __init__(self, input_size, hidden_size=32, dropout=0.2):
        super().__init__()
        # A bidirectional LSTM concatenates the forward and backward hidden
        # states, so everything downstream sees twice the hidden size.
        bidirectional_width = 2 * hidden_size
        self.lstm1 = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=True)
        self.dropout1 = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(bidirectional_width, hidden_size, batch_first=True, bidirectional=True)
        self.dropout2 = nn.Dropout(dropout)
        self.dense = nn.Linear(bidirectional_width, 1)

    def forward(self, x):
        """Map a ``(batch, seq, input_size)`` tensor to ``(batch, seq, 1)``.

        The linear head is applied to every time step, so the output keeps
        the sequence dimension of the input.
        """
        x, _ = self.lstm1(x)  # the returned (h_n, c_n) state is not needed
        x = self.dropout1(x)
        x, _ = self.lstm2(x)
        x = self.dropout2(x)
        x = self.dense(x)
        return x

# Load the trained model
def load_trained_model(model_path, input_size):
    """Build a ``FuelConsumptionModel`` and restore its weights for inference.

    Args:
        model_path: Path to a file holding a state dict saved from a model
            with the same layer layout.
        input_size: Number of input features the model was trained with.

    Returns:
        The model, moved to ``DEVICE`` and switched to evaluation mode.
    """
    network = FuelConsumptionModel(input_size=input_size)

    # map_location lets a checkpoint saved on a GPU be restored on a
    # CPU-only machine (and vice versa).
    # NOTE(review): depending on the PyTorch version, torch.load may unpickle
    # arbitrary objects — only load checkpoints from a trusted source.
    weights = torch.load(model_path, map_location=DEVICE)
    network.load_state_dict(weights)

    # Module.to() and Module.eval() both return the module itself.
    return network.to(DEVICE).eval()

# Process the file and prepare segments
def process_file(file_path):
    """Read a trip CSV and derive the model inputs and the per-step target.

    The CSV must contain the columns ``Time``, ``Trip fuel consumption``,
    ``Engine speed``, ``Speed`` and ``slope``.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A ``(features, target)`` tuple: ``features`` is a DataFrame with the
        columns ``Engine speed``, ``Speed``, ``slope``, ``Acceleration`` (in
        that order) and ``target`` is the ``Momentary fuel consumption``
        Series, i.e. the row-to-row increase of the trip fuel consumption.
    """
    frame = pd.read_csv(file_path)

    # Re-base the running columns so that both start at zero on the first row.
    # (Time is re-based here but is not part of the returned data.)
    for running_column in ('Time', 'Trip fuel consumption'):
        frame[running_column] = frame[running_column] - frame[running_column].iloc[0]

    # Row-to-row speed difference; the first row has no predecessor, so its
    # NaN is replaced by 0.
    speed_delta = frame['Speed'].diff()
    frame['Acceleration'] = speed_delta.fillna(0)

    feature_columns = ['Engine speed', 'Speed', 'slope', 'Acceleration']
    features = frame[feature_columns]

    # Same differencing for fuel: cumulative trip consumption -> per-row value.
    fuel_delta = frame['Trip fuel consumption'].diff()
    frame['Momentary fuel consumption'] = fuel_delta.fillna(0)

    return features, frame['Momentary fuel consumption']

# Pad and normalize the data
def pad_and_normalize(data, sequence_length=SEQUENCE_LENGTH):
    """Stack sequences into a fixed-length, min-max scaled float32 tensor.

    Each sequence is truncated to ``sequence_length`` rows or zero-padded at
    the end up to that length. Padding happens *before* scaling, so padded
    rows end up holding the scaled value of a raw 0, not 0 itself.

    Args:
        data: Non-empty list of 2-D arrays shaped ``(rows, n_features)``;
            the feature count is taken from the first array.
        sequence_length: Number of rows every sequence is forced to.

    Returns:
        A ``torch.float32`` tensor of shape
        ``(len(data), sequence_length, n_features)`` on ``DEVICE``.
    """
    n_features = data[0].shape[1]
    batch = np.zeros((len(data), sequence_length, n_features))
    for row, sequence in enumerate(data):
        kept = min(len(sequence), sequence_length)
        batch[row, :kept] = sequence[:kept]

    # Fixed per-feature bounds, matched to features by position. The caller
    # in this file passes Engine speed, Speed, slope, Acceleration.
    # NOTE(review): presumably these must equal the bounds used in training — confirm.
    lower_bounds = [0, 0, -10, -10]
    upper_bounds = [8000, 150, 10, 10]
    for channel in range(n_features):
        low = lower_bounds[channel]
        span = upper_bounds[channel] - low
        batch[:, :, channel] = (batch[:, :, channel] - low) / span

    scaled = torch.tensor(batch, dtype=torch.float32)
    return scaled.to(DEVICE)

# Predict and plot the results
def _predict_in_segments(features, model, scale):
    """Run ``model`` over ``features`` in SEQUENCE_LENGTH-row windows; return scaled predictions."""
    chunks = []
    with torch.no_grad():
        for start in range(0, len(features), SEQUENCE_LENGTH):
            segment = features.iloc[start:start + SEQUENCE_LENGTH]
            # pad_and_normalize zero-pads a short final window up to
            # SEQUENCE_LENGTH, so the model always sees a full-length input.
            outputs = model(pad_and_normalize([segment.values])).cpu().numpy().flatten()
            # Keep only the outputs that belong to real rows; the rest were
            # produced for padding. Assumes one output value per time step.
            chunks.append(outputs[:len(segment)] * scale)
    if not chunks:
        return np.array([])
    return np.concatenate(chunks)


def plot_predicted_vs_real(input_file, model):
    """Predict fuel consumption for a trip file and save a plot and a CSV.

    The file is read with ``process_file``, the model is run on consecutive
    windows of ``SEQUENCE_LENGTH`` rows, and the cumulative sums of the
    actual and predicted per-step values are compared. Two files are written
    next to ``input_file``: ``<name>_predicted_vs_real.png`` and
    ``<name>_predicted_vs_real.csv`` (columns ``Speed``, ``Actual``,
    ``Predicted``).

    Args:
        input_file: Path of the trip CSV to evaluate.
        model: Model mapping a ``(1, SEQUENCE_LENGTH, n_features)`` tensor to
            one value per time step.

    Returns:
        None.
    """
    features, actual_values = process_file(input_file)

    # The raw model output is multiplied by 20000 before it is compared with
    # the actual values.
    # NOTE(review): presumably this undoes the target scaling used during
    # training — confirm against the training script.
    predictions = _predict_in_segments(features, model, scale=20000)

    # Both series are cut to a common length so they can share one index.
    n_points = min(len(predictions), len(actual_values))
    actual_cumulative = np.cumsum(actual_values.values[:n_points], axis=0)
    predicted_cumulative = np.cumsum(predictions[:n_points], axis=0)

    directory, filename = os.path.split(input_file)
    base_name = os.path.splitext(filename)[0]
    plot_filename = os.path.join(directory, f'{base_name}_predicted_vs_real.png')

    figure = plt.figure(figsize=(10, 6))
    try:
        plt.plot(actual_cumulative, label='Real', color='blue')
        plt.plot(predicted_cumulative, label='Predicted', color='red')
        plt.xlabel('Index')
        plt.ylabel('Fuel Consumption')
        plt.title('Predicted vs Real Fuel Consumption')
        plt.legend()
        plt.savefig(plot_filename)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(figure)

    print(f"Plot saved as: {plot_filename}")

    # Save predictions and actual values to CSV
    results_df = pd.DataFrame({
        'Speed': features["Speed"].iloc[:n_points],
        'Actual': actual_cumulative,
        'Predicted': predicted_cumulative,
    })

    csv_filename = os.path.join(directory, f'{base_name}_predicted_vs_real.csv')
    results_df.to_csv(csv_filename, index=False)

# Paths to model and input file
# Run configuration: the feature count the model expects and the files to use.
input_size = 4  # must match the number of feature columns fed to the model
model_path = '/content/best_fuel_consumption_model.pth'  # trained PyTorch state dict
input_file_path = '/content/NEDC_1000_slope_added.csv'  # trip CSV to evaluate

# Restore the trained network, then write the comparison plot and CSV
# next to the input file.
model = load_trained_model(model_path, input_size=input_size)
plot_predicted_vs_real(input_file_path, model=model)
