In [1]:
from google.colab import drive
drive.mount('/content/gdrive')


# !cp "/content/gdrive/My Drive/DataAugumentation.zip" .
#ADDED NEW SOURCE
!cp "/content/gdrive/My Drive/data_aug(3_slices_with_repeated)_acceleration_full_data_20000.zip" .
!unzip -qq DataAugumentation.zip
!unzip -qq data_aug_3_slices_with_repeated_cluster_5.zip
!rm DataAugumentation.zip
!rm data_aug_3_slices_with_repeated_cluster_5.zip
data_path = 'DataAugumentation'

Mounted at /content/gdrive
unzip:  cannot find or open DataAugumentation.zip, DataAugumentation.zip.zip or DataAugumentation.zip.ZIP.
unzip:  cannot find or open data_aug_3_slices_with_repeated_cluster_5.zip, data_aug_3_slices_with_repeated_cluster_5.zip.zip or data_aug_3_slices_with_repeated_cluster_5.zip.ZIP.
rm: cannot remove 'DataAugumentation.zip': No such file or directory
rm: cannot remove 'data_aug_3_slices_with_repeated_cluster_5.zip': No such file or directory


In [2]:
# Extract the dataset archive that the first cell copied from Google Drive.
# `-qq` keeps unzip quiet so the per-file listing does not flood the cell output.
!unzip -qq '/content/data_aug(3_slices_with_repeated)_acceleration_full_data_20000.zip'

In [26]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
import joblib

# Number of rows kept from every trip file (see process_file); shorter trips are
# zero-padded to this length further down.
SEQUENCE_LENGTH = 600
# Mini-batch size used by the training and validation DataLoaders.
BATCH_SIZE = 45
# Upper bound on training epochs; the training loop can stop earlier (early stopping).
EPOCHS = 250
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 1e-5
# NOTE(review): not referenced by any of the visible cells.
PLOT_SAVE_DIR = 'predicted_vs_actual_plots'
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(DEVICE)
# Data processing function
def process_file(file_path):
    """Read one trip CSV and return its feature matrix and per-step fuel target.

    The CSV must provide the columns 'Time', 'Trip fuel consumption', 'Speed',
    'Engine speed' and 'slope'. Two columns are derived as row-to-row
    differences, with the first row set to 0:

    * 'Momentary fuel consumption' from 'Trip fuel consumption'
    * 'Acceleration' from 'Speed' (a plain difference, not divided by the time step)

    Only the first SEQUENCE_LENGTH rows are returned; shorter files yield
    fewer rows.

    Returns:
        (features, target): `features` is a 2-D array with the columns
        'Engine speed', 'Speed', 'slope', 'Acceleration' (in that order) and
        `target` is the 1-D array of momentary fuel consumption.
    """
    raw = pd.read_csv(file_path)

    trip = raw.assign(**{
        # Re-base the time axis so that the trip starts at zero.
        'Time': raw['Time'] - raw['Time'].iloc[0],
        'Momentary fuel consumption': raw['Trip fuel consumption'].diff().fillna(0),
        'Acceleration': raw['Speed'].diff().fillna(0),
    })

    # Truncate first, then pick the model inputs and the target from the window.
    window = trip.iloc[:SEQUENCE_LENGTH]
    feature_columns = ['Engine speed', 'Speed', 'slope', 'Acceleration']

    return window[feature_columns].values, window['Momentary fuel consumption'].values

# NOTE(review): neither scaler is fitted or applied in the visible cells; they are
# kept only so that code relying on these names keeps working.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

# One (features, target) entry per trip file, split by trip kind.
X_original = []
y_original = []
X_augmented = []
y_augmented = []

base_folder_path = '/content/'

# Process the data
for i in range(6):
    # Only cluster 5 is loaded; every other cluster index is skipped.
    if i != 5:
        continue

    folder_path = os.path.join(base_folder_path, f'data_aug(3_slices_with_repeated)_cluster_{i}')

    # os.listdir() returns names in arbitrary order. Sorting makes the order of the
    # collected trips -- and with it the positional test/validation splits taken
    # from these lists later on -- identical on every run.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.csv'):
            continue

        file_path = os.path.join(folder_path, filename)
        features, target = process_file(file_path)

        # A file counts as an "original" trip when the '_'-separated name fields
        # 2, 6 and 10 are all equal; anything else is treated as augmented.
        # NOTE(review): presumably those fields identify the source trip of each
        # of the three slices -- confirm against the file-naming scheme.
        slices = filename.split('_')
        is_original_trip = slices[2] == slices[6] and slices[6] == slices[10]

        if is_original_trip:
            X_original.append(features)
            y_original.append(target)
        else:
            X_augmented.append(features)
            y_augmented.append(target)



cuda


In [27]:

# Function to pad or truncate sequences
def pad_or_truncate(sequence, length):
    """Return `sequence` with exactly `length` rows.

    Longer inputs are cut to their first `length` rows. Shorter inputs are
    extended with zero rows at the end (the padding spec requires a 2-D array).
    An input that already has `length` rows is returned as-is, without copying.
    """
    missing_rows = length - len(sequence)

    if missing_rows < 0:
        return sequence[:length]
    if missing_rows > 0:
        # Pad only after the last row; the second axis is left untouched.
        return np.pad(sequence, ((0, missing_rows), (0, 0)), mode='constant')
    return sequence



In [28]:

# Apply padding/truncating to ensure consistent sequence length
# Feature arrays are already 2-D, so they go straight through pad_or_truncate.
X_original, X_augmented = (
    [pad_or_truncate(seq, SEQUENCE_LENGTH) for seq in group]
    for group in (X_original, X_augmented)
)

# Targets are reshaped into a single column first so that they can be padded
# with the same helper.
y_original, y_augmented = (
    [pad_or_truncate(seq.reshape(-1, 1), SEQUENCE_LENGTH) for seq in group]
    for group in (y_original, y_augmented)
)


In [29]:

# Convert data to PyTorch tensors
def _shape_summary(sequences):
    """Return the distinct per-sequence shapes in `sequences`, sorted.

    Each element is inspected on its own instead of stacking everything into
    one array, so this also works for ragged input and for data that has
    already been turned into a tensor.
    """
    return sorted({tuple(np.shape(seq)) for seq in sequences})


_pending = {
    'X_original': X_original,
    'y_original': y_original,
    'X_augmented': X_augmented,
    'y_augmented': y_augmented,
}

try:
    # Convert everything first and rebind the public names only afterwards, so a
    # failure never leaves some of them as tensors and others as lists.
    _converted = {
        name: torch.tensor(np.array(seqs), dtype=torch.float32).to(DEVICE)
        for name, seqs in _pending.items()
    }
except Exception as e:
    print(f"Error during tensor conversion: {e}")
    # The diagnostics must not raise themselves (stacking ragged data would),
    # otherwise the original error would be hidden.
    for name, seqs in _pending.items():
        print(f"Shapes: {name} - {len(seqs)} sequences, per-sequence shapes {_shape_summary(seqs)}")
    raise

X_original = _converted['X_original']
y_original = _converted['y_original']
X_augmented = _converted['X_augmented']
y_augmented = _converted['y_augmented']


In [30]:

# Split into training and test sets
# The first 20% of the original trips are held out as the test set.
num_test = int(0.2 * len(X_original))
X_test, y_test = X_original[:num_test], y_original[:num_test]

# Training data: the remaining original trips followed by every augmented trip.
X_train = torch.cat((X_original[num_test:], X_augmented), dim=0)
y_train = torch.cat((y_original[num_test:], y_augmented), dim=0)

# Alternative (disabled): train on the remaining original trips only.
# X_train = X_original[num_test:]
# y_train = y_original[num_test:]


In [31]:

# Convert the min and max values to tensors
# Fixed per-feature bounds, listed in the feature order produced by process_file:
# 'Engine speed', 'Speed', 'slope', 'Acceleration'.
min_val_x = torch.tensor([0, 0, -10, -10], dtype=torch.float32, device=DEVICE)
max_val_x = torch.tensor([8000, 150, 10, 10], dtype=torch.float32, device=DEVICE)

# Fixed bounds for the target (momentary fuel consumption).
min_val_y = torch.tensor([0], dtype=torch.float32, device=DEVICE)
max_val_y = torch.tensor([20000], dtype=torch.float32, device=DEVICE)

# Custom normalization function for X
def custom_normalize_X(data, min_vals, max_vals):
    """Min-max scale every feature of `data` with fixed bounds.

    Each feature `i` (last axis of `data`) is mapped to
    ``(x - min_vals[i]) / (max_vals[i] - min_vals[i])``.

    The result is a NEW tensor; `data` itself is left untouched. Writing the
    result back into `data` would also overwrite any tensor that shares its
    storage (for example a slice of the raw data) and would scale the values a
    second time whenever the calling cell is re-run.

    Args:
        data: floating-point tensor whose last axis indexes the features.
        min_vals: per-feature lower bounds (tensor or sequence) with at least
            ``data.shape[-1]`` entries.
        max_vals: per-feature upper bounds, same layout as `min_vals`.

    Returns:
        Tensor with the shape of `data` holding the scaled values.
    """
    n_features = data.shape[-1]
    # Bring the bounds onto data's dtype/device and use one bound per feature.
    lows = torch.as_tensor(min_vals, dtype=data.dtype, device=data.device)[:n_features]
    highs = torch.as_tensor(max_vals, dtype=data.dtype, device=data.device)[:n_features]
    # Broadcasting over the last axis applies feature i's bounds to data[..., i].
    return (data - lows) / (highs - lows)

# Custom normalization function for y
def custom_normalize_y(data, min_val, max_val):
    """Min-max scale `data` with the fixed bounds `min_val` and `max_val`.

    Returns ``(data - min_val) / (max_val - min_val)`` as a new value; the
    input is not modified.
    """
    value_range = max_val - min_val
    shifted = data - min_val
    return shifted / value_range

# Normalize X_train and X_test
# Scale the inputs of both splits with the fixed per-feature bounds.
X_train_normalized = custom_normalize_X(X_train, min_vals=min_val_x, max_vals=max_val_x)
X_test_normalized = custom_normalize_X(X_test, min_vals=min_val_x, max_vals=max_val_x)

# Scale the targets of both splits with the fixed target bounds.
y_train_normalized = custom_normalize_y(y_train, min_val=min_val_y, max_val=max_val_y)
y_test_normalized = custom_normalize_y(y_test, min_val=min_val_y, max_val=max_val_y)


In [32]:

# Define the PyTorch model
class FuelConsumptionModel(nn.Module):
    """Two stacked bidirectional LSTMs followed by a per-time-step linear output.

    Input:  (batch, sequence, input_size) -- both LSTMs use batch_first=True.
    Output: (batch, sequence, 1) -- one value per time step.
    """

    def __init__(self, input_size):
        super().__init__()
        hidden_units = 32                 # per direction
        recurrent_width = 2 * hidden_units  # forward + backward outputs concatenated

        self.lstm1 = nn.LSTM(input_size, hidden_units, batch_first=True, bidirectional=True)
        self.dropout1 = nn.Dropout(0.2)
        self.lstm2 = nn.LSTM(recurrent_width, hidden_units, batch_first=True, bidirectional=True)
        self.dropout2 = nn.Dropout(0.2)
        self.dense = nn.Linear(recurrent_width, 1)

    def forward(self, x):
        """Run `x` through LSTM -> dropout -> LSTM -> dropout -> linear."""
        first_pass, _ = self.lstm1(x)
        second_pass, _ = self.lstm2(self.dropout1(first_pass))
        return self.dense(self.dropout2(second_pass))


In [33]:

# Instantiate the model, define the loss function and the optimizer
# Seed PyTorch's global random number generator before anything random happens,
# so that weight initialisation, DataLoader shuffling and dropout masks are
# repeatable when the notebook is run top to bottom from a fresh kernel.
# (Some GPU kernels can still introduce small run-to-run differences.)
SEED = 42
torch.manual_seed(SEED)

# The number of input features is read from the last axis of the training inputs.
model = FuelConsumptionModel(input_size=X_train_normalized.shape[-1]).to(DEVICE)
# Mean absolute error, computed on the normalised target.
criterion = nn.L1Loss()
weight_decay = 1e-4  # L2 regularization factor
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=weight_decay)

In [34]:
# Assume 80% of the data is used for training and 20% for validation
n_samples = len(X_train_normalized)
train_size = int(0.8 * n_samples)
val_size = n_samples - train_size

# Positional split without shuffling: the leading `train_size` samples train the
# model, the trailing `val_size` samples validate it.
# NOTE(review): the training tensor is built as "original trips, then augmented
# trips", so the trailing validation part presumably consists of augmented
# trips only -- confirm that this is intended.
X_train_split, X_val_split = X_train_normalized[:train_size], X_train_normalized[train_size:]
y_train_split, y_val_split = y_train_normalized[:train_size], y_train_normalized[train_size:]

# Only the training batches are reshuffled on every epoch.
train_dataset = TensorDataset(X_train_split, y_train_split)
val_dataset = TensorDataset(X_val_split, y_val_split)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)




In [35]:
# Early stopping parameters
patience = 10                   # epochs tolerated without a new best validation loss
best_loss = float('inf')        # lowest average validation loss seen so far
epochs_without_improvement = 0  # consecutive epochs that did not beat best_loss

# Training loop with early stopping
model.train()
for epoch in range(EPOCHS):
    # ---- training pass (dropout active) ----
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses.
    avg_training_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{EPOCHS}], Training Loss: {avg_training_loss:.4f}")

    # ---- validation pass (dropout off, no gradients) ----
    model.eval()
    val_running_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = model(inputs)
            val_loss = criterion(outputs, targets)
            val_running_loss += val_loss.item()

    avg_val_loss = val_running_loss / len(val_loader)
    print(f"Epoch [{epoch+1}/{EPOCHS}], Validation Loss: {avg_val_loss:.4f}")

    # ---- early stopping ----
    if avg_val_loss < best_loss:
        # New best: remember it, reset the counter and checkpoint the weights.
        best_loss = avg_val_loss
        epochs_without_improvement = 0
        torch.save(model.state_dict(), 'best_fuel_consumption_model.pth')
        continue

    epochs_without_improvement += 1
    if epochs_without_improvement >= patience:
        print(f"Early stopping at epoch {epoch+1}")
        break

# Weights as they are when training ends; these can differ from the best
# checkpoint saved above.
torch.save(model.state_dict(), 'final_fuel_consumption_model.pth')


Epoch [1/250], Training Loss: 0.0678
Epoch [1/250], Validation Loss: 0.0437
Epoch [2/250], Training Loss: 0.0515
Epoch [2/250], Validation Loss: 0.0421
Epoch [3/250], Training Loss: 0.0493
Epoch [3/250], Validation Loss: 0.0403
Epoch [4/250], Training Loss: 0.0471
Epoch [4/250], Validation Loss: 0.0386
Epoch [5/250], Training Loss: 0.0451
Epoch [5/250], Validation Loss: 0.0371
Epoch [6/250], Training Loss: 0.0435
Epoch [6/250], Validation Loss: 0.0361
Epoch [7/250], Training Loss: 0.0421
Epoch [7/250], Validation Loss: 0.0355
Epoch [8/250], Training Loss: 0.0409
Epoch [8/250], Validation Loss: 0.0351
Epoch [9/250], Training Loss: 0.0399
Epoch [9/250], Validation Loss: 0.0347
Epoch [10/250], Training Loss: 0.0389
Epoch [10/250], Validation Loss: 0.0343
Epoch [11/250], Training Loss: 0.0381
Epoch [11/250], Validation Loss: 0.0340
Epoch [12/250], Training Loss: 0.0374
Epoch [12/250], Validation Loss: 0.0337
Epoch [13/250], Training Loss: 0.0367
Epoch [13/250], Validation Loss: 0.0334
Epoc

#test

In [36]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn

# Constants
# Length of the segments the input trip is cut into before prediction.
SEQUENCE_LENGTH = 600
# NOTE(review): not referenced in the visible cells; the plot and CSV are written
# next to the input file instead.
PLOT_SAVE_DIR = 'predicted_vs_actual_plots'
# Run on the GPU when one is available, otherwise on the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the PyTorch model structure (same as the one used for training)
class FuelConsumptionModel(nn.Module):
    """Network used for inference: two bidirectional LSTMs and a linear head.

    Layer names and sizes have to match the checkpoint that is loaded into it.

    Input:  (batch, sequence, input_size) -- both LSTMs use batch_first=True.
    Output: (batch, sequence, 1) -- one value per time step.
    """

    def __init__(self, input_size):
        super().__init__()
        units = 32            # hidden units per direction
        both_directions = 2 * units  # width of a bidirectional LSTM's output

        self.lstm1 = nn.LSTM(input_size, units, batch_first=True, bidirectional=True)
        self.dropout1 = nn.Dropout(0.2)
        self.lstm2 = nn.LSTM(both_directions, units, batch_first=True, bidirectional=True)
        self.dropout2 = nn.Dropout(0.2)
        self.dense = nn.Linear(both_directions, 1)

    def forward(self, x):
        """Apply LSTM -> dropout -> LSTM -> dropout -> linear to `x`."""
        hidden, _ = self.lstm1(x)
        hidden = self.dropout1(hidden)
        hidden, _ = self.lstm2(hidden)
        hidden = self.dropout2(hidden)
        return self.dense(hidden)

# Load the trained model
def load_trained_model(model_path, input_size):
    """Build a FuelConsumptionModel and load the weights stored at `model_path`.

    Args:
        model_path: path of a state-dict checkpoint written with torch.save.
        input_size: number of input features the network was trained with.

    Returns:
        The model, moved to DEVICE and switched to evaluation mode.
    """
    model = FuelConsumptionModel(input_size=input_size)
    # NOTE(review): torch.load relies on pickle -- only load checkpoints from a
    # trusted source.
    state_dict = torch.load(model_path, map_location=DEVICE)
    model.load_state_dict(state_dict)
    # .to() and .eval() both return the module itself.
    return model.to(DEVICE).eval()

# Process the file and prepare segments
def process_file(file_path):
    """Read a trip CSV and return its model inputs and per-step fuel target.

    The CSV must provide the columns 'Time', 'Trip fuel consumption', 'Speed',
    'Engine speed' and 'slope'. 'Time' and 'Trip fuel consumption' are re-based
    so that their first row is zero. 'Acceleration' and the target are
    row-to-row differences of 'Speed' and of the re-based
    'Trip fuel consumption'; their first row is set to 0.

    Returns:
        (features, target): `features` is a DataFrame with the columns
        'Engine speed', 'Speed', 'slope', 'Acceleration'; `target` is a Series
        named 'Momentary fuel consumption'. Both cover the whole file.
    """
    raw = pd.read_csv(file_path)

    trip = raw.assign(**{
        'Time': raw['Time'] - raw['Time'].iloc[0],
        'Trip fuel consumption': raw['Trip fuel consumption'] - raw['Trip fuel consumption'].iloc[0],
        'Acceleration': raw['Speed'].diff().fillna(0),
    })

    features = trip[['Engine speed', 'Speed', 'slope', 'Acceleration']]
    target = (
        trip['Trip fuel consumption']
        .diff()
        .fillna(0)
        .rename('Momentary fuel consumption')
    )
    return features, target

# Pad and normalize the data
def pad_and_normalize(data, sequence_length=SEQUENCE_LENGTH):
    """Stack sequences into one zero-padded, min-max scaled tensor on DEVICE.

    Args:
        data: non-empty list of 2-D arrays (rows x features); the feature
            count is taken from the first one.
        sequence_length: number of rows per sequence in the result. Longer
            sequences are cut, shorter ones are zero-padded at the end.

    Returns:
        float32 tensor of shape (len(data), sequence_length, n_features).
        Padding happens before scaling, so padded rows are scaled as well.
    """
    n_features = data[0].shape[1]
    batch = np.zeros((len(data), sequence_length, n_features))
    for row, seq in enumerate(data):
        keep = min(len(seq), sequence_length)
        batch[row, :keep] = seq[:keep]

    # Normalization (same as in your script): fixed (low, high) bounds per feature.
    feature_bounds = ((0, 8000), (0, 150), (-10, 10), (-10, 10))
    for col in range(n_features):
        low, high = feature_bounds[col]
        batch[:, :, col] = (batch[:, :, col] - low) / (high - low)

    return torch.tensor(batch, dtype=torch.float32).to(DEVICE)

# Predict and plot the results
def plot_predicted_vs_real(input_file, model):
    """Predict fuel consumption for one trip file and save a plot and a CSV.

    The trip is cut into consecutive segments of SEQUENCE_LENGTH rows (the last
    one may be shorter and is zero-padded by pad_and_normalize). Every segment
    is run through `model` and the outputs are multiplied by 20000 to undo the
    target scaling. Cumulative sums of the real and predicted values are
    plotted, and written to a CSV together with the speed.

    Both files are written next to `input_file`, using its base name plus
    '_predicted_vs_real.png' / '_predicted_vs_real.csv'.

    Args:
        input_file: path of the trip CSV.
        model: network to predict with; it is used as passed in (no mode switch here).
    """
    features, actual_values = process_file(input_file)

    # Stepping through the start offsets covers the full segments and the
    # shorter trailing one (if any) with a single loop.
    predictions = []
    for start in range(0, len(features), SEQUENCE_LENGTH):
        segment = features.iloc[start:start + SEQUENCE_LENGTH]
        segment_normalized = pad_and_normalize([segment.values])
        with torch.no_grad():
            segment_predictions = model(segment_normalized).cpu().numpy()
        predictions.extend(segment_predictions.flatten() * 20000)

    predictions = np.array(predictions)

    # A padded last segment yields more predictions than there are real rows, so
    # both series are clipped to each other's length before accumulating.
    n_predicted = len(predictions)
    n_actual = len(actual_values)
    actual_cumulative = np.cumsum(actual_values.values[:n_predicted], axis=0)
    predicted_cumulative = np.cumsum(predictions[:n_actual], axis=0)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(actual_cumulative, label='Real', color='blue')
    ax.plot(predicted_cumulative, label='Predicted', color='red')
    ax.set_xlabel('Index')
    ax.set_ylabel('Fuel Consumption')
    ax.set_title('Predicted vs Real Fuel Consumption')
    ax.legend()

    directory, filename = os.path.split(input_file)
    stem = os.path.splitext(filename)[0]
    plot_filename = os.path.join(directory, f'{stem}_predicted_vs_real.png')
    fig.savefig(plot_filename)
    plt.close(fig)

    print(f"Plot saved as: {plot_filename}")

    # Save predictions and actual values to CSV
    results_df = pd.DataFrame({
        'Speed': features["Speed"].iloc[:n_predicted],
        'Actual': actual_cumulative,
        'Predicted': predicted_cumulative,
    })

    csv_filename = os.path.join(directory, f'{stem}_predicted_vs_real.csv')
    results_df.to_csv(csv_filename, index=False)

# Paths to model and input file
# Checkpoint written by the training loop whenever the validation loss improved.
model_path = '/content/best_fuel_consumption_model.pth'  # Adjust this to your PyTorch model path
# Trip file to predict; the plot and CSV are written next to it.
input_file_path = '/content/NEDC_1000_slope_added.csv'

# Load model and predict
# Must equal the number of feature columns returned by process_file
# ('Engine speed', 'Speed', 'slope', 'Acceleration').
input_size = 4  # Number of features in the input data
# Note: this rebinds the global name `model` to the freshly loaded network.
model = load_trained_model(model_path, input_size)
plot_predicted_vs_real(input_file_path, model)


Plot saved as: /content/NEDC_1000_slope_added_predicted_vs_real.png
