# Model preparation

## Synthetic data generation

In [None]:
import torch
import random

def generate_synthetic_data(num_instances, sequence_length=720):
    """Generate labelled synthetic hourly ON/OFF server traces.

    Every trace is a list of 0 (OFF) / 1 (ON) values, one per hour. Hour 0 is
    treated as the first hour of a Monday, so days 5 and 6 of each 7-day cycle
    are the weekend. One of five behaviours is drawn per trace:

    * ``proper`` - ON for a fixed window on weekdays, OFF otherwise (label 1)
    * ``improper`` - ON for the whole period (label 0)
    * ``mostly_off`` - OFF except for up to 3 full patching days (label 1)
    * ``weekdays_on_weekends_off`` - ON Mon-Fri, OFF Sat-Sun (label 1)
    * ``improper_with_maintenance`` - ON except 4 random hours on each of up
      to 2 maintenance days (label 0)

    Args:
        num_instances: Number of traces to generate.
        sequence_length: Number of hourly samples per trace (720 = 30 days).

    Returns:
        A ``(data, labels)`` tuple of float tensors with shapes
        ``(num_instances, sequence_length, 1)`` and ``(num_instances, 1)``.
    """
    data = []
    labels = []

    # Negative lengths behave like empty sequences instead of raising.
    sequence_length = max(sequence_length, 0)
    # Only whole days can be picked as patching / maintenance days.
    num_days = sequence_length // 24

    # Timezone -> (start, end) hour of the daily active window
    timezones = {
        'UTC': (6, 10),  # 6 AM to 10 AM
        'PST': (9, 19),  # 9 AM to 7 PM
        'CET': (7, 17),  # 7 AM to 5 PM
        'IND': (0, 12),  # 0 AM to 12 PM
    }
    timezone_names = list(timezones.keys())

    # Behaviours are weighted by repetition (improper 10/15, maintenance 2/15,
    # the rest 1/15 each). The list is kept in its original order so seeded
    # runs keep drawing the same cases.
    case_types = [
        "improper", "improper", "proper", "improper", "improper",
        "improper", "improper", "improper", "improper", "mostly_off",
        "improper", "improper_with_maintenance", "improper",
        "weekdays_on_weekends_off", "improper_with_maintenance",
    ]

    for _ in range(num_instances):
        # A timezone is drawn for every trace, although only "proper" uses it.
        timezone = random.choice(timezone_names)
        active_start, active_end = timezones[timezone]
        case_type = random.choice(case_types)

        sequence = []
        label = None

        if case_type == "proper":
            for hour in range(sequence_length):
                day_of_week = (hour // 24) % 7  # 0 = Monday ... 6 = Sunday
                if day_of_week < 5:
                    # NOTE: the offset added here equals active_start, so the
                    # ON window always covers the first
                    # (active_end - active_start) hours of each simulated day;
                    # the timezone only controls the window length.
                    local_hour = (hour + active_start) % 24
                    is_on = active_start <= local_hour < active_end
                    sequence.append(1 if is_on else 0)
                else:
                    sequence.append(0)
            label = 1

        elif case_type == "improper":
            sequence = [1] * sequence_length
            label = 0

        elif case_type == "mostly_off":
            sequence = [0] * sequence_length
            # Clamp the sample size so traces shorter than 3 days do not raise.
            patch_days = random.sample(range(num_days), min(3, num_days))
            for day in patch_days:
                start_hour = day * 24
                sequence[start_hour:start_hour + 24] = [1] * 24
            label = 1

        elif case_type == "weekdays_on_weekends_off":
            sequence = [
                1 if (hour // 24) % 7 < 5 else 0
                for hour in range(sequence_length)
            ]
            label = 1

        elif case_type == "improper_with_maintenance":
            sequence = [1] * sequence_length
            # Clamp the sample size so traces shorter than 2 days do not raise.
            maintenance_days = random.sample(range(num_days), min(2, num_days))
            for day in maintenance_days:
                # 4 distinct random hours of that day are switched OFF
                patch_hours = random.sample(range(day * 24, (day + 1) * 24), 4)
                for hour in patch_hours:
                    sequence[hour] = 0
            label = 0

        data.append(sequence)
        labels.append(label)

    # An explicit reshape keeps the documented rank even when no traces exist.
    data = torch.tensor(data, dtype=torch.float32).reshape(len(data), sequence_length, 1)
    labels = torch.tensor(labels).float().unsqueeze(-1)  # Shape (num_instances, 1)
    return data, labels

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader

class EC2Dataset(Dataset):
    """Map-style dataset pairing each ON/OFF trace with its label."""

    def __init__(self, data, labels):
        """
        Args:
            data (torch.Tensor): Shape (num_instances, 720, 1), binary ON/OFF states.
            labels (torch.Tensor): Shape (num_instances, 1), binary classification labels.

        Raises:
            ValueError: If ``data`` and ``labels`` hold a different number of entries.
        """
        # Fail here rather than with an IndexError in the middle of an epoch.
        if len(data) != len(labels):
            raise ValueError(
                f"data and labels must have the same length, "
                f"got {len(data)} and {len(labels)}"
            )
        self.data = data
        self.labels = labels

    def __len__(self):
        """Return the number of traces."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(trace, label)`` pair stored at ``idx``."""
        return self.data[idx], self.labels[idx]

# Build the training set: 10,000 synthetic traces with their labels
data, labels = generate_synthetic_data(num_instances=10000)
print(f"Data shape: {data.shape}, Labels shape: {labels.shape}")

# Report the class balance
num_positive = labels.eq(1).sum().item()
num_negative = labels.eq(0).sum().item()
print(f"Positive: {num_positive}, Negative: {num_negative}")

# Wrap the tensors so they can be iterated in shuffled mini-batches of 32
dataset = EC2Dataset(data, labels)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=32)


## Define and train RNN

Create a simple LSTM classifier

In [41]:
import torch.nn as nn

class LSTMModel(nn.Module):
    """LSTM encoder followed by a three-layer head that outputs a probability.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=1, hidden_size=64, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 64)  # Fully connected layer
        self.fc2 = nn.Linear(64, 32)           # Fully connected layer
        self.fc3 = nn.Linear(32, 1)            # Output layer
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one value in [0, 1] per sequence, shape ``(batch, 1)``.

        ``x`` is batch-first: ``(batch, seq_len, input_size)``.
        """
        _, (hn, _) = self.lstm(x)  # hn: (num_layers, batch, hidden_size)
        # Take the top layer's final hidden state. Unlike squeeze(0), which
        # leaves the layer dimension in place whenever num_layers > 1, this
        # always yields (batch, hidden_size).
        x = self.relu(self.fc1(hn[-1]))
        # NOTE(review): this hidden layer uses a sigmoid, whereas
        # StackedLSTMModel uses ReLU at the same position. Left unchanged so
        # previously trained weights keep producing the same outputs.
        x = self.sigmoid(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x


Create a stacked LSTM classifier and a temperature-scaling calibrator

In [3]:
import torch.nn as nn

class TemperatureScaling(nn.Module):
    """Divide logits by a single learnable temperature."""

    def __init__(self):
        super().__init__()
        # Starting at 1.0 makes the module the identity until it is fitted
        initial_temperature = torch.ones(1)
        self.temperature = nn.Parameter(initial_temperature)

    def forward(self, logits):
        """Return ``logits`` scaled down by the current temperature."""
        scaled = torch.div(logits, self.temperature)
        return scaled
    
class StackedLSTMModel(nn.Module):
    """Multi-layer LSTM encoder with a ReLU head and a sigmoid output.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=1, hidden_size=64, num_layers=2):
        super().__init__()

        # Recurrent encoder (batch-first input)
        lstm_config = {
            "input_size": input_size,
            "hidden_size": hidden_size,
            "num_layers": num_layers,
            "batch_first": True,
        }
        self.lstm = nn.LSTM(**lstm_config)

        # Classification head: hidden_size -> 64 -> 32 -> 1
        self.fc1 = nn.Linear(hidden_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

        # Non-linearities
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of sequences to one sigmoid output per sequence."""
        # Only the final hidden states are needed; the per-step outputs and
        # the cell states are discarded.
        _, (final_hidden, _) = self.lstm(x)

        # final_hidden is [num_layers, batch_size, hidden_size]; index -1
        # selects the topmost layer.
        top_layer = final_hidden[-1]

        features = self.relu(self.fc1(top_layer))
        features = self.relu(self.fc2(features))
        return self.sigmoid(self.fc3(features))


Train the model

In [None]:
# Instantiate the model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

#model = LSTMModel(input_size=1, hidden_size=64)
model = StackedLSTMModel(input_size=1, hidden_size=64, num_layers=2)
model = model.to(device)

# The model already ends in a sigmoid, so plain BCELoss (not the logits
# variant) is the matching criterion.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

LOSS_THRESHOLD = 0.01  # Stop training once the average batch loss drops below this value
# Training loop
num_epochs = 50

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0

    # NOTE: `inputs` and `labels` stay bound to the last batch after the loop;
    # later cells read them, so the names are kept as they are.
    for inputs, labels in dataloader:
        # Move data to GPU if available
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Compute loss
        loss = criterion(outputs, labels)
        epoch_loss += loss.item()

        # Backward pass
        loss.backward()
        optimizer.step()

    # Report and test the same quantity: the mean loss per batch. Comparing
    # the summed loss to the threshold made the early-stop bar depend on the
    # number of batches.
    avg_loss = epoch_loss / max(len(dataloader), 1)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

    if avg_loss < LOSS_THRESHOLD:
        print(f"Stopping early at epoch {epoch+1} as loss < {LOSS_THRESHOLD:.4f}")
        break


Fit the temperature-scaling calibrator

In [6]:
calibrator = TemperatureScaling().to(device)

# Freeze model parameters: only the temperature is fitted here
for param in model.parameters():
    param.requires_grad = False

model.eval()
calibrator.train()

# Calibrate on freshly generated held-out data rather than on whatever
# mini-batch the training loop happened to leave behind.
val_inputs, val_labels = generate_synthetic_data(num_instances=1000)
val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)

# The model is frozen, so its outputs are computed once instead of on every
# closure call. Its forward pass ends in a sigmoid, so the outputs are
# probabilities; torch.logit maps them back to logits, which is what both
# temperature scaling and BCEWithLogitsLoss expect. eps clamps saturated
# probabilities so the logits stay finite.
with torch.no_grad():
    val_probs = model(val_inputs)
    val_logits = torch.logit(val_probs, eps=1e-6)

# Optimizer for temperature scaling
optimizer = torch.optim.LBFGS(calibrator.parameters(), lr=0.0001)  # L-BFGS is commonly used for calibration
criterion = nn.BCEWithLogitsLoss()  # Operates on (temperature-scaled) logits


def closure():
    """Re-evaluate the calibration loss; L-BFGS calls this several times per step."""
    optimizer.zero_grad()
    calibrated_logits = calibrator(val_logits)
    loss = criterion(calibrated_logits, val_labels)
    loss.backward()
    return loss


# Training loop: a fixed 500 optimizer steps (no convergence check is performed)
for epoch in range(500):
    optimizer.step(closure)


Evaluation

In [13]:
def evaluate_model(model, dataloader, device=None):
    """Compute and print classification accuracy at a 0.5 threshold.

    Args:
        model: Module returning one probability per sample, with the same
            shape as the labels.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device to evaluate on. Defaults to the device of the model's
            first parameter, or the CPU when the model has no parameters.

    Returns:
        The fraction of correctly classified samples, or ``nan`` when the
        dataloader yields no samples.
    """
    if device is None:
        # Follow the model instead of relying on a module-level `device`.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            predictions = (outputs > 0.5).float()
            correct += (predictions == labels).sum().item()
            total += labels.size(0)

    # Nothing to score: report nan instead of dividing by zero.
    accuracy = correct / total if total else float("nan")
    print(f"Accuracy: {accuracy:.4f}")
    return accuracy

# Evaluate on the training dataloader (on the GPU when one is available, otherwise the CPU)
evaluate_model(model, dataloader)
# Inspect the model outputs for the last batch left over from training.
# These are sigmoid probabilities, not pre-sigmoid logits: the model's
# forward pass applies the sigmoid itself. no_grad avoids building an
# autograd graph for this purely diagnostic pass.
with torch.no_grad():
    raw_outputs = model(inputs)
print(f"Raw Outputs: {raw_outputs}")


In [6]:
# Persist only the learned weights (the state dict), not the module object
trained_weights = model.state_dict()
torch.save(trained_weights, "/mnt/data_lake/models/stop_start_trained_model.pth")
#torch.save(calibrator.state_dict(), "/mnt/data_lake/models/stop_start_trained_model_temperature_scaling.pth")

# Load model and run inference

## Synthetic dataset for inference

In [10]:
import torch
import random

def generate_single_instance_per_case(sequence_length=720):
    """Generate exactly one synthetic hourly ON/OFF trace per behaviour case.

    Hour 0 is treated as the first hour of a Monday, so days 5 and 6 of each
    7-day cycle are the weekend.

    Args:
        sequence_length: Number of hourly samples per trace (720 = 30 days).

    Returns:
        A ``(data, labels, case_types)`` tuple: float tensors of shape
        ``(5, sequence_length, 1)`` and ``(5, 1)``, plus the list of case
        names in the same order as the rows.
    """
    data = []
    labels = []

    # Negative lengths behave like empty sequences instead of raising.
    sequence_length = max(sequence_length, 0)
    # Only whole days can be picked as patching / maintenance days.
    num_days = sequence_length // 24

    # Timezone -> (start, end) hour of the daily active window
    timezones = {
        'UTC': (9, 12),
        'PST': (0, 3),
        'CET': (5, 20),
    }
    timezone_names = list(timezones.keys())

    case_types = [
        "proper",
        "improper",
        "mostly_off",
        "weekdays_on_weekends_off",
        "improper_with_maintenance",
    ]

    for case_type in case_types:
        # A timezone is drawn for every case, although only "proper" uses it.
        timezone = random.choice(timezone_names)
        active_start, active_end = timezones[timezone]

        sequence = []
        label = None

        if case_type == "proper":
            # ON for a fixed window on weekdays, OFF on weekends
            for hour in range(sequence_length):
                day_of_week = (hour // 24) % 7  # 0 = Monday ... 6 = Sunday
                if day_of_week < 5:
                    # NOTE: the offset added here equals active_start, so the
                    # ON window always covers the first
                    # (active_end - active_start) hours of each simulated day.
                    local_hour = (hour + active_start) % 24
                    sequence.append(1 if active_start <= local_hour < active_end else 0)
                else:
                    sequence.append(0)
            label = 1

        elif case_type == "improper":
            # Always ON
            sequence = [1] * sequence_length
            label = 0

        elif case_type == "mostly_off":
            # Mostly OFF, with up to 3 full patching days ON. The sample size
            # is clamped so traces shorter than 3 days do not raise.
            sequence = [0] * sequence_length
            patch_days = random.sample(range(num_days), min(3, num_days))
            for day in patch_days:
                start_hour = day * 24
                sequence[start_hour:start_hour + 24] = [1] * 24
            label = 1

        elif case_type == "weekdays_on_weekends_off":
            # ON all day Mon-Fri, OFF Sat-Sun
            sequence = [
                1 if (hour // 24) % 7 < 5 else 0
                for hour in range(sequence_length)
            ]
            label = 1

        elif case_type == "improper_with_maintenance":
            # Mostly ON, with 4 random hours OFF on each of up to 2 days. The
            # sample size is clamped so traces shorter than 2 days do not raise.
            sequence = [1] * sequence_length
            maintenance_days = random.sample(range(num_days), min(2, num_days))
            for day in maintenance_days:
                patch_hours = random.sample(range(day * 24, (day + 1) * 24), 4)
                for hour in patch_hours:
                    sequence[hour] = 0
            label = 0

        data.append(sequence)
        labels.append(label)

    # An explicit reshape keeps the documented rank for empty sequences too.
    data = torch.tensor(data, dtype=torch.float32).reshape(len(data), sequence_length, 1)
    labels = torch.tensor(labels).float().unsqueeze(-1)
    return data, labels, case_types



## Inference

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Rebuild the architecture used for training, then restore its weights.
#model = LSTMModel()
model_pred = StackedLSTMModel(input_size=1, hidden_size=64, num_layers=2)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved from a GPU run can still be loaded on a CPU-only host.
state_dict = torch.load(
    "/mnt/data_lake/models/stop_start_trained_model.pth",
    map_location=device,
)
model_pred.load_state_dict(state_dict)
model_pred = model_pred.to(device)
model_pred.eval()

# NOTE: temperature-scaling calibration is currently disabled. The calibrator
# checkpoint is neither saved nor loaded, so the confidences printed below are
# the model's uncalibrated sigmoid outputs.

# Generate synthetic data: one trace per behaviour case
data, labels, case_types = generate_single_instance_per_case()
print(f"Generated Data Shape: {data.shape}, Labels Shape: {labels.shape}")

# Run inference
with torch.no_grad():
    data = data.to(device)
    predictions = model_pred(data)  # Sigmoid probabilities, shape (num_cases, 1)
    predictions_binary = (predictions > 0.5).float()  # Threshold for binary classification

# Print results
for i, case in enumerate(case_types):
    print(f"Case: {case}")
    print(f"  True Label: {int(labels[i].item())}")
    print(f"  Predicted Label: {int(predictions_binary[i].item())}")
    print(f"  Confidence: {predictions[i].item():.2f}")
    

In [17]:
# Draw a fresh batch of 1,000 synthetic traces to score with the loaded model
data, labels = generate_synthetic_data(num_instances=1000)
print(f"Data shape: {data.shape}, Labels shape: {labels.shape}")

# Ground-truth class balance
num_positive = labels.eq(1).sum().item()
num_negative = labels.eq(0).sum().item()
print(f"Positive: {num_positive}, Negative: {num_negative}")

# Score the whole batch in one forward pass, without tracking gradients
with torch.no_grad():
    data = data.to(device)
    predictions = model_pred(data)
    predictions_binary = predictions.gt(0.5).float()  # 0.5 decision threshold

# Predicted class balance, for comparison with the ground truth above
pre_num_positive = predictions_binary.eq(1).sum().item()
pre_num_negative = predictions_binary.eq(0).sum().item()
print(f"Predicted Positive: {pre_num_positive}, Predicted Negative: {pre_num_negative}")