# Initial draft

In [2]:
import torch 
import torch.nn as nn
import numpy as np
import pandas as pd

In [None]:
torch.manual_seed(1)

lstm = nn.LSTM(3,3)
inputs = [torch.randn(1, 3) for _ in range(5)]
# this particular inputs batch is one sequence of length 5, with 3 features
inputs

In [None]:
len(inputs)

In [None]:
batch_size = len(inputs)
print([len(x) for x in inputs])
seq_len = max(len(x) for x in inputs)
num_features = len(inputs[0][0])
print(batch_size,seq_len,num_features)
inputs = torch.cat(inputs).view(batch_size, seq_len, num_features)
# one input to train on at a time of training

In [18]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import argparse
from pathlib import Path

class SequenceDataset(Dataset):
    def __init__(self, input_path, seq_length=40):
        self.inputs,self.targets = torch.load(input_path),
        self.seq_length = seq_length
        
    def __len__(self):
        return len(self.inputs) - self.seq_length + 1
    
    def __getitem__(self, idx):
        return (self.inputs[idx:idx + self.seq_length], 
                self.targets[idx + self.seq_length - 1])

class LSTMModel(nn.Module):
    def __init__(self, num_features, hidden_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(
            input_size=num_features,
            hidden_size=hidden_size,
            num_layers=num_layers,
            
            batch_first=True
        )
        self.linear = nn.Linear(hidden_size, 1)
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        predictions = self.linear(lstm_out[:, -1, :])
        return predictions

def validate_model(model, val_loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device).float()
            targets = targets.to(device).float()
            
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    
    return total_loss / len(val_loader)

def train_model(num_hid, optimizer_type, learning_rate, epochs, data_dir):
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Data loading
    data_dir = Path(data_dir)
    train_dataset = SequenceDataset(
        data_dir / "train_input.pt",
        data_dir / "train_output.pt"
    )
    val_dataset = SequenceDataset(
        data_dir / "val_input.pt",
        data_dir / "val_output.pt"
    )
    
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    
    # Model initialization
    model = LSTMModel(num_features=4, hidden_size=num_hid).to(device)
    
    # Loss and optimizer
    criterion = nn.MSELoss()
    if optimizer_type.lower() == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer_type.lower() == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    else:
        raise ValueError(f"Unsupported optimizer type: {optimizer_type}")
    
    # TensorBoard setup
    writer = SummaryWriter('runs/lstm_training')
    
    # Training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs = inputs.to(device).float()
            targets = targets.to(device).float()
            
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        
        # Calculate average training loss
        avg_train_loss = total_loss / len(train_loader)
        
        # Validation every 100 epochs
        if (epoch + 1) % 100 == 0:
            val_loss = validate_model(model, val_loader, criterion, device)
            print(f'Epoch [{epoch+1}/{epochs}], '
                  f'Training Loss: {avg_train_loss:.4f}, '
                  f'Validation Loss: {val_loss:.4f}')
            
            # Log to TensorBoard
            writer.add_scalar('Training Loss', avg_train_loss, epoch)
            writer.add_scalar('Validation Loss', val_loss, epoch)
    
    # Save the model
    torch.save(model.state_dict(), 'lstm_model.pth')
    writer.close()

def main_test():
    parser = argparse.ArgumentParser(description='Train LSTM model')
    parser.add_argument('--num_hid', type=int, default=64,
                      help='number of hidden units')
    parser.add_argument('--optimizer', type=str, default='adam',
                      help='optimizer type (adam or sgd)')
    parser.add_argument('--lr', type=float, default=0.001,
                      help='learning rate')
    parser.add_argument('--epochs', type=int, default=1000,
                      help='number of epochs')
    parser.add_argument('--data_dir', type=str, default='./data',
                      help='directory containing the data files')
    
    args = parser.parse_args()
    
    train_model(
        num_hid=args.num_hid,
        optimizer_type=args.optimizer,
        learning_rate=args.lr,
        epochs=args.epochs,
        data_dir=args.data_dir
    )



In [None]:
train_model(
    num_hid=10,
    optimizer_type="adam",
    learning_rate=0.01,
    epochs=100,
    data_dir='../data'
)



In [2]:
import torch
import torch.nn as nn

In [None]:
class LSTMModel(nn.Module):
    def __init__(self, num_features, hidden_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(
            input_size=num_features,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True
        )
        self.linear = nn.Linear(hidden_size, 2)
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        predictions = self.linear(lstm_out[:, -1, :])
        return predictions
    
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMModel(num_features=32, hidden_size=64)
model.load_state_dict(torch.load('../1257_1802.pth'))
model.eval()

# Create dummy input tensor
dummy_input = torch.randn([64, 40, 32])

# Export to ONNX
onnx_path = "../lstm_1257_1802.onnx"
torch.onnx.export(
    model,               # model being run
    dummy_input,        # model input (or a tuple for multiple inputs)
    onnx_path,          # where to save the model
    export_params=True, # store the trained parameter weights inside the model file
    opset_version=11,   # the ONNX version to export the model to
    do_constant_folding=True,  # whether to execute constant folding for optimization
    input_names=['input'],     # the model's input names
    output_names=['output'],   # the model's output names
    dynamic_axes={
        'input': {0: 'batch_size'},  # variable length axes
        'output': {0: 'batch_size'}
    }
)

In [10]:
class LSTMModel(nn.Module):
    def __init__(self, num_features, hidden_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.norm = nn.LayerNorm(num_features)
        self.lstm = nn.LSTM(
            input_size=num_features,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True
        )
        self.linear = nn.Linear(hidden_size, 2)
    
    def forward(self, x):
        x = self.norm(x)
        lstm_out, _ = self.lstm(x)
        predictions = self.linear(lstm_out[:, -1, :])
        return predictions

In [None]:
# Convert to onnx
# import pytorch model
import onnx
model = LSTMModel(num_features=32, hidden_size=32)
model_path= '../lstm_03-04_11:18.pth'
state_dict = torch.load(torch.load(model_path))
# If the state_dict is wrapped (e.g., by DataParallel), use the following line
for item in state_dict:
    print(item)
state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
model.load_state_dict(state_dict)

print(f"Model loaded successfully from {model_path}")
model.eval()
input_shape = (1, 40, 32)
dummy_input = torch.randn(input_shape)

# Export the model to ONNX
try:
    torch.onnx.export(
        model,               # Model being exported
        dummy_input,         # Dummy input
        onnx_path,           # Output file path
        export_params=True,  # Store the trained parameter weights inside the model file
        opset_version=11,    # ONNX version to export the model to
        do_constant_folding=True,  # Optimize the model by executing constant folding
        input_names=['input'],      # Name of the input
        output_names=['output'],    # Name of the output
        dynamic_axes={'input': {0: 'batch_size'},  # Variable batch size
                        'output': {0: 'batch_size'}}
    )
    print(f"Model exported successfully to {onnx_path}")
except Exception as e:
    print(f"Error exporting model to ONNX: {e}")

# experiemtn with dataset style

In [24]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
import torch.nn.functional as F
from torch.autograd import Variable
from datetime import datetime

In [4]:
class SequenceDataset(Dataset):
    def __init__(self, train_path, seq_length=40):
        self.inputs, self.targets = torch.load(train_path)
        self.seq_length = seq_length
        
    def __len__(self):
        return len(self.inputs) - self.seq_length + 1
    
    def __getitem__(self, idx):
        return (self.inputs[idx:idx + self.seq_length], 
                self.targets[idx + self.seq_length - 1])


In [None]:
data_dir = '0227'
for file in Path('../data/processed_data').glob(f'{data_dir}*.pt'):
    print(file)
    ## add val_dataset here
    train_dataset = SequenceDataset(file)
    for item in enumerate(train_dataset.targets):
        print(f'target:{item}')
    break

In [None]:
data_dir = '0227'
for file in Path('../data/processed_data').glob(f'{data_dir}*.pt'):
    print(file)
    ## add val_dataset here
    train_dataset = SequenceDataset(file)
    for item in enumerate(train_dataset.inputs):
        print(f'input shape:{item[1].shape}')
        print(f'input:{item}')
    break

In [None]:
data_dir = '0227'
for file in Path('../data/processed_data').glob(f'{data_dir}*.pt'):
    print(file)
    ## add val_dataset here
    train_dataset = SequenceDataset(file)
    for item in enumerate(train_dataset):
        print(f'input shape:{item[1][0].shape}')
        print(f'input:{item}')
    break

# training test with differnet  data set style

In [11]:
class SequenceDataset(Dataset):
    def __init__(self, train_path, seq_length=40):
        self.inputs, self.targets = torch.load(train_path)
        self.seq_length = seq_length
        
    def __len__(self):
        return len(self.inputs) - self.seq_length + 1
    
    def __getitem__(self, idx):
        if idx < 0 or idx >= self.__len__():
            raise IndexError(f"Index {idx} out of bounds")
        return (self.inputs[idx:idx + self.seq_length], 
                self.targets[idx:idx + self.seq_length])


In [None]:
data_dir = '0227_change'
for file in Path('../data/processed_data').glob(f'{data_dir}*.pt'):
    print(file)
    ## add val_dataset here
    train_dataset = SequenceDataset(file)
    print(train_dataset.targets.shape,train_dataset.inputs.shape)
    print(len(train_dataset))
    # for item in enumerate(train_dataset):
    #     print(f'set:{item}')
    # break

In [12]:
class LSTMModel(nn.Module):
    def __init__(self, num_features, hidden_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.norm = nn.LayerNorm(num_features)
        # self.hid = nn.Linear(num_features, 16)
        self.lstm = nn.LSTM(
            input_size=num_features,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True
        )
        self.linear = nn.Linear(hidden_size, 2)
    
    def forward(self, x):
        x = self.norm(x)
        # hid = self.hid(F.leaky_relu(norm))
        # # lstm_out, _ = self.lstm(x)
        # print(f'lstm output shape: {lstm_out.shape}')
        # print(f'lstm output values: {lstm_out[:, -1, :].shape}')
        # h_0 = Variable(torch.zeros(1, 12645, 32).cuda())
        # c_0 = Variable(torch.zeros(1, 12645, 32).cuda())
        lstm_out,_ = self.lstm(x)
        return self.linear(lstm_out) 

        # predictions = self.linear(lstm_out[:, -1, :])
        # return lstm_out

In [None]:
# params:
num_hid = 32
learning_rate = 0.1
epochs = 1000
optimizer_type = 'adam'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = len(train_dataset)
train_loader = DataLoader(train_dataset, batch_size=batch_size,shuffle=True,pin_memory=True)
# val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Model initialization
model = LSTMModel(num_features=32, hidden_size=num_hid).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
if optimizer_type.lower() == 'adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
elif optimizer_type.lower() == 'sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
else:
    raise ValueError(f"Unsupported optimizer type: {optimizer_type}")

# TensorBoard setup
# writer = SummaryWriter('runs/lstm_training')

# Training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    
    for batch_idx, (inputs, targets) in enumerate(train_loader):

        inputs = inputs.to(device)
        targets = targets.to(device)
        
        # Forward pass
        outputs = model(inputs) 
        # print(outputs.shape)       
        loss = criterion(outputs, targets).to(device)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        #print(loss.item())
    # Calculate average training loss
    avg_train_loss = total_loss
    
    # Validation every 100 epochs
    if (epoch + 1) % 10 == 0:
    #     val_loss = validate_model(model, val_loader, criterion, device)
        print(f'Epoch [{epoch+1}/{epochs}]',
                f'Training Loss: {avg_train_loss:.4f}')
    if epoch == epochs-1:
        print(outputs)

In [None]:
((324-315)**2+(443-437)**2)/2

In [None]:
outputs.shape

In [None]:
outputs[-1].shape

In [None]:
targets[-1].shape

In [None]:
targets[-1]

In [None]:
(outputs[-1]-targets[-1])**2

In [None]:
loss.item()

In [None]:
targets[-1]

In [16]:
def validate_model(model, val_loader, criterion, device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device).float()
            targets = targets.to(device).float()
            
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
    
    return total_loss / len(val_loader)

In [19]:
def export_model(model, device='cuda', input_shape=(40,1,32)):
    
    curr_time = datetime.now().strftime("%m-%d_%H:%M")
    save_path = f"lstm_{curr_time}"
    # Save PyTorch model
    torch.save({
        'model_state_dict': model.state_dict(),
        'model_architecture': model.__class__.__name__
    }, f"{save_path}.pth")
    
    # Prepare model for ONNX export
    model.eval()    
    # Create dummy input tensor
    dummy_input = torch.randn(input_shape, device=device)
    
    # Export to ONNX
    torch.onnx.export(
        model,
        dummy_input,
        f"{save_path}.onnx",
        export_params=True,
        opset_version=11,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={'input' : {0 : 'sequence_length'},    # variable length axes
                    'output' : {0 : 'sequence_length'}}
    )
    print(f"Model saved as {save_path}.pth and {save_path}.onnx")

In [36]:
#prepare data
data_dir = '0227_change'
train_dataset = None
for file in Path('../data/processed_data').glob(f'{data_dir}*.pt'):
    print(file)
    train_dataset = SequenceDataset(file)
val_dataset = SequenceDataset('../data/processed_data/0227_val_change_DMF_set.pt')

# params:
num_hid = 32
learning_rate = 0.15
epochs = 300
optimizer_type = 'adam'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = len(train_dataset)


train_loader = DataLoader(train_dataset, batch_size=batch_size,shuffle=True,pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Model initialization
model = LSTMModel(num_features=32, hidden_size=num_hid).to(device)
model.load_state_dict(torch.load('lstm_03-10_16:14.pth')['model_state_dict'])
# Loss and optimizer
criterion = nn.MSELoss()
if optimizer_type.lower() == 'adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
elif optimizer_type.lower() == 'sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
else:
    raise ValueError(f"Unsupported optimizer type: {optimizer_type}")

# TensorBoard setup
# writer = SummaryWriter('runs/lstm_training')

# Training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    
    for batch_idx, (inputs, targets) in enumerate(train_loader):

        inputs = inputs.to(device)
        targets = targets.to(device)
        
        # Forward pass
        outputs = model(inputs) 
        # print(outputs.shape)       
        loss = criterion(outputs, targets).to(device)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        #print(loss.item())
    # Calculate average training loss
    avg_train_loss = total_loss
    
    # Validation every 100 epochs
    if (epoch + 1) % 10 == 0:
    #     val_loss = validate_model(model, val_loader, criterion, device)
        print(f'Epoch [{epoch+1}/{epochs}]',
                f'Training Loss: {avg_train_loss:.4f}')
    if (epoch + 1) % 100 == 0:
        val_loss = validate_model(model, val_loader, criterion, device)
        print(f'Epoch [{epoch+1}/{epochs}]',
                f'Validation Loss: {val_loss:.4f}')
    if epoch == epochs-1:
        print(max(abs((outputs-targets)).reshape(-1)))
        export_model(model)

../data/processed_data/0227_change_DMF_train_set.pt


  self.inputs, self.targets = torch.load(train_path)
  model.load_state_dict(torch.load('lstm_03-10_16:14.pth')['model_state_dict'])


Epoch [10/300] Training Loss: 11.8856
Epoch [20/300] Training Loss: 5.7231
Epoch [30/300] Training Loss: 5.0555
Epoch [40/300] Training Loss: 4.5847
Epoch [50/300] Training Loss: 4.2662
Epoch [60/300] Training Loss: 3.9925
Epoch [70/300] Training Loss: 3.7667
Epoch [80/300] Training Loss: 3.5040
Epoch [90/300] Training Loss: 3.3741
Epoch [100/300] Training Loss: 3.2416
Epoch [100/300] Validation Loss: 2.7751
Epoch [110/300] Training Loss: 3.0930
Epoch [120/300] Training Loss: 2.8657
Epoch [130/300] Training Loss: 2.7629
Epoch [140/300] Training Loss: 2.9207
Epoch [150/300] Training Loss: 2.7572
Epoch [160/300] Training Loss: 2.5234
Epoch [170/300] Training Loss: 2.4317
Epoch [180/300] Training Loss: 2.3397
Epoch [190/300] Training Loss: 3.5743
Epoch [200/300] Training Loss: 3.1331
Epoch [200/300] Validation Loss: 2.2262
Epoch [210/300] Training Loss: 2.7379
Epoch [220/300] Training Loss: 2.5580
Epoch [230/300] Training Loss: 2.4433
Epoch [240/300] Training Loss: 2.3360
Epoch [250/300] 

In [40]:
import numpy as np

In [44]:
sum_err = 0
for x in abs((outputs-targets)).reshape(-1):
    sum_err += x
print(sum_err/(len(outputs)*2))

tensor(42.4153, device='cuda:0', grad_fn=<DivBackward0>)
