In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import numpy as np
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [2]:
class AssemblyDataset(Dataset):
    def __init__(self, assembly_path,version):
        self.assembly_files = [os.path.join(assembly_path,version,f ) for f in os.listdir(os.path.join(assembly_path, version))]

    def __len__(self):
        return len(self.assembly_files)

    def __getitem__(self,file_idx):
        assembly_file = self.assembly_files[file_idx]
        assembly_vector = torch.load(assembly_file)
        return assembly_vector
assembly_dir="vec\\assembly"
for i in os.listdir(assembly_dir):
    assembly_dataset = AssemblyDataset(assembly_dir, i)
    print(f"{assembly_dataset[0].dtype}\t{assembly_dataset[0].shape}\t{len(assembly_dataset)}")

torch.float32	torch.Size([1871, 100])	36
torch.float32	torch.Size([1874, 100])	36
torch.float32	torch.Size([1880, 100])	36
torch.float32	torch.Size([1886, 100])	36
torch.float32	torch.Size([1886, 100])	36
torch.float32	torch.Size([1884, 100])	36
torch.float32	torch.Size([1871, 100])	36


In [3]:
class CPPDataset(Dataset):
    def __init__(self, cpp_path):
        self.cpp_files = [os.path.join(cpp_path, f) for f in os.listdir(
            os.path.join(cpp_path))]

    def __len__(self):
        return len(self.cpp_files)

    def __getitem__(self, file_idx):
        cpp_file = self.cpp_files[file_idx]
        cpp_vector = torch.load(cpp_file)
        return cpp_vector
cpp_dir="vec\\cpp"
cpp_dataset=CPPDataset(cpp_dir)

In [4]:
next(zip(cpp_dataset, assembly_dataset))[0]==cpp_dataset[0]

tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]])

In [5]:
next(zip(cpp_dataset, assembly_dataset))[1] == assembly_dataset[0]

tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]])

In [6]:
assembly_vec = assembly_dataset[0].to(device)
cpp_vec = cpp_dataset[0].to(device)

In [7]:
class TransformerModel(nn.Module):
    def __init__(self, input_dim, nhead, num_layers, dim_feedforward, device="cuda:0"):
        super(TransformerModel, self).__init__()
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=input_dim,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                device=device
            ),
            num_layers=num_layers,
        )

    def forward(self, x):
        return self.transformer_encoder(x)
t=TransformerModel(100,5,1,200)
t.forward(assembly_vec).shape, t.forward(cpp_vec).shape

  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)


(torch.Size([1871, 100]), torch.Size([546, 100]))

In [8]:
class CrossAttentionTransformer(nn.Module):
    def __init__(self, input_dim, nhead, num_layers, dim_feedforward, device="cuda:0"):
        super(CrossAttentionTransformer, self).__init__()
        self.attention_layer = nn.MultiheadAttention(
            embed_dim=input_dim, num_heads=nhead,device=device
        )
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=input_dim,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                device=device,
            ),
            num_layers=num_layers,
        )

    def forward(self, assembly_vec, cpp_vec):
        # k:cpp_vec , q:assembly_vec , v:assembly_vec
        cpp_q, _ = self.attention_layer(cpp_vec, assembly_vec, assembly_vec)
        assembly_q, _ = self.attention_layer(assembly_vec, cpp_vec, cpp_vec)
        combined = torch.cat((cpp_q, assembly_q), dim=0)
        return self.transformer_encoder(combined)
c = CrossAttentionTransformer(100, 5, 1, 200)
assembly_vec=t.forward(assembly_vec)
cpp_vec=t.forward(cpp_vec)
c.forward(assembly_vec,cpp_vec).shape

torch.Size([2417, 100])

In [9]:
class CoordinationTransformer(nn.Module):
    def __init__(self, emb_dim, nhead, num_layers, dim_feedforward,output_dim=1,device='cuda:0'):
        super(CoordinationTransformer, self).__init__()
        self.assembly_transformer = TransformerModel(emb_dim, nhead, num_layers, dim_feedforward,device=device)
        self.cpp_transformer = TransformerModel(
            emb_dim, nhead, num_layers, dim_feedforward,device=device
        )
        self.cross_attention = CrossAttentionTransformer(
            emb_dim, nhead, num_layers, dim_feedforward,device=device
        )
        self.mlp = nn.Linear(emb_dim, output_dim,device=device)
    def forward(self, assembly_vec, cpp_vec):
        assembly_vec = self.assembly_transformer(assembly_vec)
        cpp_vec = self.cpp_transformer(cpp_vec)
        cross_out=self.cross_attention(assembly_vec, cpp_vec)
        cross_out=cross_out.mean(dim=0)
        return self.mlp(cross_out)
ct=CoordinationTransformer(100,2,3,128)
ct.forward(assembly_vec,cpp_vec)

tensor([0.1475], device='cuda:0', grad_fn=<ViewBackward0>)

In [10]:
import pandas as pd
version_list=os.listdir(assembly_dir)
version_target = dict.fromkeys(version_list)
for i in range(len(version_list)):
    version_target[version_list[i]]=i
version_target

{'10.2.0': 0,
 '11.3.0': 1,
 '12.2.0': 2,
 '13.2.0': 3,
 '7.5.0': 4,
 '8.4.0': 5,
 '9.2.0': 6}

In [11]:
model=CoordinationTransformer(100,2,3,128).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
losses = dict().fromkeys(version_list)
for i in losses.keys():
    losses[i] = []



In [12]:
for version in os.listdir(assembly_dir):
    assembly_dataset = AssemblyDataset(assembly_dir, version)
    target = torch.tensor(version_target[version]).to(device)
    for assembly_vec,cpp_vec in zip(assembly_dataset,cpp_dataset):
        assembly_vec=assembly_vec.to(device)
        cpp_vec=cpp_vec.to(device)
        optimizer.zero_grad()
        output=ct.forward(assembly_vec,cpp_vec)
        loss = criterion(output, target)
        losses[version].append(loss.item())
        loss.backward()
        optimizer.step()
import matplotlib.pyplot as plt
for version in losses.keys():
    plt.plot(losses[version], label=version)

OutOfMemoryError: CUDA out of memory. Tried to allocate 5.24 GiB. GPU 0 has a total capacity of 8.00 GiB of which 0 bytes is free. Of the allocated memory 17.80 GiB is allocated by PyTorch, and 4.04 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)