In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
pd.set_option('display.max_columns', 500)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, f1_score, recall_score, precision_score

from ViT_model import VisionTransformer
from transformer_model import Transformer

In [2]:
# Pick the compute device in order of preference: CUDA GPU, Apple MPS, CPU.
#
# `torch.has_mps` is deprecated and only reports that PyTorch was *built* with
# MPS support. `torch.backends.mps.is_available()` additionally checks that the
# current machine can actually run on it, so we never select an unusable device.
_mps_backend = getattr(torch.backends, "mps", None)  # missing on old PyTorch builds

if torch.cuda.is_available():
    device = torch.device('cuda')
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

print("Device: {}".format(device))

Device: mps


In [3]:
# Load the feature table and drop the index column that was written out with
# the CSV ('Unnamed: 0'); the label table is read as-is here.
pd_xela_allfiles = pd.read_csv('data.csv').drop(columns=['Unnamed: 0'])
pd_sliplabel_allfiles = pd.read_csv('labels.csv')

# Preview the first rows of the feature table.
pd_xela_allfiles.head()

Unnamed: 0,txl1_x,txl1_y,txl1_z,txl2_x,txl2_y,txl2_z,txl3_x,txl3_y,txl3_z,txl4_x,txl4_y,txl4_z,txl5_x,txl5_y,txl5_z,txl6_x,txl6_y,txl6_z,txl7_x,txl7_y,txl7_z,txl8_x,txl8_y,txl8_z,txl9_x,txl9_y,txl9_z,txl10_x,txl10_y,txl10_z,txl11_x,txl11_y,txl11_z,txl12_x,txl12_y,txl12_z,txl13_x,txl13_y,txl13_z,txl14_x,txl14_y,txl14_z,txl15_x,txl15_y,txl15_z,txl16_x,txl16_y,txl16_z
0,15825.0,16435.0,39825.0,15955.0,16459.0,39477.0,15767.0,16355.0,39049.0,15264.0,16591.0,40432.0,15804.0,16539.0,38817.0,15890.0,16367.0,38264.0,15758.0,16392.0,38276.0,15566.0,16494.0,38594.0,16179.0,16362.0,38096.0,15985.0,16319.0,38264.0,15794.0,16181.0,37601.0,15631.0,16283.0,38356.0,16171.0,16215.0,38774.0,15976.0,16356.0,38258.0,15887.0,16228.0,37885.0,15693.0,16427.0,38642.0
1,15825.0,16435.0,39825.0,15955.0,16459.0,39477.0,15767.0,16355.0,39049.0,15264.0,16591.0,40432.0,15804.0,16539.0,38817.0,15890.0,16367.0,38264.0,15758.0,16392.0,38276.0,15566.0,16494.0,38594.0,16179.0,16362.0,38096.0,15985.0,16319.0,38264.0,15794.0,16181.0,37601.0,15631.0,16283.0,38356.0,16171.0,16215.0,38774.0,15976.0,16356.0,38258.0,15887.0,16228.0,37885.0,15693.0,16427.0,38642.0
2,15826.0,16436.0,39818.0,15955.0,16459.0,39482.0,15767.0,16354.0,39049.0,15265.0,16589.0,40434.0,15805.0,16539.0,38812.0,15890.0,16365.0,38264.0,15756.0,16394.0,38277.0,15563.0,16493.0,38594.0,16179.0,16362.0,38092.0,15985.0,16320.0,38265.0,15793.0,16183.0,37597.0,15630.0,16283.0,38347.0,16172.0,16217.0,38777.0,15975.0,16356.0,38252.0,15888.0,16231.0,37885.0,15692.0,16429.0,38641.0
3,15826.0,16435.0,39826.0,15956.0,16458.0,39478.0,15766.0,16354.0,39051.0,15264.0,16590.0,40438.0,15805.0,16540.0,38812.0,15892.0,16367.0,38269.0,15756.0,16394.0,38277.0,15563.0,16493.0,38594.0,16177.0,16360.0,38099.0,15986.0,16321.0,38265.0,15793.0,16183.0,37599.0,15630.0,16282.0,38353.0,16170.0,16215.0,38776.0,15975.0,16355.0,38257.0,15888.0,16231.0,37885.0,15692.0,16429.0,38641.0
4,15826.0,16432.0,39827.0,15956.0,16458.0,39483.0,15768.0,16355.0,39051.0,15265.0,16589.0,40437.0,15806.0,16537.0,38811.0,15892.0,16366.0,38263.0,15757.0,16393.0,38283.0,15563.0,16495.0,38593.0,16179.0,16360.0,38092.0,15988.0,16320.0,38271.0,15793.0,16182.0,37601.0,15630.0,16284.0,38350.0,16171.0,16212.0,38774.0,15974.0,16356.0,38257.0,15887.0,16228.0,37884.0,15693.0,16426.0,38641.0


In [4]:
# Summarise the feature frame: row count, column dtypes and non-null counts.
pd_xela_allfiles.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229651 entries, 0 to 229650
Data columns (total 48 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   txl1_x   229651 non-null  float64
 1   txl1_y   229651 non-null  float64
 2   txl1_z   229651 non-null  float64
 3   txl2_x   229651 non-null  float64
 4   txl2_y   229651 non-null  float64
 5   txl2_z   229651 non-null  float64
 6   txl3_x   229651 non-null  float64
 7   txl3_y   229651 non-null  float64
 8   txl3_z   229651 non-null  float64
 9   txl4_x   229651 non-null  float64
 10  txl4_y   229651 non-null  float64
 11  txl4_z   229651 non-null  float64
 12  txl5_x   229651 non-null  float64
 13  txl5_y   229651 non-null  float64
 14  txl5_z   229651 non-null  float64
 15  txl6_x   229651 non-null  float64
 16  txl6_y   229651 non-null  float64
 17  txl6_z   229651 non-null  float64
 18  txl7_x   229651 non-null  float64
 19  txl7_y   229651 non-null  float64
 20  txl7_z   229651 non-null  

In [5]:
# Remove the index column that was written out with the CSV ('Unnamed: 0').
# A non-inplace drop with errors='ignore' keeps this cell idempotent: the
# original in-place drop raised KeyError when the cell was executed a second time.
pd_sliplabel_allfiles = pd_sliplabel_allfiles.drop(columns=['Unnamed: 0'], errors='ignore')

# Preview the first rows of the label table.
pd_sliplabel_allfiles.head()

Unnamed: 0,slip
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


In [6]:
# Summarise the label frame: row count, column dtype and non-null count.
pd_sliplabel_allfiles.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229651 entries, 0 to 229650
Data columns (total 1 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   slip    229651 non-null  float64
dtypes: float64(1)
memory usage: 1.8 MB


In [7]:
# Pull the raw NumPy arrays out of the DataFrames.
# Features keep their (n_rows, n_columns) layout.
pd_data = pd_xela_allfiles.to_numpy()

# Labels are forced into an explicit column vector of shape (n_rows, 1).
n_label_rows = len(pd_sliplabel_allfiles)
pd_label = pd_sliplabel_allfiles.to_numpy().reshape(n_label_rows, 1)

In [8]:
# Convert features and labels to float32 torch tensors.
#
# The names are rebound from ndarray to Tensor, so the original
# `pd_data.astype(...)` raised AttributeError when this cell was run twice
# (tensors have no `.astype`). Going through np.asarray accepts both an ndarray
# and an already-converted CPU tensor, which makes the cell safe to re-run.
pd_data = torch.from_numpy(np.asarray(pd_data, dtype=np.float32))
pd_label = torch.from_numpy(np.asarray(pd_label, dtype=np.float32))

In [9]:
# Fixed seed so the train / validation / test partition is identical on every
# run; without it each execution shuffles differently and results cannot be
# reproduced or compared.
SPLIT_SEED = 42

# Hold out 10% of all samples as the test set.
pd_data_train, pd_data_test, pd_label_train, pd_label_test = train_test_split(
    pd_data, pd_label, test_size=0.1, shuffle=True, random_state=SPLIT_SEED)

# Split the remaining samples into train (70%) and validation (30%).
pd_data_train, pd_data_valid, pd_label_train, pd_label_valid = train_test_split(
    pd_data_train, pd_label_train, test_size=0.3, shuffle=True, random_state=SPLIT_SEED)


In [10]:
class Batch_Taxels(Dataset):
    """Dataset that serves either the training split or the validation split.

    Both splits are stored on the instance; the ``valid`` flag given at
    construction decides which one ``len()`` and indexing operate on.

    Args:
        pd_data_train: Training features, indexable along the first axis.
        pd_label_train: Training labels, aligned with ``pd_data_train``.
        pd_data_valid: Validation features, indexable along the first axis.
        pd_label_valid: Validation labels, aligned with ``pd_data_valid``.
        valid: When it compares equal to ``True`` the validation split is
            served; any other value (default ``None``) serves the training split.
    """

    def __init__(self, pd_data_train, pd_label_train, pd_data_valid, pd_label_valid, valid = None):
        self.x, self.y = pd_data_train, pd_label_train
        self.xvalid, self.yvalid = pd_data_valid, pd_label_valid
        self.valid = valid

    def _active_split(self):
        """Return the ``(features, labels)`` pair selected by ``self.valid``."""
        serve_validation = self.valid == True  # same equality test as before, not mere truthiness
        return (self.xvalid, self.yvalid) if serve_validation else (self.x, self.y)

    def __len__(self):
        """Number of samples in the active split (size of its first axis)."""
        features, _ = self._active_split()
        return features.shape[0]

    def __getitem__(self, idx):
        """Return ``(feature, label)`` at ``idx``, each moved to the global ``device``."""
        features, labels = self._active_split()
        # NOTE(review): samples are transferred to `device` one at a time here;
        # the training loop relies on batches already living on that device.
        return features[idx].to(device), labels[idx].to(device)

# Mini-batch size shared by both loaders (was a magic number repeated twice).
BATCH_SIZE = 256

# Two views over the same split tensors: `dataset` serves the training split,
# `dataset2` (valid=True) serves the validation split.
dataset = Batch_Taxels(pd_data_train, pd_label_train, pd_data_valid, pd_label_valid)
dataset2 = Batch_Taxels(pd_data_train, pd_label_train, pd_data_valid, pd_label_valid, valid = True)

# Training batches are reshuffled every epoch.
xelaloader = DataLoader(dataset = dataset, batch_size=BATCH_SIZE, shuffle=True)
# Validation is evaluation only: sample order does not influence learning, so
# keep it fixed to make per-batch validation numbers repeatable between runs.
xelaloadervalid = DataLoader(dataset = dataset2, batch_size=BATCH_SIZE, shuffle=False)

In [11]:
# Sanity check: show the feature / label shapes of the first training batch only.
for batch_features, batch_labels in xelaloader:
    print(batch_features.shape, batch_labels.shape)
    break

torch.Size([256, 48]) torch.Size([256, 1])


In [12]:
# Hyper-parameters for the project-local Transformer, gathered in one place.
# n_features=48 matches the 48 feature columns of the data frame; n_classes=1
# gives a single output per sample, which is what BCEWithLogitsLoss consumes.
model_kwargs = dict(
    n_classes=1,
    n_features=48,
    embed_dim=32,
    depth=2,
    n_heads=4,
    mlp_ratio=1.,
    qkv_bias=True,
    p=0.1,
    attn_p=0.1,
    proj_p=0.1,
)

# Build the network and move its parameters to the selected device.
model = Transformer(**model_kwargs).to(device)

In [13]:
# Training configuration.
num_epochs = 10

# Binary cross-entropy that takes raw logits (the sigmoid is applied inside the loss).
criterion = nn.BCEWithLogitsLoss()

# Adam over every model parameter with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [15]:
# Training and validation loop.
#
# NOTE(review): the recorded run of this cell ended with
#   RuntimeError: Given normalized_shape=[32], expected input with shape [*, 32],
#   but got input of size[256, 49, 1]
# which presumably originates inside `Transformer` (transformer_model.py, not
# visible from this notebook) — confirm the input layout the model expects
# before re-running; nothing in this loop changes the tensor shapes.

# Loss of every batch over the whole run (all epochs concatenated).
train_loss = []
valid_loss = []

# Mean loss of each epoch.
t_loss = []
v_loss = []

# Accuracy of each epoch.
t_acc_t = []
v_acc_t = []

for epoch in range(num_epochs):
    # ---------------- training ----------------
    total = 0      # samples seen this epoch
    correct = 0    # correctly classified samples this epoch
    train_start = len(train_loss)  # where this epoch's batches begin in the history

    model.train()
    for (x, y) in xelaloader:
        # Forward pass and loss on raw logits.
        y_pred = model(x)
        l = criterion(y_pred, y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        l.backward()
        optimizer.step()

        train_loss.append(l.item())

        # The model emits logits (BCEWithLogitsLoss applies the sigmoid itself),
        # so they must be squashed to probabilities before rounding to {0, 1};
        # rounding the raw logits produced a meaningless accuracy.
        total += y.size(0)
        correct += torch.sigmoid(y_pred).round().eq(y).sum().item()

    t_acc = correct / total
    t_acc_t.append(t_acc)
    # Average only this epoch's batches, not the cumulative history.
    t_loss.append(np.mean(train_loss[train_start:]))

    # ---------------- validation ----------------
    total = 0
    correct = 0
    valid_start = len(valid_loss)

    model.eval()
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for (x, y) in xelaloadervalid:
            y_pred_test = model(x)
            lv = criterion(y_pred_test, y)
            valid_loss.append(lv.item())

            total += y.size(0)
            correct += torch.sigmoid(y_pred_test).round().eq(y).sum().item()

    v_acc = correct / total
    v_acc_t.append(v_acc)
    v_loss.append(np.mean(valid_loss[valid_start:]))

    # Report the epoch means rather than whatever the last batch happened to be.
    print(f'Epoch {epoch+1}, loss = {t_loss[-1]:.8f} , val_loss = {v_loss[-1]:.8f}')

RuntimeError: Given normalized_shape=[32], expected input with shape [*, 32], but got input of size[256, 49, 1]

In [None]:
# Write the model's state_dict to 'transformer.pth' (path is relative to the
# current working directory); only the state_dict is saved, not the model object.
torch.save(model.state_dict(), 'transformer.pth')