In [1]:
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
import os
import matplotlib.pyplot as plt

device = (
    "cuda"
    if torch.cuda.is_available()
    else "cpu"
)

In [105]:
# To get tensors, use torch.from_numpy()

array = np.array(
    [
    [[1,2,3,4,5,6,7,8,9],
    [11,12,13,14,15,16,17,18,19],],
    
    [[21,22,23,24,25,26,27,28,29],
    [31,32,33,34,35,36,37,38,39],],
    ])

array2 = np.array(
    [
    [[1,2,3,4,5,6,7,8,9, 40, 41,],
    [11,12,13,14,15,16,17,18,19, 50, 51],],
    
    [[21,22,23,24,25,26,27,28,29, 60, 61],
    [31,32,33,34,35,36,37,38,39, 70, 71],],
    ])

array3 = np.array(
    [
    [[1,2,3,4,5,6,7,8,9, 40, 41,],
    [11,12,13,14,15,16,17,18,19, 50, 51],],
    
    [[21,22,23,24,25,26,27,28,29, 60, 61],
    [31,32,33,34,35,36,37,38,39, 70, 71],],
    ])


torcharray = torch.from_numpy(array).to(device=device, dtype=torch.float)
torcharray2 = torch.from_numpy(array2).to(device=device, dtype=torch.float)

Conv1D_dept = torch.nn.Conv1d(in_channels=2, out_channels=2, kernel_size= 2, stride=1, padding=0, bias=False, device = device,)
Conv1D_pointwise = torch.nn.Conv1d(in_channels=2, out_channels=3, kernel_size= 2, stride=1, padding=0, bias=False, device = device)

In [107]:
print(torcharray.shape)
torcharray

torch.Size([2, 2, 9])


tensor([[[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
         [11., 12., 13., 14., 15., 16., 17., 18., 19.]],

        [[21., 22., 23., 24., 25., 26., 27., 28., 29.],
         [31., 32., 33., 34., 35., 36., 37., 38., 39.]]], device='cuda:0')

In [8]:
class RUL_Dataset(Dataset):
    """Face Landmarks dataset. Example from PyTorch: see https://pytorch.org/tutorials/beginner/data_loading_tutorial.html"""

    def __init__(self, train_dir, permutations, max_starting = 1e4, min_lenght = 3600, transform=None):
        """
        Arguments:
            train_dir (string): path to a directory with all csv files with n time series of features with labels (labels are csv last series) .
            permutations (integer): number of truncated time series we want to extract for 1 epoch
            max_starting (int): The maximum value for the starting point of the series
            min_lenght (int): The minimum lenght for all series
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.transform = transform
        self.permutations = permutations
        self.min_lenght = min_lenght
        self.max_starting = max_starting

        # Checking for different types 
        signs = [' ', ',', ';','    ']
        series_t = []
        for file_path in os.listdir(train_dir):

            full_path = '/'.join([train_dir, file_path])

            for sign in signs:
                try:
                    series_t += [torch.Tensor(np.loadtxt(full_path, delimiter=sign))]
                    print(f"Sign : '{sign}' works")
                except : 
                    print(f"Sign : '{sign}' does not work")

        self.series_t = series_t

        all_series_len = [series_t[i].shape[1] for i in range(len(series_t))]
        self.all_series_len = all_series_len

    def __len__(self):
        return self.permutations

    def __getitem__(self, idx):
        
        # Draw the random sequence lenght
        a = 0.5
        z = np.random.uniform(size = 1)
        z = a*z[0]**3 - a*1.5*z[0]**2 + (1+0.5*a)*z[0]


        sequence_lenght = int(self.min_lenght + z*(max(self.all_series_len) - self.min_lenght))

        existence, start_points = self._selfbatch(self.all_series_len, sequence_lenght)
        

        label = torch.as_tensor(np.array([[self.series_t[i][-1, start_points[i]+sequence_lenght ]] for i in existence]))
        # List of all series features
        features = torch.as_tensor(np.array([self.series_t[i][:-1, start_points[i] : start_points[i]+sequence_lenght]  for i in existence ]))

        return features, label
    
    
    def _selfbatch(self, all_series_len, seq_len):

        # decides which series are included in thos batch
        existence = [i for i in range(len(all_series_len)) if seq_len<all_series_len[i]]

        # randomly select start_points that guarantee existence
        start_points = np.random.randint(low=0, high=[max(all_series_len[i] - seq_len, 1) for i in range(len(all_series_len))])

        return existence, start_points

In [9]:
training_dataset = RUL_Dataset(train_dir="D:\Imperial_College\FYP\B0_tests\Training", permutations=200, )

loader = DataLoader(training_dataset, batch_size=1, shuffle=False)


Sign : ' ' does not work
Sign : ',' works
Sign : ';' does not work
Sign : '    ' does not work
Sign : ' ' does not work
Sign : ',' works
Sign : ';' does not work
Sign : '    ' does not work
Sign : ' ' does not work
Sign : ',' works
Sign : ';' does not work
Sign : '    ' does not work
Sign : ' ' does not work
Sign : ',' works
Sign : ';' does not work
Sign : '    ' does not work


In [11]:
next(iter(loader))

random variable transform z is :0.3069093484747535
Random lenght: 13344
Chosen series: [0, 1, 2, 3]
Random start points: [ 2717   724  6273 11888]


[tensor([[[[ 4.9717e-02, -1.6850e-01,  1.9289e-01,  ...,  9.7187e-02,
             6.9624e-02,  1.1863e-01],
           [-7.8471e-01, -7.2190e-01, -7.2729e-01,  ..., -1.3617e+00,
            -1.5037e+00, -1.5143e+00],
           [-8.4011e-02, -8.3600e-02, -8.4232e-02,  ..., -2.7351e-01,
            -2.7190e-01, -2.7216e-01],
           [-7.7650e-02, -8.1015e-02, -8.2973e-02,  ..., -4.3038e-02,
            -1.6133e-02, -2.4287e-02]],
 
          [[-9.4739e-02, -9.4004e-02, -1.7483e-01,  ...,  8.0876e-02,
             5.6628e-02,  2.0623e-02],
           [ 2.4107e-01,  5.0125e-01,  5.4413e-01,  ..., -1.0436e+00,
            -9.2221e-01, -1.1142e+00],
           [-9.9682e-02, -1.0055e-01, -1.0123e-01,  ..., -2.1559e-01,
            -2.1504e-01, -2.1593e-01],
           [-8.4312e-03, -1.7658e-02, -6.2377e-03,  ...,  1.6444e-03,
             8.5608e-03, -1.8819e-02]],
 
          [[ 3.0076e-01,  9.3441e-02,  3.7409e-02,  ...,  9.9845e-02,
             3.0957e-01,  1.4867e-01],
           [-

In [69]:
train_dir = "D:\Imperial_College\FYP\B0_tests\Training"
signs = [' ', ',', ';','    ']
series_t = []
for file_path in os.listdir(train_dir):
    full_path = '/'.join([train_dir, file_path])
    for sign in signs:
        try:
            series_t += [torch.Tensor(np.loadtxt(full_path, delimiter=sign))]
            print(f"Sign : '{sign}' works")
        except : 
            print(f"Sign : '{sign}' does not work")

all_series_len = [series_t[i].shape[1] for i in range(len(series_t))]
print(f"Len list : {all_series_len}")

seq_len = 15000
print(f"Seq Len : {seq_len}")

existence = [i for i in range(len(all_series_len)) if seq_len<all_series_len[i]]
print(f"Existence : {existence}")

start_points = np.random.randint(low=0, high=[max(all_series_len[i] - seq_len, 1) for i in range(len(all_series_len))])
print(f"Start points : {start_points}")


Sign : ' ' does not work
Sign : ',' works
Sign : ';' does not work
Sign : '    ' does not work
Sign : ' ' does not work
Sign : ',' works
Sign : ';' does not work
Sign : '    ' does not work
Sign : ' ' does not work
Sign : ',' works
Sign : ';' does not work
Sign : '    ' does not work
Sign : ' ' does not work
Sign : ',' works
Sign : ';' does not work
Sign : '    ' does not work
Len list : [22928, 35352, 24366, 29163]
Seq Len : 15000
Existence : [0, 1, 2, 3]
Start points : [  660 11494  2861 13804]


In [72]:
start_points_class = np.random.randint(low=0, high=[max(all_series_len[i] - seq_len, 1) for i in range(len(all_series_len))])

print(start_points_class)

[ 6381 18091  8845  7443]
