## Dataset

In [24]:

from torch.utils.data import Dataset
import numpy as np
import torch
import pandas as pd
from sklearn.preprocessing import StandardScaler

class ClusterData(Dataset):
    """Sliding-window dataset built from a comma-separated time-series file.

    Every item pairs a window of ``seq_len`` consecutive rows of columns 0-7
    (standardized per column) with the raw value of column 2 located
    ``horizon`` rows after the last row of that window.

    Args:
        dir: Path of the CSV file. The first line is skipped as a header.
        seq_len: Number of consecutive rows in each input window.
        horizon: Distance, in rows, between the last row of the window and
            the target row. The default of 2 keeps this notebook's existing
            targets (one row is skipped between window and target); pass 1
            to use the row immediately following the window.

    Raises:
        ValueError: If ``seq_len`` or ``horizon`` is smaller than 1.
    """

    def __init__(self, dir, seq_len=100, horizon=2):
        if seq_len < 1:
            raise ValueError(f"seq_len must be >= 1, got {seq_len}")
        if horizon < 1:
            raise ValueError(f"horizon must be >= 1, got {horizon}")

        self.seq_len = seq_len
        self.horizon = horizon

        data = np.genfromtxt(dir, delimiter=',', skip_header=1)
        # Inputs: the first eight columns, standardized column by column.
        self.X = self.scaler(data[:, [0, 1, 2, 3, 4, 5, 6, 7]])
        # Targets: column 2, unscaled, kept 2-D so each label has shape (1,).
        self.y = data[:, [2]]
        self.num_labels = len(np.unique(self.y))
        self.len = len(self.y)

    def __len__(self):
        """Return the number of (window, target) pairs that fit in the data.

        Clamped at zero so a file shorter than ``seq_len + horizon`` rows
        yields an empty dataset rather than a negative length.
        """
        return max(0, self.len - self.seq_len - self.horizon + 1)

    def __getitem__(self, idx):
        """Return the ``idx``-th window and its target as float tensors.

        Args:
            idx: Item position; negative values count from the end.

        Returns:
            Tuple ``(x, label)`` where ``x`` has shape ``(features, seq_len)``
            and ``label`` has shape ``(1,)``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            # Without this, a negative start would slice an empty or
            # misaligned window instead of failing or wrapping around.
            idx = idx + size
        if not 0 <= idx < size:
            raise IndexError(f"index out of range for dataset of size {size}")

        stop = idx + self.seq_len
        # Transpose so the tensor is features-first: (features, seq_len).
        x = np.transpose(self.X[idx:stop])
        label = self.y[stop + self.horizon - 1]

        return torch.tensor(x, dtype=torch.float), torch.tensor(label, dtype=torch.float)

    ## standard scaler
    def scaler(self, X):
        """Standardize every column of ``X`` to zero mean and unit variance.

        Args:
            X: 2-D array laid out as (rows, features).

        Returns:
            Array with the same shape as ``X``.
        """
        # StandardScaler already works column-wise, so X is passed as
        # (rows, features) without transposing. Transposing first would
        # standardize each row across its features instead of each feature
        # across the rows.
        return StandardScaler().fit_transform(X)


In [30]:
# Path of the time-series CSV; the cells below read this same variable.
# NOTE(review): the name `dir` shadows Python's built-in dir() in this kernel.
dir="Data/fishes/fish02/s1/df_timeseries_red_clusters_s1.csv"
# Load the file with pandas (default header handling) and keep only the value array.
data=pd.read_csv(dir).values
# Display the number of rows that were loaded.
len(data)

1031

In [19]:
# Read the same file with NumPy, skipping the header line; this rebinds `data`
# to the NumPy result.
data=np.genfromtxt(dir, delimiter=',', skip_header=1)
# NOTE(review): this echoes the whole array; a slice such as data[:5] would
# keep the cell output short.
data

array([[-0.4653035 , -0.03918175, -0.14511099, ...,  0.06563479,
        -0.13389835, -1.22144705],
       [-0.3604422 ,  0.56555777, -0.02922168, ...,  0.32560086,
        -0.06549535, -0.49053275],
       [-0.20472319,  0.36450274,  0.11925249, ...,  0.43059832,
         0.16412938, -0.5678883 ],
       ...,
       [-1.04949442, -0.80083659,  0.2403238 , ..., -0.1170361 ,
        -0.60914432, -0.770036  ],
       [-0.93945388, -0.66022951,  0.05873017, ..., -0.07327505,
        -0.74917304, -0.60596576],
       [-0.82811714, -0.58881305,  0.12199081, ..., -0.06425575,
        -0.72026573, -1.06753688]])

In [12]:
def normalize(X):
    """Min-max scale every column of a 2-D array into the range [0, 1].

    Args:
        X: 2-D array-like laid out as (rows, features).

    Returns:
        Float ndarray with the same shape as ``X``. A column whose values are
        all equal is mapped to zeros, because its range is zero and a plain
        division would fill it with NaN.
    """
    X = np.asarray(X, dtype=float)
    col_min = X.min(axis=0, keepdims=True)
    col_range = X.max(axis=0, keepdims=True) - col_min
    # Substitute 1 for a zero range: the numerator is already 0 for such a
    # column, so the result is 0 instead of 0/0.
    safe_range = np.where(col_range == 0, 1.0, col_range)
    return (X - col_min) / safe_range

## DataLoader

In [46]:
from torch.utils.data import DataLoader

# Build the dataset once and reuse it for the size check; constructing a second
# ClusterData only to call len() would read and rescale the CSV again.
data = ClusterData(dir)
dataset_size = len(data)
dataset_size

930

In [47]:
# Serve `data` in shuffled mini-batches of 10 items.
# NOTE(review): no random seed is set in the cells shown here, so the batch
# order presumably differs between runs — confirm whether one is set elsewhere.
train_dataloader = DataLoader(data, batch_size=10, shuffle=True)

In [48]:
# Sanity check: draw one batch from train_dataloader and print its tensor shapes.
for batch_idx, (inputs, labels) in enumerate(train_dataloader):
    print(f"Batch {batch_idx + 1}:")
    print(f"Input shape: {inputs.shape}")
    print(f"Label shape: {labels.shape}")
    # One batch is enough to inspect the shapes, so stop after the first.
    break



Batch 1:
Input shape: torch.Size([10, 8, 100])
Label shape: torch.Size([10, 1])


## Model