# 0 Set paths, devices, etc.

In [1]:
!pip install datasets



In [2]:
import numpy as np
import pandas as pd
import os
import h5py
import csv
import re

import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

Using device: cpu



In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
class SlidingWindowDataset(Dataset):
    def __init__(self, tensor, window_size, step_size):
        self.tensor = tensor
        self.window_size = window_size
        self.step_size = step_size
        self.num_samples, self.num_neurons, self.time_points = tensor.shape
        self.output_length = (self.time_points - window_size) // step_size + 1

    def __len__(self):
        return self.output_length

    def __getitem__(self, idx):
        start = idx * self.step_size
        end = start + self.window_size
        return self.tensor[:, :, start:end]

In [6]:
def save_tensor(dataloader, save_path):
    all_windows = []
    for window in tqdm(dataloader):
        all_windows.append(window)
    all_windows = torch.cat(all_windows, dim=0)
    torch.save(all_windows, save_path)

# 1 Find data pairs

In [8]:
data_dir = '/content/drive/MyDrive/neuro2voc/task_4/data/'
tensor_dir = os.path.join(data_dir, '1_tensors')
saving_dir = os.path.join(data_dir, '2_sample_time')

pattern = r'^data_\d+_\d+_\d+\.pt$'

data_files = [f for f in os.listdir(tensor_dir) if re.match(pattern, f)]

print(f"Found {len(data_files)} pairs.")

for data_file in data_files:

    label_file = data_file.replace('data_', 'labels_')

    if not os.path.exists(os.path.join(tensor_dir, label_file)):
        print(f"Not found: {label_file}")


Found 90 pairs.


In [9]:
before = 0
after = 1000

window_size = 500
step_size = 1

In [10]:
tensor_1 = torch.load(f"{tensor_dir}/data_1_{before}_{after}.pt")
tensor_2 = torch.load(f"{tensor_dir}/data_2_{before}_{after}.pt")
tensor_3 = torch.load(f"{tensor_dir}/data_3_{before}_{after}.pt")

  tensor_1 = torch.load(f"{tensor_dir}/data_1_{before}_{after}.pt")
  tensor_2 = torch.load(f"{tensor_dir}/data_2_{before}_{after}.pt")
  tensor_3 = torch.load(f"{tensor_dir}/data_3_{before}_{after}.pt")


In [11]:
tensor_1 = tensor_1.bool()
tensor_2 = tensor_2.bool()
tensor_3 = tensor_3.bool()

In [15]:
dataset_1 = SlidingWindowDataset(tensor_1, window_size, step_size)
dataset_2 = SlidingWindowDataset(tensor_2, window_size, step_size)
dataset_3 = SlidingWindowDataset(tensor_3, window_size, step_size)

In [16]:
dataloader_1 = DataLoader(dataset_1, batch_size=1, shuffle=False)
dataloader_2 = DataLoader(dataset_2, batch_size=1, shuffle=False)
dataloader_3 = DataLoader(dataset_3, batch_size=1, shuffle=False)

In [None]:
print("Processing tensor_1...")
save_tensor(dataloader_1, f'{saving_dir}/data_1_{before}_{after}_{window_size}_{step_size}.pt')

print("Processing tensor_2...")
save_tensor(dataloader_2, f'{saving_dir}/data_2_{before}_{after}_{window_size}_{step_size}.pt')

print("Processing tensor_3...")
save_tensor(dataloader_3, f'{saving_dir}/data_3_{before}_{after}_{window_size}_{step_size}.pt')

print(f'Windowed tensors saved to {saving_dir}/data_1-3_{before}_{after}_{window_size}_{step_size}.pt')

Processing tensor_1...


100%|██████████| 501/501 [00:02<00:00, 178.48it/s]


Processing tensor_2...


100%|██████████| 501/501 [00:00<00:00, 621.02it/s]


Processing tensor_3...


 96%|█████████▌| 480/501 [00:15<00:03,  6.62it/s]

In [24]:
# tensor_1 = torch.load(f"{tensor_dir}/data_1_{before}_{after}.pt").to(device)
# tensor_2 = torch.load(f"{tensor_dir}/data_2_{before}_{after}.pt").to(device)
tensor_3 = torch.load(f"{tensor_dir}/data_3_{before}_{after}.pt")

  tensor_3 = torch.load(f"{tensor_dir}/data_3_{before}_{after}.pt")


In [None]:
# windowed_tensor_1 = sliding_window(tensor_1, window_size, step_size)
# windowed_tensor_2 = sliding_window(tensor_2, window_size, step_size)
windowed_tensor_3 = sliding_window(tensor_3, window_size, step_size)

In [None]:
windowed_tensor_1 = torch.from_numpy(windowed_tensor_1)
windowed_tensor_2 = torch.from_numpy(windowed_tensor_2)
windowed_tensor_3 = torch.from_numpy(windowed_tensor_3)

In [None]:


window_size = 500
step_size = 1

windowed_tensor_1 = sliding_window(tensor_1, window_size, step_size)
windowed_tensor_2 = sliding_window(tensor_2, window_size, step_size)
windowed_tensor_3 = sliding_window(tensor_3, window_size, step_size)

torch.save(windowed_tensor_1, f'{saving_dir}/data_1_{before}_{after}_{window_size}_{step_size}.pt')
torch.save(windowed_tensor_2, f'{saving_dir}/data_2_{before}_{after}_{window_size}_{step_size}.pt')
torch.save(windowed_tensor_3, f'{saving_dir}/data_3_{before}_{after}_{window_size}_{step_size}.pt')

print(f'Windowed tensors saved to {data_dir}2-sample-time/data_1-3_{before}_{after}_{window_size}_{step_size}.pt')
print(f'Windowed tensor shape: {windowed_tensor_1.shape}')

  tensor_1 = torch.load(f"{tensor_dir}/data_1_{before}_{after}.pt")
  tensor_2 = torch.load(f"{tensor_dir}/data_2_{before}_{after}.pt")
  tensor_3 = torch.load(f"{tensor_dir}/data_3_{before}_{after}.pt")


In [None]:
tensor_1 = torch.load(f"{tensor_dir}/data_1_{before}_{after}.pt")
tensor_2 = torch.load(f"{tensor_dir}/data_2_{before}_{after}.pt")
tensor_3 = torch.load(f"{tensor_dir}/data_3_{before}_{after}.pt")

window_size = 500
step_size = 1

windowed_tensor_1 = sliding_window(tensor_1, window_size, step_size)
windowed_tensor_2 = sliding_window(tensor_2, window_size, step_size)
windowed_tensor_3 = sliding_window(tensor_3, window_size, step_size)

torch.save(windowed_tensor_1, f'{data_dir}2-sample-time/data_1_{before}_{after}_{window_size}_{step_size}.pt')
torch.save(windowed_tensor_2, f'{data_dir}2-sample-time/data_2_{before}_{after}_{window_size}_{step_size}.pt')
torch.save(windowed_tensor_3, f'{data_dir}2-sample-time/data_3_{before}_{after}_{window_size}_{step_size}.pt')

print(f'Windowed tensors saved to {data_dir}2-sample-time/data_1-3_{before}_{after}_{window_size}_{step_size}.pt')
print(f'Windowed tensor shape: {windowed_tensor_1.shape}')

### 1.3 Function

In [None]:
before = "2000"
after = "1000"

tensor_1 = torch.load(f"{load_tensor_data_dir}1_{before}_{after}.pt")
tensor_2 = torch.load(f"{load_tensor_data_dir}2_{before}_{after}.pt")
tensor_3 = torch.load(f"{load_tensor_data_dir}3_{before}_{after}.pt")

window_size = 1000
step_size = 1

centered_data_1 = sliding_window_sum(tensor_1, window_size, step_size, centered=True)
centered_data_2 = sliding_window_sum(tensor_2, window_size, step_size, centered=True)
centered_data_3 = sliding_window_sum(tensor_3, window_size, step_size, centered=True)

centered_data_1 = torch.tensor(centered_data_1)
centered_data_2 = torch.tensor(centered_data_2)
centered_data_3 = torch.tensor(centered_data_3)

torch.save(centered_data_1, f'{data_dir}2_concatenated/data_1_{before}_{after}_{window_size}_{step_size}.pt')
torch.save(centered_data_2, f'{data_dir}2_concatenated/data_2_{before}_{after}_{window_size}_{step_size}.pt')
torch.save(centered_data_3, f'{data_dir}2_concatenated/data_3_{before}_{after}_{window_size}_{step_size}.pt')

print(f'Centered tensor saved to {data_dir}2_concatenated/data_1-3_{before}_{after}_{window_size}_{step_size}.pt')

Centered tensor saved to drive/MyDrive/neuro2voc/task_4/data/2_concatenated/data_1-3_2000_1000_1000_1.pt
