In [None]:
import os
import pandas as pd
import numpy as np
import mne
import sys
import random
import glob
import torch
import matplotlib.pyplot as plt
from pymatreader import read_mat
from torch.utils.data import Dataset
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
%matplotlib widget

In [None]:
# Load one training recording (subject0 / train2.mat) and list the top-level
# fields that the .mat file exposes.
data = read_mat(os.path.join('.', 'train', 'subject0', 'train2.mat'))
print(data.keys()) # type: ignore

In [None]:
def make_mne_data(data):
    """Wrap one loaded recording in an ``mne.io.RawArray``.

    Args:
        data: Mapping that provides ``'ch_labels'`` (channel names) and
            ``'data'`` (the signal array handed to ``RawArray``).

    Returns:
        The ``RawArray`` with the ``standard_1020`` montage applied.
    """
    # Channel names with embedded spaces removed.
    channel_names = []
    for label in data['ch_labels']:
        channel_names.append(label.replace(' ', ''))

    # Every channel is declared as EEG; the sampling frequency is fixed at 500.
    info = mne.create_info(  # type: ignore
        ch_names=channel_names,
        sfreq=500,
        ch_types=['eeg'] * len(channel_names),
    )

    # Scale by 1e-6 (the original note says this converts to volts, so the
    # stored signal is presumably in microvolts -- confirm against the data).
    signal_scaled = data['data'] * 1e-6
    raw = mne.io.RawArray(signal_scaled, info)

    montage = mne.channels.make_standard_montage('standard_1020')
    raw.set_montage(montage)
    return raw

In [None]:
# Build the MNE Raw object for the sample recording, then print its summary
# and its measurement info.
raw = make_mne_data(data)
print(raw)
print(raw.info)

In [None]:
# Browse the raw traces: 5-second window, up to 72 channels shown at once.
raw.plot(duration=5, n_channels=72)

In [None]:
# Put the event annotations into a DataFrame, casting 'type' and 'init_index'
# to int, and preview the first rows.
events = pd.DataFrame(data['event']).astype({'type': int, 'init_index':int}) # type: ignore
events.head()

In [None]:
# Build the three-column event table (sample index, init_index, event code)
# that is passed to mne.viz.plot_events. The table is rebuilt from
# data['event'] instead of mutating/renaming `events` in place, so this cell
# can be re-run without a KeyError on the already-renamed columns.
events = (
    pd.DataFrame(data['event'])  # type: ignore
    .astype({'type': int, 'init_index': int})
    # init_time (presumably seconds) -> sample index at 500 Hz, i.e. 2 ms
    # spacing. Round before casting: a plain int cast truncates values such as
    # 144.9999999 to the previous sample.
    .assign(init_time=lambda df: (df['init_time'] * 500).round().astype(int))
    .rename(columns={'init_time': 'id', 'init_index': 'test', 'type': 'event_id'})
    [['id', 'test', 'event_id']]
)
event_dict = {
    'led/frontside_kickturn': 11,
    'led/backside_kickturn': 12,
    'led/pumping': 13,
    'laser/frontside_kickturn': 21,
    'laser/backside_kickturn': 22,
    'laser/pumping': 23
} # mapping from trick/condition name to event code

In [None]:
# Plot the events over time, labelled through event_dict.
mne.viz.plot_events(events, event_id=event_dict, sfreq=500) # the sampling rate is 500 Hz, so sfreq is set to 500

In [None]:
# Load the test file for subject0 and list the top-level fields it exposes.
test_data = read_mat(os.path.join('.', 'test', 'subject0.mat'))
print(test_data.keys()) # type: ignore

In [None]:
# Inspect the array shapes of the test signal and of its channel labels.
print(test_data['data'].shape, test_data['ch_labels'].shape) # type: ignore

In [None]:
def make_data(src_dir, dst_dir, subject_id, val_fname='train3.mat'):
    """Cut per-event segments out of one subject's recordings and save them as CSV.

    Every ``.mat`` file under ``<src_dir>/train/<subject_id>`` is read. For each
    event, the rows whose ``Time`` lies between ``init_time + 0.2`` and
    ``init_time + 0.7`` (both scaled by 1e3 before the comparison, so ``Time``
    is presumably in milliseconds and ``init_time`` in seconds) are written to
    ``<dst_dir>/<split>/<subject_id>/<label>/<NNN>.csv`` without the ``Time``
    column, index or header.

    Args:
        src_dir: Root directory containing ``train/<subject_id>``.
        dst_dir: Root directory that receives the ``train`` and ``val`` trees.
        subject_id: Sub-directory name of the subject to process.
        val_fname: Recording whose segments go to ``val``; all other
            recordings go to ``train``.

    Raises:
        KeyError: If an event type is not one of 11, 12, 13, 21, 22, 23.
    """
    print(subject_id)
    # Event codes 1x and 2x map onto the same three trick labels.
    labels = {
        '11': 'frontside_kickturn',
        '12': 'backside_kickturn',
        '13': 'pumping',
        '21': 'frontside_kickturn',
        '22': 'backside_kickturn',
        '23': 'pumping'
    }
    # One running counter per label, shared by train and val, used as file name.
    counts = dict.fromkeys(('frontside_kickturn', 'backside_kickturn', 'pumping'), 0)

    subject_src = os.path.join(src_dir, 'train', subject_id)
    os.makedirs(os.path.join(dst_dir, 'train', subject_id), exist_ok=True)
    os.makedirs(os.path.join(dst_dir, 'val', subject_id), exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so the numbering of
    # the exported files is reproducible, and skip anything that is not a
    # .mat recording (e.g. OS metadata files) instead of crashing in read_mat.
    fnames = sorted(f for f in os.listdir(subject_src) if f.endswith('.mat'))
    for fname in fnames:
        data = read_mat(os.path.join(subject_src, fname))
        event = pd.DataFrame(data['event'])[['init_time', 'type']] # type: ignore
        # One row per sample: the time stamp followed by every channel value.
        ts = pd.DataFrame(
            np.concatenate([np.array([data['times']]), data['data']]).T,
            columns=['Time'] + list(data['ch_labels']),
        ) # type: ignore
        split = 'val' if fname == val_fname else 'train'

        for _, d in event.iterrows():
            it = d['init_time'] + 0.2
            et = d['init_time'] + 0.7
            label = labels[str(int(d['type']))]

            in_window = (ts['Time'] >= it * 1e3) & (ts['Time'] <= et * 1e3)
            # drop() returns a new frame, avoiding a `del` on a filtered slice.
            ts_seg = ts.loc[in_window].drop(columns='Time')

            out_dir = os.path.join(dst_dir, split, subject_id, label)
            os.makedirs(out_dir, exist_ok=True)
            ts_seg.to_csv(
                os.path.join(out_dir, '{:03d}.csv'.format(counts[label])),
                index=False,
                header=False,
            )
            counts[label] += 1

In [None]:
# Export the per-event segments of all five subjects from the current
# directory into the 'test_modeling' tree.
src_dir = '.'
dst_dir = 'test_modeling'
subject_ids = ['subject0', 'subject1', 'subject2', 'subject3', 'subject4']
for subject_id in subject_ids:
    make_data(src_dir=src_dir, dst_dir=dst_dir, subject_id=subject_id)

In [None]:
class SeqDataset(Dataset):
    """In-memory dataset of preprocessed sequences, one class per sub-directory.

    ``root`` is expected to contain one directory per class; every file inside
    a class directory is read with ``pd.read_csv(header=None)`` and passed
    through ``preprocess``. The label of a sample is the index of its class
    directory in the sorted ``class_names`` list.

    Args:
        root: Directory holding the class sub-directories.
        seq_length: Forwarded to ``transform`` as ``seq_length``.
        is_train: Forwarded to ``transform`` as ``is_train``.
        transform: Optional callable ``transform(seq, is_train=, seq_length=)``
            applied in ``__getitem__``.
    """

    def __init__(self, root, seq_length, is_train, transform=None):
        self.transform = transform
        self.seqs = []
        self.seq_labels = []
        # Only real, non-hidden sub-directories are classes. Taking every entry
        # of `root` would turn stray files or folders such as
        # `.ipynb_checkpoints` into classes and shift the label indices.
        self.class_names = sorted(
            name for name in os.listdir(root)
            if not name.startswith('.') and os.path.isdir(os.path.join(root, name))
        )
        self.numof_classes = len(self.class_names)
        self.seq_length = seq_length
        self.is_train = is_train

        for label, class_name in enumerate(self.class_names):
            # Sorted so that sample order is reproducible across runs.
            for path in sorted(glob.glob(os.path.join(root, class_name, '*'))):
                frame = pd.read_csv(path, header=None)
                self.seqs.append(preprocess(frame))
                self.seq_labels.append(label)

    def __getitem__(self, index):
        """Return ``{'seq': ..., 'label': ...}`` for the sample at ``index``."""
        seq = self.seqs[index]
        if self.transform is not None:
            seq = self.transform(seq, is_train=self.is_train, seq_length=self.seq_length)
        return {'seq': seq, 'label': self.seq_labels[index]}

    def __len__(self):
        """Return the number of loaded sequences."""
        return len(self.seqs)


def preprocess(df: pd.DataFrame)->np.ndarray:
    """Convert one segment frame into a standardized array.

    The frame is transposed (rows and columns swapped) and each row of the
    transposed matrix is then standardized with ``standardization(axis=1)``.

    Args:
        df: Segment as read from CSV.

    Returns:
        The transposed, row-wise standardized values.
    """
    return standardization(df.T.values, axis=1)


def standardization(a, axis=None, ddof=0):
    """Subtract the mean and divide by the standard deviation along ``axis``.

    Args:
        a: Array to standardize.
        axis: Axis passed to ``mean``/``std``; ``None`` uses the whole array.
        ddof: Delta degrees of freedom passed to ``std``.

    Returns:
        ``(a - mean) / std``, where a standard deviation of exactly zero is
        replaced by 1 so constant slices come out as zeros instead of NaN.
    """
    center = a.mean(axis=axis, keepdims=True)
    spread = a.std(axis=axis, keepdims=True, ddof=ddof)
    # Guard against division by zero for constant slices.
    spread[spread == 0] = 1
    return (a - center) / spread


def add_noise(data, noise_level=0.01):
    """Return ``data`` plus zero-mean Gaussian noise, cast to float32.

    Args:
        data: Array to perturb; its ``shape`` determines the noise shape.
        noise_level: Standard deviation of the noise.

    Returns:
        A float32 array of the same shape as ``data``.
    """
    # Drawn from NumPy's global random state.
    jitter = np.random.normal(0, noise_level, data.shape)
    return (data + jitter).astype(np.float32)


def time_shift(data, shift, axis=-1):
    """Circularly shift ``data`` by ``shift`` positions along ``axis``.

    ``np.roll`` without an axis rolls the *flattened* array, which on a 2-D
    array moves samples from the end of one row into the start of the next.
    Rolling along the last axis keeps every row separate. For 1-D input the
    result is the same as before.

    Args:
        data: Array to shift.
        shift: Number of positions; negative values shift the other way.
        axis: Axis to roll along. Pass ``None`` for the flattened roll.

    Returns:
        The shifted array, same shape as ``data``.
    """
    return np.roll(data, shift, axis=axis)


def transform(array, is_train, seq_length):
    """Crop a 2-D sequence to ``seq_length`` columns, with augmentation in training.

    Evaluation mode returns the first ``seq_length`` columns as float32.
    Training mode takes a randomly positioned window of ``seq_length``
    columns, adds noise through ``add_noise`` and, on a coin flip, reverses
    the column order of the window.

    Args:
        array: 2-D array; the crop is taken along the second axis.
        is_train: Selects the augmented (True) or deterministic (False) path.
        seq_length: Number of columns to keep.

    Returns:
        A float32 array with at most ``seq_length`` columns.
    """
    if not is_train:
        return array[:, :seq_length].astype(np.float32)

    _, total = array.shape
    start = random.randint(0, total - seq_length)
    window = add_noise(array[:, start:start + seq_length]).astype(np.float32)
    flip = random.randint(0, 1)
    # .copy() turns the reversed view into a freshly allocated array.
    return window[:, ::-1].copy() if flip else window

In [None]:
# Build a small demo loader to sanity-check the dataset pipeline.
batch_size = 10
# Pick the subject explicitly. The previous version read `subject_id`, which
# only existed as a leftover of an earlier for-loop (its last value), so the
# cell silently depended on hidden kernel state.
demo_subject_id = subject_ids[-1]
train_dir = os.path.join('test_modeling', 'train', demo_subject_id)
dataset = SeqDataset(root=train_dir, seq_length=250, is_train=True, transform=transform)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True) # type: ignore

In [None]:
# Print the class names, then the tensor shape and labels of every mini-batch.
print(dataset.class_names)
for i, mini_batch in enumerate(data_loader):
    print(mini_batch['seq'].shape, mini_batch['label'])

In [None]:
class Net1DBN(torch.nn.Module):
    """Four-layer 1-D convolutional classifier with batch normalization.

    Stages 1-3 apply Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d; stage 4
    applies Conv1d -> BatchNorm1d, followed by global average pooling over
    the last axis and a linear layer that produces ``num_classes`` scores.

    Args:
        num_channels: Number of input channels of the first convolution.
        num_classes: Size of the output score vector.
    """

    def __init__(self, num_channels, num_classes):
        super().__init__()
        hidden = 128

        def conv(in_width):
            # All convolutions share kernel size 3 / stride 1 and 128 filters.
            return torch.nn.Conv1d(in_width, hidden, kernel_size=3, stride=1)

        # Sub-modules are registered convolutions first, then the norms, so
        # the order of parameters()/state_dict() entries is convs, bns, fc.
        self.conv1 = conv(num_channels)
        self.conv2 = conv(hidden)
        self.conv3 = conv(hidden)
        self.conv4 = conv(hidden)
        self.bn1, self.bn2, self.bn3, self.bn4 = (
            torch.nn.BatchNorm1d(hidden) for _ in range(4)
        )
        self.maxpool = torch.nn.MaxPool1d(kernel_size=3, stride=2)
        self.gap = torch.nn.AdaptiveAvgPool1d(1)
        self.fc = torch.nn.Linear(hidden, num_classes)

    def forward(self, x):
        """Return the class scores for the input batch ``x``."""
        pooled_stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in pooled_stages:
            x = self.maxpool(torch.relu(norm(conv(x))))

        # The last stage has no ReLU or max-pool before the global average.
        x = self.bn4(self.conv4(x))
        features = self.gap(x).squeeze(2)
        return self.fc(features)

In [None]:
# Smoke test: push a random batch of shape (8, num_channels, 300) through a
# freshly constructed network and print the resulting scores.
num_channels = 72  # number of channels
num_classes = 3    # number of trick classes to discriminate
model = Net1DBN(num_channels, num_classes)
in_data = torch.randn(8, num_channels, 300)
out_data = model(in_data)
print(out_data)

In [None]:
def train(log_interval, model, device, train_loader, optimizer, epoch, iteration):
    """Run one pass over ``train_loader`` and return the updated step counter.

    Args:
        log_interval: A progress line is written whenever the running
            iteration count is a multiple of this value.
        model: Network to optimize; switched to training mode.
        device: Device the batch tensors are moved to.
        train_loader: Iterable of batches with ``'seq'`` and ``'label'`` entries.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress line.
        iteration: Number of optimizer steps taken before this call.

    Returns:
        ``iteration`` increased by the number of batches processed.
    """
    model.train()
    loss_fn = torch.nn.CrossEntropyLoss()
    for batch in train_loader:
        inputs = batch['seq'].to(device)
        targets = batch['label'].to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        # Index of the highest score per sample, kept as a column vector.
        _, predicted = logits.max(1, keepdim=True)
        n_correct = predicted.eq(targets.view_as(predicted)).sum().item()

        loss = loss_fn(logits, targets)
        loss.backward()
        optimizer.step()
        iteration += 1

        if iteration % log_interval != 0:
            continue
        # Accuracy of the current batch only, in percent.
        batch_accuracy = 100.*n_correct/float(len(batch['label']))
        sys.stdout.write('\repoch:{0:>3} iteration:{1:>6} train_loss: {2:.6f} train_accracy: {3:5.2f}%'.format(
            epoch, iteration, loss.item(), batch_accuracy))
        sys.stdout.flush()
    return iteration


def val(model, device, test_loader):
    """Compute mean loss and accuracy of ``model`` over ``test_loader``.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        device: Device the batch tensors are moved to.
        test_loader: Loader of batches with ``'seq'`` and ``'label'`` entries;
            ``len(test_loader.dataset)`` is used as the sample count.

    Returns:
        ``(mean_loss, accuracy_percent)``; the same values are also printed.
    """
    model.eval()
    # Summed per batch and divided by the dataset size afterwards.
    loss_fn = torch.nn.CrossEntropyLoss(reduction='sum')
    n_samples = float(len(test_loader.dataset))
    loss_total = 0
    hits = 0
    with torch.no_grad():
        for batch in test_loader:
            inputs = batch['seq'].to(device)
            targets = batch['label'].to(device)
            logits = model(inputs)
            loss_total += loss_fn(logits, targets).item()
            _, predicted = logits.max(1, keepdim=True)
            hits += predicted.eq(targets.view_as(predicted)).sum().item()

    mean_loss = loss_total / n_samples
    accuracy = hits / n_samples
    print('\n  Validation: Accuracy: {0:.2f}%  test_loss: {1:.6f}'.format(100. * accuracy, mean_loss))
    return mean_loss, 100. * accuracy


def evaluate(model, device, test_loader):
    """Predict every sample of ``test_loader`` and report the results.

    Shows a confusion matrix, prints a classification report and returns the
    overall accuracy together with the per-sample predictions.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        device: Device the batch tensors are moved to.
        test_loader: Loader of batches with ``'seq'`` and ``'label'`` entries
            whose ``dataset`` exposes ``class_names``.

    Returns:
        ``(accuracy, df)`` where ``accuracy`` is the fraction of correct
        predictions (0.0 when there are no samples) and ``df`` has the string
        columns ``'pred'`` and ``'true'``.
    """
    labels = test_loader.dataset.class_names
    preds = []
    trues = []
    model.eval()
    with torch.no_grad():
        for sample_batched in test_loader:
            data, target = sample_batched['seq'].to(device), sample_batched['label'].to(device)
            output = model(data)
            # Map predicted / true class indices back to class names.
            preds.extend(labels[i] for i in output.max(1)[1].cpu().tolist())
            trues.extend(labels[i] for i in target.cpu().tolist())

    cm = confusion_matrix(trues, preds, labels=labels)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
    disp.plot()
    plt.show()

    # Pass labels= together with target_names. Without it the report infers
    # the classes from the data and raises ValueError when a class appears in
    # neither the truths nor the predictions.
    cr = classification_report(trues, preds, labels=labels, target_names=labels)
    print(cr)

    correct = sum(1 for pred, true in zip(preds, trues) if pred == true)
    df = pd.DataFrame({'pred': preds, 'true': trues})
    # Avoid ZeroDivisionError on an empty loader.
    accuracy = correct / len(trues) if trues else 0.0

    return accuracy, df


def train_evaluate(train_dir, val_dir, log_interval, num_epoches, seq_length, transform=None, num_channels=72, num_classes = 3):
    """Train a fresh ``Net1DBN`` on ``train_dir`` and evaluate it on ``val_dir``.

    Validation metrics are printed every tenth epoch, and a full evaluation
    (confusion matrix, report, accuracy) is run once after the last epoch.

    Args:
        train_dir: Root of the training class directories.
        val_dir: Root of the validation class directories.
        log_interval: Progress logging interval forwarded to ``train``.
        num_epoches: Number of training epochs.
        seq_length: Sequence length forwarded to both datasets.
        transform: Optional transform forwarded to both datasets.
        num_channels: Input channel count of the network.
        num_classes: Output class count of the network.

    Returns:
        The trained model.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net1DBN(num_channels=num_channels, num_classes=num_classes)
    model.to(device)

    # Training data is shuffled; validation data keeps its stored order.
    train_set = SeqDataset(root=train_dir, seq_length=seq_length, is_train=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=20, shuffle=True) # type: ignore
    optimizer = torch.optim.Adam(model.parameters())
    val_set = SeqDataset(root=val_dir, seq_length=seq_length, is_train=False, transform=transform)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=20, shuffle=False) # type: ignore

    iteration = 0
    for epoch in range(1, num_epoches + 1):
        iteration = train(log_interval, model, device, train_loader, optimizer, epoch, iteration)
        if epoch % 10 == 0:
            val(model, device, val_loader)

    acc, _ = evaluate(model, device, val_loader)
    print(acc)
    return model

In [None]:
# Train one model per subject (100 epochs, sequence length 250) and keep the
# trained models in a dict keyed by subject id for the inference step.
log_interval = 5000
num_epoches = 100
seq_length = 250
models = {}
for subject_id in subject_ids:
    train_dir = os.path.join('test_modeling', 'train', subject_id)
    val_dir = os.path.join('test_modeling', 'val', subject_id)
    model = train_evaluate(train_dir, val_dir, log_interval, num_epoches, seq_length, transform)
    models[subject_id] = model

In [None]:
def output_pred(src_dir, root_dir, subject_ids, models, seq_length, transform, device):
    """Predict a class name for every test trial of every subject.

    For each subject, ``<src_dir>/test/<subject_id>.mat`` is read and each
    entry of its ``'data'`` field is standardized along axis 1, passed through
    ``transform`` in evaluation mode and classified by ``models[subject_id]``.

    Args:
        src_dir: Root directory containing the ``test`` folder.
        root_dir: Directory whose ``<subject_id>`` sub-folders name the
            classes; their sorted names map prediction indices to labels.
        subject_ids: Subjects to process.
        models: Mapping from subject id to its trained model.
        seq_length: Sequence length forwarded to ``transform``.
        transform: Callable ``transform(array, is_train=, seq_length=)``.
        device: Device used for inference; each model is moved onto it.

    Returns:
        A ``pd.Series`` of class names indexed by ``'<subject_id>_<NNN>'``.
    """
    predictions = {}
    for subject_id in subject_ids:
        train_dir = os.path.join(root_dir, subject_id)
        # Only real, non-hidden sub-directories are classes; stray files or
        # folders such as `.ipynb_checkpoints` would shift the index mapping.
        class_names = sorted(
            name for name in os.listdir(train_dir)
            if not name.startswith('.') and os.path.isdir(os.path.join(train_dir, name))
        )
        data = read_mat(os.path.join(src_dir, 'test', '{}.mat'.format(subject_id)))

        # Make sure the model lives on the inference device and runs in
        # evaluation mode, so batch normalization uses its running statistics
        # rather than the statistics of the single-sample batch.
        model = models[subject_id].to(device)
        was_training = model.training
        model.eval()
        try:
            # No gradients are needed for inference.
            with torch.no_grad():
                for i, ts in enumerate(data['data']): # type: ignore
                    seq = transform(standardization(ts, axis=1), is_train=False, seq_length=seq_length)
                    batch = torch.from_numpy(seq).unsqueeze(0).to(device) # type: ignore
                    _, output_index = model(batch).max(1)
                    predictions['{}_{:03d}'.format(subject_id, i)] = class_names[int(output_index.item())]
        finally:
            # Leave the caller's model in the mode it was handed over in.
            model.train(was_training)
    result = pd.Series(predictions)

    return result

In [None]:
# Run inference for every subject with the per-subject models and write the
# predictions to 'submit.csv' without a header row.
src_dir = '.'
root_dir = os.path.join('test_modeling', 'train')
subject_ids = ['subject0', 'subject1', 'subject2', 'subject3', 'subject4']
seq_length = 250
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
result = output_pred(src_dir, root_dir, subject_ids, models, seq_length, transform, device)
result.to_csv('submit.csv', header=False)