# Exploration

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the MIT-BIH train and test CSVs. header=None makes pandas treat the
# first row as data and label the columns with integers.
mitbih_train, mitbih_test = (
    pd.read_csv(csv_name, header=None)
    for csv_name in ('mitbih_train.csv', 'mitbih_test.csv')
)

In [None]:
def _feature_label_names(n_columns):
    """Return ['feature_0', ..., 'feature_<n_columns - 2>', 'label']."""
    names = [f"feature_{i}" for i in range(n_columns - 1)]
    names.append('label')
    return names


# Every column except the last becomes "feature_<i>"; the last one is "label".
mitbih_train.columns = _feature_label_names(len(mitbih_train.columns))
mitbih_test.columns = _feature_label_names(len(mitbih_test.columns))

In [None]:
# Bar chart of how many rows carry each label in the MIT-BIH train split.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=mitbih_train, x='label', ax=ax)
ax.set_title('Class distribution in train set (MITBIH)')
plt.show()

# Same chart for the MIT-BIH test split.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=mitbih_test, x='label', ax=ax)
ax.set_title('Class distribution in test set (MITBIH)')
plt.show()

In [None]:
# Read the two PTBDB CSVs (abnormal first, then normal). header=None makes
# pandas treat the first row as data and label the columns with integers.
ptbdb_abnormal, ptbdb_normal = (
    pd.read_csv(csv_name, header=None)
    for csv_name in ('ptbdb_abnormal.csv', 'ptbdb_normal.csv')
)

In [None]:
# Tag each source frame with its class (0 = normal file, 1 = abnormal file),
# then stack them, normal rows first, under a fresh 0..n-1 index.
ptbdb_normal['label'], ptbdb_abnormal['label'] = 0, 1
ptbdb = pd.concat(
    [ptbdb_normal, ptbdb_abnormal],
    axis=0,
    ignore_index=True,
)

In [None]:
# Plot the first row of each PTBDB class (all columns except the trailing
# 'label'). Each plot gets its own explicitly sized figure: after plt.show()
# the previous figure is finished, so without a second plt.figure() call the
# second plot would be drawn on an implicit default-size figure.
normal_example = ptbdb[ptbdb['label'] == 0].iloc[0, :-1]
plt.figure(figsize=(12, 6))
plt.plot(normal_example, label='Normal')
plt.title('Normal series example')
plt.xlabel('Time step')
plt.ylabel('Signal value')
plt.legend()
plt.show()

anormal_example = ptbdb[ptbdb['label'] == 1].iloc[0, :-1]
plt.figure(figsize=(12, 6))
plt.plot(anormal_example, label='Anormal')
plt.title('Anormal series example')
plt.xlabel('Time step')
plt.ylabel('Signal value')
plt.legend()
plt.show()

In [None]:
def _plot_mean_band(mean, std, line_label, title):
    """Draw `mean` as a line with a translucent mean +/- std band around it."""
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(mean, label=line_label)
    positions = range(len(mean))
    ax.fill_between(positions, mean - std, mean + std, alpha=0.2)
    ax.set_title(title)
    ax.legend()
    plt.show()


# Per-column mean and standard deviation of each class, computed over every
# column except the trailing 'label'.
normal_rows = ptbdb.loc[ptbdb['label'] == 0].iloc[:, :-1]
anormal_rows = ptbdb.loc[ptbdb['label'] == 1].iloc[:, :-1]

mean_normal, std_normal = normal_rows.mean(), normal_rows.std()
mean_anormal, std_anormal = anormal_rows.mean(), anormal_rows.std()

_plot_mean_band(mean_normal, std_normal, 'Normal mean', 'Normal mean and standard deviation')
_plot_mean_band(mean_anormal, std_anormal, 'Anormal mean', 'Anormal mean and standard deviation')

# Train

In [None]:
def _features_and_labels(frame):
    """Split a frame into (all-but-last columns, last column) as NumPy arrays."""
    return frame.iloc[:, :-1].to_numpy(), frame.iloc[:, -1].to_numpy()


# MIT-BIH: labels are cast to int after extraction.
X_mitbih_train, Y_mitbih_train = _features_and_labels(mitbih_train)
Y_mitbih_train = Y_mitbih_train.astype(int)
X_mitbih_test, Y_mitbih_test = _features_and_labels(mitbih_test)
Y_mitbih_test = Y_mitbih_test.astype(int)

# PTBDB: the label column was assigned as Python ints, so no cast is applied.
X_ptbdb, Y_ptbdb = _features_and_labels(ptbdb)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of PTBDB for testing with a fixed seed. Stratifying on the
# label keeps the class 0 / class 1 ratio the same in both partitions, so the
# reported test accuracy is not skewed by an unlucky split.
X_ptbdb_train, X_ptbdb_test, Y_ptbdb_train, Y_ptbdb_test = train_test_split(
    X_ptbdb,
    Y_ptbdb,
    test_size=0.2,
    random_state=42,
    stratify=Y_ptbdb,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise each dataset with its OWN scaler, fitted on that dataset's
# training split only and then applied to its test split. A single shared
# scaler would be refit by the second fit_transform call and lose the first
# dataset's statistics, so it could not be reused on new data from it.
mitbih_scaler = StandardScaler()
X_mitbih_train = mitbih_scaler.fit_transform(X_mitbih_train)
X_mitbih_test = mitbih_scaler.transform(X_mitbih_test)

ptbdb_scaler = StandardScaler()
X_ptbdb_train = ptbdb_scaler.fit_transform(X_ptbdb_train)
X_ptbdb_test = ptbdb_scaler.transform(X_ptbdb_test)

# Backward-compatible alias: the previously shared `scaler` ended this cell
# fitted on the PTBDB training split.
scaler = ptbdb_scaler

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class MLP(nn.Module):
    """Two-hidden-layer perceptron mapping a flat feature vector to class logits.

    Architecture:
        Linear(input_size, 128) -> ReLU -> Dropout(0.2)
        -> Linear(128, 64) -> ReLU -> Linear(64, num_classes)

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output logits. Defaults to 5, the width that
            was previously hard-coded, so existing ``MLP(input_size=...)``
            calls behave exactly as before.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """

    def __init__(self, input_size, num_classes=5):
        super().__init__()
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")
        self.hidden1 = nn.Linear(input_size, 128)
        self.hidden2 = nn.Linear(128, 64)
        self.output = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return raw (un-normalised) logits of shape (batch, num_classes)."""
        x = self.relu(self.hidden1(x))
        # Dropout is applied after the first hidden layer only.
        x = self.dropout(x)
        x = self.relu(self.hidden2(x))
        # No softmax here: the output is returned as plain logits.
        return self.output(x)
    
def train(num_epochs, model, train_loader, criterion, optimizer):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Each batch runs the usual zero-grad / forward / loss / backward / step
    cycle. After every epoch the mean of that epoch's per-batch loss values
    is printed, rather than the loss of whichever batch happened to be last.

    Args:
        num_epochs: Number of full passes over ``train_loader``.
        model: Module to optimise; put into training mode at each epoch start.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        A list with one float per epoch: the mean per-batch loss, or ``nan``
        for an epoch in which ``train_loader`` yielded no batches.
    """
    epoch_losses = []
    for epoch in range(num_epochs):
        model.train()
        loss_sum = 0.0
        num_batches = 0
        for X_batch, Y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, Y_batch)
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
            num_batches += 1
        # An empty loader leaves nothing to average; report nan instead of
        # failing on an undefined loss.
        epoch_loss = loss_sum / num_batches if num_batches else float('nan')
        epoch_losses.append(epoch_loss)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
    return epoch_losses

def eval(model, test_loader):
    """Print and return the classification accuracy of ``model`` in percent.

    The model is switched to evaluation mode and gradients are disabled. The
    predicted class of each sample is the index of its largest output.

    NOTE: this function shadows Python's built-in ``eval``; the name is kept
    because other cells call it.

    Args:
        model: Module producing one score per class for each input row.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        Accuracy as a float in [0, 100], or ``nan`` when ``test_loader``
        yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, Y_batch in test_loader:
            predicted = model(X_batch).argmax(dim=1)
            total += Y_batch.size(0)
            correct += (predicted == Y_batch).sum().item()
    # Guard the division so an empty loader reports nan instead of raising.
    accuracy = 100 * correct / total if total else float('nan')
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy

In [None]:
# --- MIT-BIH: train and evaluate the MLP ---

# Seed PyTorch's global RNG before the model and loaders are created so that
# weight initialisation, dropout masks and batch shuffling repeat from run to
# run (on CPU; some GPU kernels may remain non-deterministic).
torch.manual_seed(42)

# Features as float32, class indices as int64 (what CrossEntropyLoss expects).
X_train_tensor = torch.tensor(X_mitbih_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_mitbih_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_mitbih_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_mitbih_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)

# Shuffle only the training batches; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

model = MLP(input_size=X_mitbih_train.shape[1])
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train(10, model, train_loader, criterion, optimizer)
eval(model, test_loader)

In [None]:
# --- PTBDB: train and evaluate a fresh MLP ---

# Seed PyTorch's global RNG before the model and loaders are created so that
# weight initialisation, dropout masks and batch shuffling repeat from run to
# run (on CPU; some GPU kernels may remain non-deterministic).
torch.manual_seed(42)

# Features as float32, class indices as int64 (what CrossEntropyLoss expects).
# NOTE: the label tensors here use a lower-case `y_` prefix, so they do not
# overwrite the `Y_*_tensor` names from the MIT-BIH cell.
X_train_tensor = torch.tensor(X_ptbdb_train, dtype=torch.float32)
y_train_tensor = torch.tensor(Y_ptbdb_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_ptbdb_test, dtype=torch.float32)
y_test_tensor = torch.tensor(Y_ptbdb_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Shuffle only the training batches; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# The MLP head emits 5 logits; PTBDB labels are only 0 and 1, so the other
# three outputs are never the training target.
model = MLP(input_size=X_ptbdb_train.shape[1])
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train(10, model, train_loader, criterion, optimizer)
eval(model, test_loader)