In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

## Reading in data

In [None]:
# Both splits are read with header=None: the first line of each CSV is treated
# as data and the columns get integer labels (0, 1, 2, ...).
train_csv = "../data/mitbih/mitbih_train.csv"
test_csv = "../data/mitbih/mitbih_test.csv"

mitbih_train_df = pd.read_csv(train_csv, header=None)
mitbih_test_df = pd.read_csv(test_csv, header=None)

In [None]:
# Preview the first rows of the training frame.
mitbih_train_df.head()

In [None]:
# (number of rows, number of columns) of the training frame.
mitbih_train_df.shape

In [None]:
# Preview the first rows of the test frame.
mitbih_test_df.head()

In [None]:
# (number of rows, number of columns) of the test frame.
mitbih_test_df.shape

## Looking at data distribution

In [None]:
# Column 187 holds the class id; labels[i] and the colour at position i below
# describe class id i (the same order the per-class plots enumerate).
labels = ["Normal beats", "Supra. beats", "Ventric. beats", "Fusion beats", "Unknown beats"]

mitbih_train_df[187] = mitbih_train_df[187].astype(int)

# value_counts() orders its result by frequency, not by class id. Reindexing by
# class id keeps count[i] paired with labels[i]; a class with no rows gets 0.
count = (
    mitbih_train_df[187]
    .value_counts()
    .reindex(range(len(labels)), fill_value=0)
)

fig, ax = plt.subplots(figsize=(10, 10))
ax.pie(
    count,
    labels=labels,
    colors=["green", "blue", "yellow", "purple", "lightblue"],
    autopct='%1.0f%%',
)
# A white disc over the centre turns the pie into a donut chart.
ax.add_artist(plt.Circle((0, 0), 0.7, color="white"))

fig.savefig("images/data_distro.pdf", bbox_inches='tight')
plt.show()

## Looking at the ECG signals for the different classes

In [None]:
# Draw one random row per class label (column 187). The rows come back ordered
# by class label, and the fixed seed makes the chosen rows reproducible across
# kernel restarts.
samples = mitbih_train_df.groupby(187).sample(n=1, random_state=42)
samples

In [None]:
# Position in this list == class id stored in column 187.
class_names = ["Normal beats", "Supra. beats", "Ventric. beats", "Fusion beats", "Unknown beats"]

fig = plt.figure(figsize=(20, 20))
for class_id, name in enumerate(class_names):
    # Five panels placed on a 3x3 grid; the unused grid slots stay empty.
    ax = fig.add_subplot(3, 3, class_id + 1)
    # Select the sampled row by its label instead of relying on row order, and
    # keep every column in front of the label column, i.e. columns 0..186.
    beat = samples.loc[samples[187] == class_id].iloc[0, :187]
    ax.plot(beat)
    ax.set_xlabel("ms")
    ax.set_ylabel("mV")
    ax.set_title(name)

# NOTE(review): a 20 x 20 inch figure at dpi=960 is a 19200 x 19200 pixel PNG;
# lower the dpi if saving turns out to be slow or memory-hungry.
fig.savefig("images/graphs_ecg_2.png", dpi=960)
plt.show()

## Time series heatmap for the different classes

In [None]:
def heatmap(df, class_label, min_val, size, title, label_col=187, bins=(65, 65)):
    """Draw a 2D histogram (column position vs. value) for one class.

    Rows of ``df`` whose ``label_col`` equals ``class_label`` are cut to the
    positional column window ``[min_val, size)``. Every value in that window
    becomes one ``(column position, value)`` point, and the points are binned
    with ``plt.hist2d`` on the current axes, followed by a colorbar and title.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds the rows to draw and the label column.
    class_label : scalar
        Value of ``label_col`` that selects the rows.
    min_val : int
        First column position of the window (inclusive).
    size : int
        End column position of the window (exclusive).
    title : str
        Text appended to ``'2D Histogram - '`` for the axes title.
    label_col : hashable, default 187
        Name of the column holding the class label.
    bins : int or pair of ints, default (65, 65)
        Passed through to ``plt.hist2d``.
    """
    # Build the row mask from the frame that was passed in, so the function
    # also works for frames other than the notebook-level training frame.
    window = df.loc[df[label_col] == class_label].to_numpy()[:, min_val:size]
    n_rows, n_cols = window.shape

    # x coordinate of every value: the column positions repeated once per row,
    # which matches the row-major order of window.ravel(). Sizing it from the
    # actual window width keeps x and y the same length.
    positions = np.tile(np.arange(min_val, min_val + n_cols), n_rows)

    plt.hist2d(positions, window.ravel(), bins=bins, cmap=plt.cm.jet)
    plt.colorbar()
    plt.title('2D Histogram - ' + title)

In [None]:
beat_classes = ("Normal beats", "Supra. beats", "Ventric. beats", "Fusion beats", "Unknown beats")

# One panel per class on a 3x3 grid. heatmap() draws through pyplot, so each
# subplot is made the current axes right before the call.
plt.figure(figsize=(20, 20))
for class_id, class_name in enumerate(beat_classes):
    panel_position = class_id + 1
    plt.subplot(3, 3, panel_position)
    plt.xlabel("ms")
    plt.ylabel("mV")
    # Column positions 5..69 of the rows labelled class_id.
    heatmap(mitbih_train_df, class_id, 5, 70, class_name)

plt.savefig("images/2d_histogram.pdf", bbox_inches='tight')
plt.show()

In [None]:
from sklearn.utils import resample

# Build a frame with 20000 rows for each class label in column 187.
class_column = mitbih_train_df[187]

# Class 0: 20000 rows drawn without replacement.
df_0 = mitbih_train_df.loc[class_column == 0].sample(n=20000, random_state=42)

# Classes 1-4: split out the rows of each class ...
df_1 = mitbih_train_df.loc[class_column == 1]
df_2 = mitbih_train_df.loc[class_column == 2]
df_3 = mitbih_train_df.loc[class_column == 3]
df_4 = mitbih_train_df.loc[class_column == 4]

# ... and resample each one with replacement to 20000 rows, using a separate
# fixed seed per class.
df_1_upsample, df_2_upsample, df_3_upsample, df_4_upsample = (
    resample(class_rows, replace=True, n_samples=20000, random_state=seed)
    for class_rows, seed in zip((df_1, df_2, df_3, df_4), (123, 124, 125, 126))
)

train_df = pd.concat(
    [df_0, df_1_upsample, df_2_upsample, df_3_upsample, df_4_upsample]
)

In [None]:
# Column 187 holds the class id; labels[i] and the colour at position i below
# describe class id i.
labels = ["normal beats", "Supra. beats", "Ventric. beats", "Fusion beats", "Unknown beats"]

train_df[187] = train_df[187].astype(int)

# value_counts() orders its result by frequency, not by class id. Reindexing by
# class id keeps count[i] paired with labels[i]; a class with no rows gets 0.
count = (
    train_df[187]
    .value_counts()
    .reindex(range(len(labels)), fill_value=0)
)

fig, ax = plt.subplots(figsize=(10, 10))
ax.pie(
    count,
    labels=labels,
    colors=["green", "blue", "yellow", "purple", "lightblue"],
    autopct='%1.0f%%',
)
# A white disc over the centre turns the pie into a donut chart.
ax.add_artist(plt.Circle((0, 0), 0.7, color="white"))

fig.savefig("images/data_distribution_after_datapreprocessing.pdf", bbox_inches='tight')
plt.show()