# Classification of Time Series ECG using Visibility Graphs

## Generating the Dataset

In [70]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [14]:
# Read both header-less PTB CSV files and keep them as plain NumPy arrays.
DataNormal = pd.read_csv('ptbdb_normal.csv', header=None).to_numpy()
DataAbnormal = pd.read_csv('ptbdb_abnormal.csv', header=None).to_numpy()

In [61]:
# Dataset container: normal rows are stacked above abnormal rows, and the
# target vector labels them 0 (normal) and 1 (abnormal) in that same order.
ECG = {
    'timeSeries_data': np.append(DataNormal, DataAbnormal, axis=0),
    'target': np.append(
        np.zeros(len(DataNormal)),
        np.ones(len(DataAbnormal)),
    ),
    'target_names': np.array(['normal', 'abnormal']),
}

In [65]:
# (number of series, values per series) of the stacked normal + abnormal matrix
ECG['timeSeries_data'].shape

(14552, 188)

In [107]:
from numpy.linalg import eig
def graphIndexComplexity(adjMatrix, graph):
    """Return the graph index complexity (GIC) of an undirected graph.

    The largest adjacency eigenvalue ``kmax`` is rescaled to
    ``C = (kmax - c) / (n - 1 - c)`` with ``c = 2*cos(pi/(n+1))`` and
    ``n = len(graph)``; the returned value is ``4*C*(1-C)``.

    Parameters
    ----------
    adjMatrix : array_like, shape (n, n)
        Dense, real, symmetric adjacency matrix of the graph.
    graph : sized object
        Only ``len(graph)`` (the number of nodes) is used.

    Returns
    -------
    float
        The graph index complexity.  The denominator ``n - 1 - c`` is
        (numerically) zero for ``n == 2``, so the value is not meaningful
        for graphs that small.
    """
    numNodes = len(graph)

    # The adjacency matrix of an undirected graph is symmetric, so use the
    # symmetric eigenvalue routine: it skips the unused eigenvectors and always
    # returns real eigenvalues, whereas the general solver may return a
    # complex spectrum with rounding-level imaginary parts.
    kmax = np.linalg.eigvalsh(adjMatrix).max()

    const = 2 * np.cos(np.pi / (numNodes + 1))
    C = (kmax - const) / (numNodes - 1 - const)
    return 4 * C * (1 - C)

In [None]:
from ts2vg import NaturalVG, HorizontalVG
import networkx as nx
import scipy.stats as stat

# One feature row per time series, in the same order as ECG['timeSeries_data'].
ECG['data'] = []

# NOTE(review): ECG['timeSeries_data'] has 188 columns.  The public PTB CSV
# files appear to store the class label in the last column -- confirm, and if
# so drop it (ts[:-1]) so the label does not leak into the graph features.
for ts in ECG['timeSeries_data']:

    # applying Natural Visibility Graph
    NVG = NaturalVG()
    NVG.build(ts)
    graph = NVG.as_networkx()

    # extract adjacency matrix (dense, as needed by the eigenvalue computation)
    adjMatrix = nx.adjacency_matrix(graph).toarray()

    # degree distribution
    degDist = [d for _, d in graph.degree()]

    # FEATURE EXTRACTION
    # mean, median, mode, max, min of degDist, and max(degDist)/median
    # (no standard deviation is computed here)
    degMax = np.max(degDist)
    degMedian = np.median(degDist)
    # stat.mode returns a length-1 array on older SciPy and a scalar on
    # SciPy >= 1.11; np.atleast_1d makes the indexing work on both.
    degMode = np.atleast_1d(stat.mode(degDist)[0])[0]
    statData = [np.mean(degDist), degMedian, degMode, degMax, np.min(degDist), degMax / degMedian]

    # graph index complexity
    GIC = graphIndexComplexity(adjMatrix, graph)

    # characteristic path length (unweighted)
    L = nx.average_shortest_path_length(graph, weight=None)

    # global efficiency
    Eg = nx.global_efficiency(graph)

    # average clustering coefficient
    C = nx.average_clustering(graph)

    # local efficiency
    El = nx.local_efficiency(graph)

    # assortativity coefficient
    r = nx.degree_assortativity_coefficient(graph)

    # final feature row: degree statistics followed by the graph metrics
    newData = statData + [GIC, L, Eg, C, El, r]
    ECG['data'].append(newData)


In [42]:
from module_Dataset import load_ECG

# Build the feature matrix and labels from 100 normal and 100 abnormal records.
X, y = load_ECG(
    'ptbdb_normal.csv',
    'ptbdb_abnormal.csv',
    numNormal=100,
    numAbnormal=100,
    return_X_y=True,
)

In [51]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)
# sizes of the training and test label vectors
tuple(len(labels) for labels in (y_train, y_test))

(140, 60)

In [75]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn import svm

# Train three models on the same split.  fit() returns the estimator itself,
# so each model is constructed and trained in a single expression.
LR = LinearRegression().fit(X_train, y_train)
logR = LogisticRegression(max_iter=1000).fit(X_train, y_train)
SVM = svm.SVC(kernel='linear').fit(X_train, y_train)

In [76]:
from sklearn.metrics import mean_squared_error, confusion_matrix

# test-set mean squared error of each model, in the order (LR, logR, SVM)
tuple(mean_squared_error(y_test, model.predict(X_test)) for model in (LR, logR, SVM))

(0.36202036285591915, 0.25, 0.25)

In [78]:
# test-set confusion matrices, in the order (SVM, logR)
tuple(confusion_matrix(y_test, clf.predict(X_test)) for clf in (SVM, logR))

(array([[20, 11],
        [ 4, 25]], dtype=int64),
 array([[20, 11],
        [ 4, 25]], dtype=int64))

In [65]:
# SVM predictions on the test set, shown next to the true labels
(SVM.predict(X_test), y_test)

(array([0., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1., 0., 0.,
        1., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.,
        1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 1.,
        1., 0., 0., 1., 0., 1., 1., 1., 0.]),
 array([0., 0., 0., 1., 1., 1., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 0.,
        1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
        1., 1., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 1., 0., 1.,
        1., 0., 1., 1., 0., 1., 0., 1., 0.]))