In [30]:
# libraries
import networkx as nx
import numpy as np
import pandas as pd
import torch

from source.models import GraphSAGE, GraphSAGE2, train_model, test
from source.structure_modeling import torch_geometric_data_from_graph
from source.utils import train_val_test_labels, labels_to_numbers, load_data_df, describe_pyg_data, visualize

## Labels

In [2]:
# Read the train / validation / test label lists for the raw-audio recordings.
# NOTE(review): index_col, header and pos are handed to the project helper
# unchanged; presumably they describe the list-file layout — confirm in
# source.utils.train_val_test_labels.
audio_dir = "audio/"
labels_train, labels_valid, labels_test = train_val_test_labels(
    audio_dir,
    "training_list.txt",
    "validation_list.txt",
    "testing_list.txt",
    index_col=None,
    header=None,
    pos=0,
)

# Concatenate the three splits in train -> validation -> test order, then let
# labels_to_numbers produce the two values unpacked below.
labels = labels_train + labels_valid + labels_test
label_names, labels_nr = labels_to_numbers(labels)

In [3]:
# Read the train / validation / test label lists for the spectrogram files.
# Unlike the raw-audio lists, these are read with index_col=0, header=0, pos=1.
# NOTE(review): presumably the spectrogram lists carry an index column and a
# header row — confirm against the files and source.utils.
spectr_dir = "spectrograms/"
spectr_labels_train, spectr_labels_valid, spectr_labels_test = train_val_test_labels(
    spectr_dir,
    "spectr_train_list.txt",
    "spectr_valid_list.txt",
    "spectr_test_list.txt",
    index_col=0,
    header=0,
    pos=1,
)

# Concatenate the three splits in train -> validation -> test order, then let
# labels_to_numbers produce the two values unpacked below.
spectr_labels = spectr_labels_train + spectr_labels_valid + spectr_labels_test
spectr_label_names, spectr_labels_nr = labels_to_numbers(spectr_labels)

## Datasets

In [4]:
# Load the three feature tables with the project loader, one directory each.
# The loads run in the order listed and are unpacked into the three frames.
raw_audio_df, spectr2_df, spectr3_df = map(
    load_data_df,
    (
        "data/raw_audio/",
        "data/spectrogram2/",
        "data/spectrogram3/",
    ),
)

## Graphs

In [5]:
# Read the three pre-built kNN graphs from their adjacency-list files.
# NOTE(review): read_adjlist is called without nodetype, so node labels are
# read as strings; confirm torch_geometric_data_from_graph expects that.
raw_audio_G, spectr2_G, spectr3_G = (
    nx.read_adjlist(adjlist_path)
    for adjlist_path in (
        "graphs/raw_audio_kNN.adjlist",
        "graphs/spectr2_kNN.adjlist",
        "graphs/spectr3_kNN.adjlist",
    )
)

## Create torch_geometric.data

In [9]:
# Build the PyG data object for the raw-audio graph. The last three positional
# arguments are the sizes of the train, validation and test label lists.
raw_audio_data = torch_geometric_data_from_graph(
    raw_audio_G,
    raw_audio_df,
    labels_nr,
    *map(len, (labels_train, labels_valid, labels_test)),
)
# Print the summary statistics of the resulting object.
describe_pyg_data(raw_audio_data)

Number of nodes: 23682
Number of edges: 185200
Number of features: 1600
Number of classes: 10
Average node degree: 7.82
Number of training nodes: 18538
Number of validation nodes: 2567
Number of testing nodes: 2577
Training node label rate: 0.78
Has isolated nodes: False
Has self-loops: False
Is undirected: True


In [11]:
# Build the PyG data object for the spectrogram2 graph. The last three
# positional arguments are the sizes of the train, validation and test
# spectrogram label lists.
spectr2_data = torch_geometric_data_from_graph(
    spectr2_G,
    spectr2_df,
    spectr_labels_nr,
    *map(len, (spectr_labels_train, spectr_labels_valid, spectr_labels_test)),
)
# Print the summary statistics of the resulting object.
describe_pyg_data(spectr2_data)

Number of nodes: 23682
Number of edges: 151290
Number of features: 64
Number of classes: 10
Average node degree: 6.39
Number of training nodes: 18538
Number of validation nodes: 2567
Number of testing nodes: 2577
Training node label rate: 0.78
Has isolated nodes: False
Has self-loops: False
Is undirected: True


In [10]:
# Build the PyG data object for the spectrogram3 graph. The last three
# positional arguments are the sizes of the train, validation and test
# spectrogram label lists.
spectr3_data = torch_geometric_data_from_graph(
    spectr3_G,
    spectr3_df,
    spectr_labels_nr,
    *map(len, (spectr_labels_train, spectr_labels_valid, spectr_labels_test)),
)
# Print the summary statistics of the resulting object.
describe_pyg_data(spectr3_data)

Number of nodes: 23682
Number of edges: 155866
Number of features: 256
Number of classes: 10
Average node degree: 6.58
Number of training nodes: 18538
Number of validation nodes: 2567
Number of testing nodes: 2577
Training node label rate: 0.78
Has isolated nodes: False
Has self-loops: False
Is undirected: True


## Models

### Raw files + kNN

In [12]:
# Seed PyTorch's global RNG before the model is constructed so that a fresh
# Restart & Run All starts training from the same random state every time.
torch.manual_seed(42)

# GraphSAGE on the raw-audio kNN graph.
# NOTE(review): 256 is presumably the hidden width and 100 the maximum number
# of epochs — confirm against source.models.
# NOTE(review): the saved output of this cell shows the training loss growing
# by orders of magnitude and validation accuracy staying near 0.10 (chance
# level for 10 classes); the 1600 raw-audio features are likely unscaled —
# consider standardising them before training.
# NOTE(review): the saved run stopped at epoch 6 although es_patience=10;
# verify the early-stopping logic in train_model.
raw_audio_knn_model = GraphSAGE(raw_audio_data, 256)
raw_audio_knn_metrics = train_model(
    raw_audio_knn_model,
    raw_audio_data,
    100,
    es_patience=10,
    es_threshold=0.001,
)

# Persist the per-epoch metrics; index=False is the documented boolean way to
# leave the row index out of the CSV (previously index=None).
raw_audio_knn_metrics.to_csv("metrics/raw_audio_knn.csv", index=False)

Epoch: 000, Train loss: 413.6495, Valid Acc: 0.1063
Epoch: 001, Train loss: 2063.6204, Valid Acc: 0.1001
Epoch: 002, Train loss: 2747.2488, Valid Acc: 0.1036
Epoch: 003, Train loss: 2429.0723, Valid Acc: 0.0931
Epoch: 004, Train loss: 1884.4980, Valid Acc: 0.0978
Epoch: 005, Train loss: 1945.1971, Valid Acc: 0.1048
Epoch: 006, Train loss: 1791.4187, Valid Acc: 0.1067
Early stopping at epoch 6


In [22]:
# Display the model's conv2 layer; its repr is the cell output.
# (The original line ended in a stray "." and did not parse.)
raw_audio_knn_model.conv2

SAGEConv(256, 10, aggr=mean)

In [13]:
# Score the trained raw-audio kNN model with the project's test() helper and
# display the result (presumably test-set accuracy — confirm in source.models).
raw_audio_knn_test_score = test(raw_audio_knn_model, raw_audio_data)
raw_audio_knn_test_score

0.09701202949165696

In [14]:
# Seed PyTorch's global RNG before the model is constructed so that a fresh
# Restart & Run All starts training from the same random state every time.
torch.manual_seed(42)

# GraphSAGE on the spectrogram2 kNN graph.
# NOTE(review): 256 is presumably the hidden width and 100 the maximum number
# of epochs — confirm against source.models.
spectr2_knn_model = GraphSAGE(spectr2_data, 256)
spectr2_knn_metrics = train_model(
    spectr2_knn_model,
    spectr2_data,
    100,
    es_patience=10,
    es_threshold=0.001,
)

# Persist the per-epoch metrics; index=False is the documented boolean way to
# leave the row index out of the CSV (previously index=None).
spectr2_knn_metrics.to_csv("metrics/spectr2_knn.csv", index=False)

Epoch: 000, Train loss: 4.1580, Valid Acc: 0.1414
Epoch: 001, Train loss: 9.3317, Valid Acc: 0.1289
Epoch: 002, Train loss: 6.6050, Valid Acc: 0.1554
Epoch: 003, Train loss: 3.9215, Valid Acc: 0.1578
Epoch: 004, Train loss: 2.7383, Valid Acc: 0.1741
Epoch: 005, Train loss: 2.3263, Valid Acc: 0.1488
Epoch: 006, Train loss: 2.2447, Valid Acc: 0.1480
Epoch: 007, Train loss: 2.2256, Valid Acc: 0.1434
Epoch: 008, Train loss: 2.2088, Valid Acc: 0.1414
Epoch: 009, Train loss: 2.1962, Valid Acc: 0.1422
Epoch: 010, Train loss: 2.1815, Valid Acc: 0.1508
Epoch: 011, Train loss: 2.1543, Valid Acc: 0.1574
Epoch: 012, Train loss: 2.1372, Valid Acc: 0.1671
Epoch: 013, Train loss: 2.1190, Valid Acc: 0.1808
Epoch: 014, Train loss: 2.0935, Valid Acc: 0.1839
Epoch: 015, Train loss: 2.0730, Valid Acc: 0.1995
Epoch: 016, Train loss: 2.0472, Valid Acc: 0.2287
Epoch: 017, Train loss: 2.0231, Valid Acc: 0.2392
Epoch: 018, Train loss: 2.0058, Valid Acc: 0.2333
Epoch: 019, Train loss: 1.9846, Valid Acc: 0.2411


In [15]:
# Score the trained spectrogram2 kNN model with the project's test() helper and
# display the result (presumably test-set accuracy — confirm in source.models).
spectr2_knn_test_score = test(spectr2_knn_model, spectr2_data)
spectr2_knn_test_score

0.3573923166472643

In [16]:
# Seed PyTorch's global RNG before the model is constructed so that a fresh
# Restart & Run All starts training from the same random state every time.
torch.manual_seed(42)

# GraphSAGE on the spectrogram3 kNN graph.
# NOTE(review): 256 is presumably the hidden width and 100 the maximum number
# of epochs — confirm against source.models.
spectr3_knn_model = GraphSAGE(spectr3_data, 256)
spectr3_knn_metrics = train_model(
    spectr3_knn_model,
    spectr3_data,
    100,
    es_patience=10,
    es_threshold=0.001,
)

# Persist the per-epoch metrics; index=False is the documented boolean way to
# leave the row index out of the CSV (previously index=None).
spectr3_knn_metrics.to_csv("metrics/spectr3_knn.csv", index=False)

Epoch: 000, Train loss: 5.3805, Valid Acc: 0.1250
Epoch: 001, Train loss: 20.3889, Valid Acc: 0.1145
Epoch: 002, Train loss: 9.7058, Valid Acc: 0.1660
Epoch: 003, Train loss: 4.1486, Valid Acc: 0.1457
Epoch: 004, Train loss: 2.8536, Valid Acc: 0.1449
Epoch: 005, Train loss: 2.3922, Valid Acc: 0.1434
Epoch: 006, Train loss: 2.2077, Valid Acc: 0.1247
Epoch: 007, Train loss: 2.1762, Valid Acc: 0.1375
Epoch: 008, Train loss: 2.1786, Valid Acc: 0.1426
Epoch: 009, Train loss: 2.1668, Valid Acc: 0.1414
Epoch: 010, Train loss: 2.1577, Valid Acc: 0.1387
Epoch: 011, Train loss: 2.1270, Valid Acc: 0.1445
Epoch: 012, Train loss: 2.1028, Valid Acc: 0.1531
Epoch: 013, Train loss: 2.0835, Valid Acc: 0.1539
Epoch: 014, Train loss: 2.0616, Valid Acc: 0.1617
Epoch: 015, Train loss: 2.0442, Valid Acc: 0.1621
Epoch: 016, Train loss: 2.0200, Valid Acc: 0.1757
Epoch: 017, Train loss: 1.9892, Valid Acc: 0.1924
Epoch: 018, Train loss: 1.9750, Valid Acc: 0.1979
Epoch: 019, Train loss: 1.9546, Valid Acc: 0.2022

In [17]:
# Score the trained spectrogram3 kNN model with the project's test() helper and
# display the result (presumably test-set accuracy — confirm in source.models).
spectr3_knn_test_score = test(spectr3_knn_model, spectr3_data)
spectr3_knn_test_score

0.2394256887854094