In [1]:
# libraries
import pandas as pd
import networkx as nx
from source.utils import train_val_test_labels, labels_to_numbers, load_data_df, describe_pyg_data
from source.structure_modeling import torch_geometric_data_from_graph
from source.models import GraphSAGE, GraphSAGE2, train_model

## Labels

In [2]:
# Labels for the raw-audio split lists located under `audio_dir`.
audio_dir = "audio/"
labels_train, labels_valid, labels_test = train_val_test_labels(
    audio_dir,
    "training_list.txt",
    "validation_list.txt",
    "testing_list.txt",
    index_col=None,
    header=None,
    pos=0,  # presumably the position of the label column -- confirm in source.utils
)

# Concatenate in train -> valid -> test order. The lengths of these same three
# lists are what the graph-data cell later passes as split sizes.
labels = labels_train + labels_valid + labels_test

# Two values come back: the label names and a numeric form of `labels`.
# NOTE(review): exact return types are not visible here -- confirm in source.utils.
label_names, labels_nr = labels_to_numbers(labels)

In [3]:
# Labels for the spectrogram split lists located under `spectr_dir`.
# Unlike the raw-audio lists, these are read with index_col=0 / header=0 and the
# label is taken from pos=1 (presumably because the files carry an index column
# and a header row -- confirm against the files on disk).
spectr_dir = "spectrograms/"
spectr_labels_train, spectr_labels_valid, spectr_labels_test = train_val_test_labels(
    spectr_dir,
    "spectr_train_list.txt",
    "spectr_valid_list.txt",
    "spectr_test_list.txt",
    index_col=0,
    header=0,
    pos=1,
)

# Concatenate in train -> valid -> test order; the spectrogram graph-data cells
# pass the lengths of these three lists as split sizes.
spectr_labels = spectr_labels_train + spectr_labels_valid + spectr_labels_test

# Label names plus a numeric form of `spectr_labels`.
# NOTE(review): exact return types are not visible here -- confirm in source.utils.
spectr_label_names, spectr_labels_nr = labels_to_numbers(spectr_labels)

## Datasets

In [4]:
# Feature tables, one per input representation.
# NOTE(review): only the raw-audio load passes header=0; the two spectrogram
# loads rely on load_data_df's default -- confirm that matches the stored files.
raw_audio_df = load_data_df("data/raw_audio/", header=0)

# Both spectrogram variants are loaded with identical (default) options.
spectr2_df, spectr3_df = map(
    load_data_df,
    ("data/spectrogram2/", "data/spectrogram3/"),
)

## Graphs

In [12]:
# Read the pre-built kNN graphs, one adjacency-list file per feature set.
# NOTE(review): read_adjlist is called without `nodetype`, so node labels are
# presumably kept as strings -- confirm torch_geometric_data_from_graph expects that.
raw_audio_G, spectr2_G, spectr3_G = map(
    nx.read_adjlist,
    (
        "graphs/raw_audio_kNN.adjlist",
        "graphs/spectr2_kNN.adjlist",
        "graphs/spectr3_kNN.adjlist",
    ),
)

## Create torch_geometric.data

In [13]:
# Build the graph-data object for the raw-audio kNN graph.
# The last three positional arguments are the train / valid / test split sizes,
# taken from the lengths of the corresponding raw-audio label lists.
raw_audio_data = torch_geometric_data_from_graph(
    raw_audio_G,
    raw_audio_df,
    labels_nr,
    *map(len, (labels_train, labels_valid, labels_test)),
)

# Print summary statistics for the object just built.
describe_pyg_data(raw_audio_data)

Number of nodes: 23682
Number of edges: 185200
Number of features: 1600
Number of classes: 10
Average node degree: 7.82
Number of training nodes: 18538
Number of validation nodes: 2567
Number of testing nodes: 2577
Training node label rate: 0.78
Has isolated nodes: False
Has self-loops: False
Is undirected: True


In [9]:
# Build the graph-data object for the spectrogram2 kNN graph.
# The last three positional arguments are the train / valid / test split sizes,
# taken from the lengths of the corresponding spectrogram label lists.
spectr2_data = torch_geometric_data_from_graph(
    spectr2_G,
    spectr2_df,
    spectr_labels_nr,
    *map(len, (spectr_labels_train, spectr_labels_valid, spectr_labels_test)),
)

# Print summary statistics for the object just built.
describe_pyg_data(spectr2_data)

Number of nodes: 23682
Number of edges: 151290
Number of features: 64
Number of classes: 10
Average node degree: 6.39
Number of training nodes: 18538
Number of validation nodes: 2567
Number of testing nodes: 2577
Training node label rate: 0.78
Has isolated nodes: False
Has self-loops: False
Is undirected: True


In [10]:
# Build the graph-data object for the spectrogram3 kNN graph.
# The last three positional arguments are the train / valid / test split sizes,
# taken from the lengths of the corresponding spectrogram label lists.
spectr3_data = torch_geometric_data_from_graph(
    spectr3_G,
    spectr3_df,
    spectr_labels_nr,
    *map(len, (spectr_labels_train, spectr_labels_valid, spectr_labels_test)),
)

# Print summary statistics for the object just built.
describe_pyg_data(spectr3_data)

Number of nodes: 23682
Number of edges: 155866
Number of features: 256
Number of classes: 10
Average node degree: 6.58
Number of training nodes: 18538
Number of validation nodes: 2567
Number of testing nodes: 2577
Training node label rate: 0.78
Has isolated nodes: False
Has self-loops: False
Is undirected: True


## Models

## Raw files + kNN 

In [14]:
# GraphSAGE model for the raw-audio kNN graph. 256 is the second constructor
# argument (presumably the hidden width -- confirm in source.models).
#
# The model gets a dataset-specific name because the spectrogram training cells
# also assign `GraphSAGE256_model`; with only the shared name, this trained model
# would be lost as soon as one of those cells runs.
raw_audio_GraphSAGE256_model = GraphSAGE(raw_audio_data, 256)

# Backward-compatible alias for any code that still refers to the shared name.
GraphSAGE256_model = raw_audio_GraphSAGE256_model

# Train with 100 as the third positional argument (presumably the epoch budget --
# confirm in source.models) and early stopping (patience 10, threshold 0.001).
# NOTE(review): in the recorded run the loss jumps from ~414 to above 2000 and
# accuracy stays near 0.10 with 10 classes, and training stops at epoch 6 despite
# es_patience=10. Presumably the raw features need scaling or the learning rate
# is too high -- TODO investigate before trusting this result.
train_model(raw_audio_GraphSAGE256_model, raw_audio_data, 100, es_patience=10, es_threshold=0.001)

Epoch: 000, Loss: 413.6495, Acc: 0.1063
Epoch: 001, Loss: 2063.6204, Acc: 0.1001
Epoch: 002, Loss: 2747.2488, Acc: 0.1036
Epoch: 003, Loss: 2429.0723, Acc: 0.0931
Epoch: 004, Loss: 1884.4980, Acc: 0.0978
Epoch: 005, Loss: 1945.1971, Acc: 0.1048
Epoch: 006, Loss: 1791.4187, Acc: 0.1067
Early stopping at epoch 6


In [15]:
# GraphSAGE model for the spectrogram2 kNN graph. 256 is the second constructor
# argument (presumably the hidden width -- confirm in source.models).
#
# The model gets a dataset-specific name because the other training cells also
# assign `GraphSAGE256_model`; with only the shared name, this trained model
# would be lost as soon as one of those cells runs.
spectr2_GraphSAGE256_model = GraphSAGE(spectr2_data, 256)

# Backward-compatible alias for any code that still refers to the shared name.
GraphSAGE256_model = spectr2_GraphSAGE256_model

# Train with 100 as the third positional argument (presumably the epoch budget --
# confirm in source.models) and early stopping (patience 10, threshold 0.001).
train_model(spectr2_GraphSAGE256_model, spectr2_data, 100, es_patience=10, es_threshold=0.001)

Epoch: 000, Loss: 4.1580, Acc: 0.1414
Epoch: 001, Loss: 9.3317, Acc: 0.1289
Epoch: 002, Loss: 6.6050, Acc: 0.1554
Epoch: 003, Loss: 3.9215, Acc: 0.1578
Epoch: 004, Loss: 2.7383, Acc: 0.1741
Epoch: 005, Loss: 2.3263, Acc: 0.1488
Epoch: 006, Loss: 2.2447, Acc: 0.1480
Epoch: 007, Loss: 2.2256, Acc: 0.1434
Epoch: 008, Loss: 2.2088, Acc: 0.1414
Epoch: 009, Loss: 2.1962, Acc: 0.1422
Epoch: 010, Loss: 2.1815, Acc: 0.1508
Epoch: 011, Loss: 2.1543, Acc: 0.1574
Epoch: 012, Loss: 2.1372, Acc: 0.1671
Epoch: 013, Loss: 2.1190, Acc: 0.1808
Epoch: 014, Loss: 2.0935, Acc: 0.1839
Epoch: 015, Loss: 2.0730, Acc: 0.1995
Epoch: 016, Loss: 2.0472, Acc: 0.2287
Epoch: 017, Loss: 2.0231, Acc: 0.2392
Epoch: 018, Loss: 2.0058, Acc: 0.2333
Epoch: 019, Loss: 1.9846, Acc: 0.2411
Epoch: 020, Loss: 1.9651, Acc: 0.2489
Epoch: 021, Loss: 1.9426, Acc: 0.2497
Epoch: 022, Loss: 1.9268, Acc: 0.2450
Epoch: 023, Loss: 1.8993, Acc: 0.2485
Epoch: 024, Loss: 1.8931, Acc: 0.2579
Epoch: 025, Loss: 1.8765, Acc: 0.2633
Epoch: 026, 

In [16]:
# GraphSAGE model for the spectrogram3 kNN graph. 256 is the second constructor
# argument (presumably the hidden width -- confirm in source.models).
#
# The model gets a dataset-specific name because the other training cells also
# assign `GraphSAGE256_model`; with only the shared name, re-running any of
# those cells would replace this trained model.
spectr3_GraphSAGE256_model = GraphSAGE(spectr3_data, 256)

# Backward-compatible alias for any code that still refers to the shared name.
GraphSAGE256_model = spectr3_GraphSAGE256_model

# Train with 100 as the third positional argument (presumably the epoch budget --
# confirm in source.models) and early stopping (patience 10, threshold 0.001).
train_model(spectr3_GraphSAGE256_model, spectr3_data, 100, es_patience=10, es_threshold=0.001)

Epoch: 000, Loss: 5.3805, Acc: 0.1250
Epoch: 001, Loss: 20.3889, Acc: 0.1145
Epoch: 002, Loss: 9.7058, Acc: 0.1660
Epoch: 003, Loss: 4.1486, Acc: 0.1457
Epoch: 004, Loss: 2.8536, Acc: 0.1449
Epoch: 005, Loss: 2.3922, Acc: 0.1434
Epoch: 006, Loss: 2.2077, Acc: 0.1247
Epoch: 007, Loss: 2.1762, Acc: 0.1375
Epoch: 008, Loss: 2.1786, Acc: 0.1426
Epoch: 009, Loss: 2.1668, Acc: 0.1414
Epoch: 010, Loss: 2.1577, Acc: 0.1387
Epoch: 011, Loss: 2.1270, Acc: 0.1445
Epoch: 012, Loss: 2.1028, Acc: 0.1531
Epoch: 013, Loss: 2.0835, Acc: 0.1539
Epoch: 014, Loss: 2.0616, Acc: 0.1617
Epoch: 015, Loss: 2.0442, Acc: 0.1621
Epoch: 016, Loss: 2.0200, Acc: 0.1757
Epoch: 017, Loss: 1.9892, Acc: 0.1924
Epoch: 018, Loss: 1.9750, Acc: 0.1979
Epoch: 019, Loss: 1.9546, Acc: 0.2022
Epoch: 020, Loss: 1.9500, Acc: 0.2069
Epoch: 021, Loss: 1.9334, Acc: 0.1959
Epoch: 022, Loss: 1.9013, Acc: 0.1995
Epoch: 023, Loss: 1.8883, Acc: 0.2174
Epoch: 024, Loss: 1.8776, Acc: 0.2170
Epoch: 025, Loss: 1.8688, Acc: 0.2174
Epoch: 026,