In [1]:
# libraries
import pandas as pd
import networkx as nx
from source.utils import train_val_test_labels, labels_to_numbers, load_data_df
from source.structure_modeling import torch_geometric_data_from_graph
from source.models import GraphSAGE, GraphSAGE2, train_model

## Labels

In [2]:
# Collect the labels of the train / validation / test splits via the
# project helper from source.utils.
spectr_dir = "spectrograms/"
labels_train, labels_valid, labels_test = train_val_test_labels(
    spectr_dir,
    "spectr_train_list.txt",
    "spectr_valid_list.txt",
    "spectr_test_list.txt",
    index_col=0,
    header=0,
    pos=1,
)

# Join the splits in train -> valid -> test order, then map the labels
# to numbers.
# NOTE(review): `+` is assumed to concatenate here (list-like labels) — confirm.
labels = labels_train + labels_valid + labels_test
label_names, labels_nr = labels_to_numbers(labels)

## Datasets

In [None]:
# Load the raw-audio data (presumably a DataFrame, judging by the helper's
# name — confirm). The directory is relative to the kernel's working directory.
raw_audio_dir = "data/raw_audio/"
raw_audio_df = load_data_df(raw_audio_dir)

In [3]:
spectr2_dir = "data/spectrogram2/"

# The CSV files are read without a header row, so pandas assigns integer
# column labels. Each split is kept in its own frame.
train_df, valid_df, test_df = (
    pd.read_csv(spectr2_dir + split_file, header=None)
    for split_file in ("train.csv", "valid.csv", "test.csv")
)

# Stack the splits in train -> valid -> test order.
# NOTE(review): the default concat keeps each frame's own row index, so index
# labels repeat across splits — confirm downstream code does not rely on
# unique index labels.
df = pd.concat([train_df, valid_df, test_df])

## Building the graph from the data points

In [4]:
# Load the prebuilt graph from an adjacency-list file. The path is relative to
# the kernel's working directory; the file must already exist there, otherwise
# this cell raises FileNotFoundError.
# NOTE(review): read_adjlist yields string node labels unless `nodetype` is
# given — confirm that is what the downstream helper expects.
graph_path = "graphs/spect2_kNN.adjlist"
G = nx.read_adjlist(graph_path)

FileNotFoundError: [Errno 2] No such file or directory: 'graphs/spect2_kNN.adjlist'

## Create torch_geometric.data

In [None]:
# Build the graph data object from the graph, the feature frame and the
# numeric labels. The last three positional arguments are the sizes of the
# train, validation and test label lists, in that order.
data = torch_geometric_data_from_graph(
    G,
    df,
    labels_nr,
    *map(len, (labels_train, labels_valid, labels_test)),
)

In [None]:
# Report summary statistics of the graph data object, one per line.
# A single print with a newline separator emits the same text as one
# print call per statistic.
print(
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Number of features: {data.num_features}',
    f'Number of classes: {data.num_classes}',
    # Edge count divided by node count, shown with two decimals.
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Number of training nodes: {data.train_mask.sum()}',
    f'Number of validation nodes: {data.valid_mask.sum()}',
    f'Number of testing nodes: {data.test_mask.sum()}',
    # Share of all nodes that belong to the training mask.
    f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)

Number of nodes: 23682
Number of edges: 151290
Number of features: 64
Number of classes: 10
Average node degree: 6.39
Number of training nodes: 18538
Number of validation nodes: 2567
Number of testing nodes: 2577
Training node label rate: 0.78
Has isolated nodes: False
Has self-loops: False
Is undirected: True
