In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
!pip install torch-geometric
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import pandas as pd

Collecting torch-geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.5.3


In [4]:
def get_data():
    data_file = "/content/drive/My Drive/data.csv"
    data = pd.read_csv(data_file)
    return data

def get_XY(data, weights=None):
    features = ['valence', 'key', 'tempo', 'acousticness', 'danceability', 'energy', 'explicit', 'instrumentalness', 'liveness', 'speechiness', 'loudness', 'year']
    X = data[features].values
    Y = data['id'].values if 'id' in data.columns else None

    # Scale the features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Apply weights
    if weights is not None:
        for i, weight in enumerate(weights):
            X[:, i] *= weight

    return X, Y


In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(12, 64)
        self.conv2 = GCNConv(64, 64)
        self.fc1 = nn.Linear(64, 32)
        self.fc2 = nn.Linear(32, 1)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)

        x = torch.mean(x, dim=0).unsqueeze(0)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.sigmoid(x)

In [18]:
def train_gnn(data, weights, epochs=100):
    model = Net()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    X, Y = get_XY(data, weights)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    criterion = nn.BCELoss()

    edge_index = torch.tensor([[i, i] for i in range(X.shape[0])], dtype=torch.long).t().contiguous()
    edge_index = edge_index.to(device)

    x = torch.tensor(X, dtype=torch.float).to(device)
    y = torch.tensor([[1]], dtype=torch.float).to(device)
    loader = DataLoader([Data(x=x, edge_index=edge_index, y=y)], batch_size=1)

    model.to(device)

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()

        out = model(loader.dataset[0].to(device))
        loss = criterion(out, loader.dataset[0].y)
        loss.backward()
        optimizer.step()

        if epoch % 10 == 0:
            print(f'Epoch: {epoch}, Loss: {loss.item()}')

    torch.save(model.state_dict(), 'gnn_model.pth')


In [29]:
def predict_with_gnn(embedding, weights, data, n_neighbors=7):
    model = Net()
    model.load_state_dict(torch.load('gnn_model.pth', map_location=torch.device('cpu')))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()

    X, Y = get_XY(data, weights)
    x = torch.tensor(X, dtype=torch.float)
    edge_index = torch.tensor([[i, i] for i in range(X.shape[0])], dtype=torch.long).t().contiguous()
    edge_index = edge_index.to(device)
    x = x.to(device)
    loader = DataLoader([Data(x=x, edge_index=edge_index)])

    with torch.no_grad():
        prediction = model(loader.dataset[0].to(device))

    prediction = prediction.cpu().numpy()
    idx = np.argsort(prediction, axis=0)[-n_neighbors:][::-1].reshape(-1)
    predictions = []
    for index in idx:
        predictions.append([data.loc[index, 'artists'], data.loc[index, 'name'], data.loc[index, 'release_date']])
    return predictions



In [8]:
def convert_embedding(embedding):
    column_order = ['valence', 'key', 'tempo', 'acousticness', 'danceability', 'energy', 'explicit', 'instrumentalness', 'liveness', 'speechiness', 'loudness', 'year']
    df = pd.DataFrame.from_dict(embedding, orient='index').T
    df = df[column_order]
    return df

In [19]:
weights = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
data = get_data()
train_gnn(data, weights)



Epoch: 0, Loss: 0.7483801245689392
Epoch: 10, Loss: 1.6927862816373818e-05
Epoch: 20, Loss: 0.0
Epoch: 30, Loss: 0.0
Epoch: 40, Loss: 0.0
Epoch: 50, Loss: 0.0
Epoch: 60, Loss: 0.0
Epoch: 70, Loss: 0.0
Epoch: 80, Loss: 0.0
Epoch: 90, Loss: 0.0


In [30]:
embedding = {
    "valence": 0.9,
    "key": 7,
    "tempo": 120.0,
    "acousticness": 0.2,
    "danceability": 0.8,
    "energy": 0.7,
    "explicit": 0,
    "instrumentalness": 0.1,
    "liveness": 0.5,
    "speechiness": 0.2,
    "loudness": -10.0,
    "year": 2020
}
predictions = predict_with_gnn(embedding, weights, data, n_neighbors=7)
print(predictions)

[["['Sergei Rachmaninoff', 'James Levine', 'Berliner Philharmoniker']", 'Piano Concerto No. 3 in D Minor, Op. 30: III. Finale. Alla breve', '1921']]


