In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [4]:
airport_emb = pd.read_csv('airports_128_0_node2vec.emb', sep=' ', header=None)  
airport_roles = pd.read_csv('airport_roles.csv', sep=';')

In [5]:
airport_roles

Unnamed: 0,NODE_ID,PROMINENCY_LABEL,PROMINENCY_WEIGHT,PROMINENCY_PURITY,RECIPROCITY_LABEL,RECIPROCITY_WEIGHT,RECIPROCITY_PURITY
0,1,StronglyProminent,106140350877193,000931055709449061,NonReciprocal,0,0
1,2,StronglyProminent,0746268656716418,000556916907997327,NonReciprocal,0,0
2,3,StronglyProminent,0128,0001024,NonReciprocal,0,0
3,4,StronglyProminent,298870056497175,00168853139263941,NonReciprocal,0,0
4,5,StronglyProminent,009,00009,NonReciprocal,0,0
...,...,...,...,...,...,...,...
457,458,NonProminent,0,0,NonReciprocal,0,0
458,459,NonProminent,0,0,StronglyReciprocal,1,1
459,460,NonProminent,0,0,NonReciprocal,0,0
460,461,NonProminent,0,0,NonReciprocal,0,0


In [6]:
airport_emb_with_role = airport_emb.merge(airport_roles[['NODE_ID','PROMINENCY_LABEL']], left_on=0, right_on='NODE_ID')
airport_emb_with_role = airport_emb_with_role.drop(columns=['NODE_ID', 0])

In [7]:
airport_emb_with_role

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,120,121,122,123,124,125,126,127,128,PROMINENCY_LABEL
0,0.016058,0.063933,0.054588,0.040625,0.050556,0.160908,-0.010950,-0.161459,-0.084790,0.235999,...,-0.111348,0.076122,0.064705,0.144867,-0.154470,0.234947,-0.034051,0.008744,0.015871,StronglyProminent
1,0.068996,-0.000120,0.068125,0.095545,0.125746,0.059112,0.010365,-0.034523,-0.086110,0.272053,...,-0.084844,0.057168,-0.059127,0.048876,-0.039810,0.113723,-0.054531,-0.032524,0.047727,StronglyProminent
2,0.033917,0.163112,0.050117,0.048434,0.238322,0.048428,0.189923,-0.008983,-0.128208,0.231640,...,0.198227,-0.114497,0.091908,-0.041870,-0.074020,0.212175,0.161038,-0.161156,0.013230,StronglyProminent
3,0.184043,-0.277091,-0.020888,-0.013034,-0.031497,0.121122,-0.130410,-0.177157,-0.027745,0.054277,...,0.078421,0.032646,-0.194771,-0.108847,0.160198,-0.178540,0.019218,-0.117237,0.040194,StronglyProminent
4,-0.067894,0.071175,0.180074,0.113855,0.218414,0.151411,-0.144999,-0.049340,-0.011345,0.231888,...,-0.053104,-0.212758,0.260246,0.184051,-0.171183,-0.170551,-0.191680,-0.125550,-0.195252,StronglyProminent
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
457,0.035888,-0.047917,0.219685,0.177531,0.078997,0.138309,-0.242006,-0.036315,-0.022751,0.367110,...,-0.105111,0.078644,0.091999,-0.063185,-0.172420,-0.040271,0.051851,0.127846,-0.035507,NonProminent
458,0.106969,-0.270619,-0.140961,0.105378,-0.022253,-0.009216,-0.102316,0.157874,-0.068019,0.201515,...,-0.204088,-0.011691,0.117562,-0.077845,0.031232,0.129759,-0.100748,-0.148881,0.134401,NonProminent
459,-0.072001,0.260724,0.168977,0.160464,0.292578,0.001488,0.025246,-0.209288,-0.067745,0.250530,...,0.182832,-0.163463,0.082283,0.277490,0.040657,-0.140256,-0.088254,-0.072066,-0.085449,NonProminent
460,0.225843,-0.038367,0.017112,0.235046,-0.057757,0.217702,0.031808,-0.069936,0.001426,0.117218,...,0.018645,-0.234505,0.006695,-0.064213,-0.216503,-0.240182,-0.037150,-0.162238,-0.044596,NonProminent


In [8]:
# encode PROMINENCY_LABEL to integers
label_encoder = LabelEncoder()
airport_emb_with_role['PROMINENCY_LABEL'] = label_encoder.fit_transform(airport_emb_with_role['PROMINENCY_LABEL'])


In [9]:
# train test split 
X = airport_emb_with_role.drop(columns=['PROMINENCY_LABEL'])
y = airport_emb_with_role['PROMINENCY_LABEL']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=55)


In [10]:
# create neural network from pytorch  to classify roles
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

In [11]:
# create a dataset from the tensors
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# create a dataloader from the dataset
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [12]:


# create a neural network
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(128, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 1024)
        self.fc4 = nn.Linear(1024, 3)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        return F.softmax(x, dim=1)

In [13]:
# create a model
model = Classifier()

# create a loss function
criterion = nn.CrossEntropyLoss()

# create an optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [14]:

# train the model
epochs = 100
train_losses = []
test_losses = []

for e in range(epochs):
    running_loss = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        log_ps = model(inputs)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    else:
        test_loss = 0
        accuracy = 0
        with torch.no_grad():
            for inputs, labels in test_loader:
                log_ps = model(inputs)
                test_loss += criterion(log_ps, labels)
                ps = torch.exp(log_ps)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor))
        train_losses.append(running_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))
        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))

Epoch: 1/100..  Training Loss: 1.039..  Test Loss: 0.949..  Test Accuracy: 0.611
Epoch: 2/100..  Training Loss: 0.893..  Test Loss: 0.937..  Test Accuracy: 0.614
Epoch: 3/100..  Training Loss: 0.891..  Test Loss: 0.935..  Test Accuracy: 0.616
Epoch: 4/100..  Training Loss: 0.894..  Test Loss: 0.942..  Test Accuracy: 0.610
Epoch: 5/100..  Training Loss: 0.901..  Test Loss: 0.943..  Test Accuracy: 0.609
Epoch: 6/100..  Training Loss: 0.889..  Test Loss: 0.935..  Test Accuracy: 0.616
Epoch: 7/100..  Training Loss: 0.891..  Test Loss: 0.936..  Test Accuracy: 0.615
Epoch: 8/100..  Training Loss: 0.889..  Test Loss: 0.938..  Test Accuracy: 0.613
Epoch: 9/100..  Training Loss: 0.898..  Test Loss: 0.937..  Test Accuracy: 0.614
Epoch: 10/100..  Training Loss: 0.894..  Test Loss: 0.939..  Test Accuracy: 0.612
Epoch: 11/100..  Training Loss: 0.901..  Test Loss: 0.937..  Test Accuracy: 0.614
Epoch: 12/100..  Training Loss: 0.896..  Test Loss: 0.935..  Test Accuracy: 0.616
Epoch: 13/100..  Training

In [15]:
# evaluate the model
model.eval()
with torch.no_grad():
    log_ps = model(X_test_tensor)
    ps = torch.exp(log_ps)
    top_p, top_class = ps.topk(1, dim=1)
    equals = top_class == y_test_tensor.view(*top_class.shape)
    accuracy = torch.mean(equals.type(torch.FloatTensor))
    print(f'Accuracy: {accuracy.item()*100}%')


Accuracy: 61.29032373428345%
