In [1]:
# Exercise 1

In [2]:
import math
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import pandas as pd

import distance as dis
import dtransform as dt
import subgraphs as sg

In [3]:
# Column labels applied to the data file's columns, in order (passed as
# `names=` below).
# NOTE(review): "Bouyancy" and "Drought" look like misspellings of "Buoyancy"
# and "Draught"; they are left untouched here because the labels are reused
# by later cells and end up in the generated outputs.
titles = [
    "Bouyancy Position",
    "Prismatic Coefficient",
    "Length-Displacement Ratio",
    "Beam-Drought Ratio",
    "Length-Beam Ratio",
    "Froude Number",
    "Residuary Resistance",
]

# Some values in the data file are separated by more than one space, so the
# delimiter is a regular expression; a regex delimiter requires the python
# parsing engine.
yacht_data = pd.read_csv(
    "datasets/yacht_hydrodynamics.data",
    delimiter=" +",
    engine="python",
    names=titles,
)
yacht_data.head()

Unnamed: 0,Bouyancy Position,Prismatic Coefficient,Length-Displacement Ratio,Beam-Drought Ratio,Length-Beam Ratio,Froude Number,Residuary Resistance
0,-2.3,0.568,4.78,3.99,3.17,0.125,0.11
1,-2.3,0.568,4.78,3.99,3.17,0.15,0.27
2,-2.3,0.568,4.78,3.99,3.17,0.175,0.47
3,-2.3,0.568,4.78,3.99,3.17,0.2,0.78
4,-2.3,0.568,4.78,3.99,3.17,0.225,1.18


In [4]:
# Normalise data
# dt.normalise is relied on to modify the Series it receives in place (its
# return value is not used). Each column is normalised on an explicit copy and
# then written back, rather than mutating the Series yielded by
# DataFrame.items(): whether such a Series shares memory with the frame is a
# pandas implementation detail, and with Copy-on-Write enabled the in-place
# change never reaches yacht_data, leaving the frame silently un-normalised.
for label in yacht_data.columns:
    column = yacht_data[label].copy()
    dt.normalise(column)
    yacht_data[label] = column

yacht_data.head()

Unnamed: 0,Bouyancy Position,Prismatic Coefficient,Length-Displacement Ratio,Beam-Drought Ratio,Length-Beam Ratio,Froude Number,Residuary Resistance
0,0.54,0.542857,0.55,0.464567,0.483516,0.0,0.001602
1,0.54,0.542857,0.55,0.464567,0.483516,0.076923,0.004166
2,0.54,0.542857,0.55,0.464567,0.483516,0.153846,0.007371
3,0.54,0.542857,0.55,0.464567,0.483516,0.230769,0.012338
4,0.54,0.542857,0.55,0.464567,0.483516,0.307692,0.018747


In [5]:
# Labels for the attribute (column) axis and the instance (row) axis.
attr_labels = list(yacht_data.columns)
inst_labels = yacht_data.index

print(attr_labels)

['Bouyancy Position', 'Prismatic Coefficient', 'Length-Displacement Ratio', 'Beam-Drought Ratio', 'Length-Beam Ratio', 'Froude Number', 'Residuary Resistance']


In [6]:
# Attributes are columns, instances are rows
ninst, nattr = yacht_data.shape

# Extract all rows once as a NumPy array. Indexing the frame with .iloc inside
# the double loop repeats the same (slow) row extraction for every pair.
inst_rows = yacht_data.to_numpy()

# Symmetric matrix of pairwise distances between instances. Only the upper
# triangle (diagonal included) is computed; each value is mirrored across the
# diagonal.
D = np.zeros((ninst, ninst))
for i in range(ninst):
    for j in range(i, ninst):
        D[i, j] = D[j, i] = dis.eucl_dist(inst_rows[i], inst_rows[j])

# Report whether the output directory was already present, then make sure it
# exists. exist_ok=True keeps the creation safe if the directory appears
# between the check and the call.
if os.path.exists("output"):
    print("Path exists")
else:
    print("Path not found, creating directory")
os.makedirs("output", exist_ok=True)

# The matrix is written with two decimal places; D itself keeps full precision.
np.savetxt("output/YachtInstanceDistMatrix.csv", D, fmt="%.2f", delimiter=",")

Path exists


In [7]:
# Extract the data once and transpose it so that each row of attr_cols holds
# the values of one attribute (one column of yacht_data). This avoids
# re-indexing the frame with .iloc for every pair inside the loop.
attr_cols = yacht_data.to_numpy().T

# Symmetric matrix of pairwise distances between attributes. Only the upper
# triangle (diagonal included) is computed; each value is mirrored across the
# diagonal.
D_attr = np.zeros((nattr, nattr))
for i in range(nattr):
    for j in range(i, nattr):
        D_attr[i, j] = D_attr[j, i] = dis.eucl_dist(attr_cols[i], attr_cols[j])

# Report whether the output directory was already present, then make sure it
# exists. exist_ok=True keeps the creation safe if the directory appears
# between the check and the call.
if os.path.exists("output"):
    print("Path exists")
else:
    print("Path not found, creating directory")
os.makedirs("output", exist_ok=True)

# The matrix is written with two decimal places; D_attr keeps full precision.
np.savetxt("output/YachtAttributeDistMatrix.csv", D_attr, fmt="%.2f", delimiter=",")

Path exists


In [8]:
# Generate sets of vertices
# One integer vertex per row of the corresponding distance matrix.
V = set(range(len(D)))
V_attr = set(range(len(D_attr)))

In [9]:
# Generate graphs

In [10]:
# Generate the graph over the instance vertices from the instance distance
# matrix, labelled with the row index.
# The .gml path is presumably where sg.rngraph saves the graph — TODO confirm.
# The return value is bound to "_" because it is not used afterwards.
_ = sg.rngraph(
    V,
    D,
    "output/yacht-inst-rng.gml",
    inst_labels,
)

In [11]:
# Generate the graph over the attribute vertices from the attribute distance
# matrix, labelled with the column names.
# The .gml path is presumably where sg.rngraph saves the graph — TODO confirm.
# The return value is bound to "_" because it is not used afterwards.
_ = sg.rngraph(
    V_attr,
    D_attr,
    "output/yacht-attr-rng.gml",
    attr_labels,
)