MDS: Intercity distances
===

Steven Van Vaerenbergh. Master in Data Science, University of Cantabria, 2018-2019.

Adapted from https://chrisalbon.com/python/data_wrangling/pandas_dataframe_importing_csv/

In [None]:
import csv
import numpy as np
import matplotlib.pyplot as plt
from sklearn import manifold
import pandas as pd

# Make the plot outputs appear and be stored within the notebook
%matplotlib inline

In [None]:
# Pairwise distances between European cities, published with the RMDS project.
# The file is semicolon-separated and is read without a header row.
url = 'https://raw.githubusercontent.com/cheind/rmds/master/examples/european_city_distances.csv'
df = pd.read_csv(url, sep=';', header=None)

# Preview the first rows of the raw table.
df.head()

In [None]:
# Column 0 is skipped here (it is read separately as the city labels); the
# distance columns start at column 1.
# A distance matrix is square, so take as many distance columns as there are
# rows instead of hard-coding the column count of this particular file.
n_cities = len(df)
dist = df.iloc[:, 1:n_cities + 1]
dist.head()

In [None]:
# The first column of the table is used as the point labels in the plot.
cities = df.iloc[:, 0]

# Preview the first five entries.
cities.iloc[:5]

Perform MDS
---

In [None]:
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# to_numpy() is the supported way to obtain the underlying ndarray.
adist = dist.to_numpy()
print(adist.shape)

# The input already is a matrix of pairwise distances, hence
# dissimilarity="precomputed". random_state is fixed at 6 for the embedding.
mds = manifold.MDS(n_components=2, dissimilarity="precomputed", random_state=6)
results = mds.fit(adist)

# Fitted 2-D embedding: one row per city, one column per component.
coords = results.embedding_

Plot results
--

In [None]:
# Scatter plot of the 2-D embedding, one labelled point per city.
fig = plt.figure(figsize=(9, 8))
plt.subplots_adjust(bottom=0.1)
plt.scatter(coords[:, 0], coords[:, 1], marker='o')

# Equal axis scaling keeps distances comparable in both directions.
plt.axis('equal')

# Put each city name 10 points above its marker, horizontally centred.
for label, (x, y) in zip(cities, coords[:, :2]):
    plt.annotate(
        label,
        xy=(x, y),
        xytext=(0, 10),
        textcoords='offset points',
        ha='center',
        va='bottom',
    )

plt.show()