<a href="https://colab.research.google.com/github/gylab-TAU/collab-notebooks/blob/master/MDS_Tool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title installations
!pip install jupyter-dash --quiet
!pip install pandas --quiet
!pip install scipy --quiet

In [None]:
#@title mount drive and read folders
from google.colab import drive
# Make the user's Google Drive visible under /content/drive.
drive.mount('/content/drive')

# Both data folders are expected directly under the Drive root.
root_dir = "/content/drive/My Drive/"

# Note the asymmetry: images_dir ends with a slash, results_dir does not.
images_dir, results_dir = (
    root_dir + 'MDS-images/',
    root_dir + 'MDS-results',
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#@title choose csv file
import ipywidgets as widgets
import os

# The working directory is still switched to the results folder, as before,
# in case anything else relies on it; the listing below does not.
os.chdir(results_dir)

# Offer only regular files (a sub-folder cannot be read as a CSV) and sort
# them, because os.listdir returns entries in arbitrary order and the first
# entry becomes the default selection.
options = sorted(
    name for name in os.listdir(results_dir)
    if os.path.isfile(os.path.join(results_dir, name))
)

# Fail with a clear message instead of an IndexError on options[0].
if not options:
    raise FileNotFoundError("No files found in " + results_dir)

picker = widgets.Dropdown(options=options, value=options[0])

# Last expression of the cell: renders the dropdown.
picker


Dropdown(options=('xmeans_temp777.csv', 'xmeans_ppp (1).csv', 'visual_names_familiar_matrix.csv', 'visual_pics…

In [None]:
#@title Clean data
import pandas as pd
import numpy as np
# Load the file currently selected in the dropdown.
selected_csv_path = results_dir + "/" + picker.value
df = pd.read_csv(selected_csv_path)

def check_symmetric(a, tol=1e-8):
    """Return True when `a` is a square matrix equal to its own transpose.

    Parameters
    ----------
    a : array-like
        Candidate matrix; converted with ``np.asarray``.
    tol : float
        Two mirrored entries count as equal when their absolute difference
        is strictly below this value.

    Returns
    -------
    bool
        False for anything that is not a square 2-D array.
    """
    a = np.asarray(a)

    # Without this guard a 1 x n row broadcasts against its transpose and can
    # be reported as symmetric, while other rectangular shapes raise a
    # broadcasting error instead of answering "no".
    if a.ndim != 2 or a.shape[0] != a.shape[1]:
        return False

    return bool(np.all(np.abs(a - a.T) < tol))

# The first column is treated as the key column and is dropped from the
# numeric matrix; the remaining column names become the item labels.
labels_key = df.columns[0]
labels = df.columns[1:]

# Missing cells are replaced with 0 before converting to a NumPy array.
matrix = (
    df.drop(columns=labels_key)
      .fillna(0)
      .to_numpy()
)

def try_make_matrix_symmetric(mat):
  """Mirror a triangular matrix across its diagonal to make it symmetric.

  Only a square matrix that is purely upper- or lower-triangular is
  completed. Anything else is returned unchanged, so that a follow-up
  symmetry check can report the problem instead of the values being
  silently summed with their transpose.

  Parameters
  ----------
  mat : array-like
      Candidate matrix; converted with ``np.asarray``.

  Returns
  -------
  numpy.ndarray
      The mirrored matrix, or `mat` itself when it cannot be completed.
  """
  mat = np.asarray(mat)
  if mat.ndim != 2 or mat.shape[0] != mat.shape[1]:
    return mat

  upper_only = np.array_equal(mat, np.triu(mat))
  lower_only = np.array_equal(mat, np.tril(mat))
  if not (upper_only or lower_only):
    return mat

  # mat + mat.T counts the diagonal twice, so take it off once.
  return mat + mat.T - np.diag(np.diag(mat))

# Attempt one repair when the matrix is not symmetric, then re-check and
# tell the user if the repair did not help.
if not check_symmetric(matrix):
    matrix = try_make_matrix_symmetric(matrix)
    if not check_symmetric(matrix):
        print("Your matrix isn't symmetric")

# Later cells work with the matrix as a DataFrame.
matrix = pd.DataFrame(matrix)



In [None]:
#@title run MDS
from sklearn.manifold import MDS

# Fixed seed so that re-running the cell reproduces the same layout; the
# estimator was previously created without a random_state.
MDS_RANDOM_STATE = 0

# n_components=2 matches the two coordinate columns ("x", "y") built below.
mds = MDS(
    n_components=2,
    dissimilarity='precomputed',
    random_state=MDS_RANDOM_STATE,
)
df = pd.DataFrame(mds.fit_transform(matrix), columns=["x", "y"])
df["image_name"] = labels

# The id of an item is the part of its label before the first underscore.
ids = [label.split("_")[0] for label in labels]
df["id"] = ids



In [None]:
#@title arrange data for shepard's diagram

import math

# Image names in embedding order; they label the embedded-distance matrix.
title_row = np.ndarray.copy(df['image_name'].to_numpy())

# Euclidean distance between every pair of embedded points, computed in one
# vectorised step instead of a Python double loop over DataFrame rows.
embedded_points = df[["x", "y"]].to_numpy(dtype=float)
deltas = embedded_points[:, np.newaxis, :] - embedded_points[np.newaxis, :, :]
embedded_distances = np.sqrt((deltas ** 2).sum(axis=-1))

# Kept as a nested list, the type this name had before.
adj_matrix = embedded_distances.tolist()
new_matrix = pd.DataFrame(embedded_distances, index=title_row, columns=title_row)

# The input dissimilarities, relabelled with the item labels.
old_matrix = matrix.copy()
old_matrix.index = labels
old_matrix.columns = labels

# One Shepard point per unordered pair (i > j), in the order (1,0), (2,0),
# (2,1), ... Values are read positionally as [j, i] (row j, column i), which
# also works when two items share the same label.
label_list = list(labels)
pair_i, pair_j = np.tril_indices(len(label_list), k=-1)
original_distances = old_matrix.to_numpy()

shepards_label = [label_list[i] + ", " + label_list[j] for i, j in zip(pair_i, pair_j)]
shepards_x = original_distances[pair_j, pair_i].tolist()
shepards_y = embedded_distances[pair_j, pair_i].tolist()

shepards_df = pd.DataFrame(
    list(zip(shepards_label, shepards_x, shepards_y)),
    columns=["labels", "x", "y"],
)

In [None]:
#@title get images
import os
import base64

def get_images(directory=None):
  """Read every file in a folder and return it as a base64 data URI.

  Parameters
  ----------
  directory : str, optional
      Folder to read. Defaults to the module-level ``images_dir``.

  Returns
  -------
  dict
      Maps each file name to ``"data:image/<suffix>;base64, <payload>"``,
      where ``<suffix>`` is the text after the last dot of the file name.
  """
  if directory is None:
    directory = images_dir

  # Resolve first so that a relative path still points at the same folder
  # after the working directory changes below.
  directory = os.path.abspath(directory)

  # Kept from the previous implementation: the working directory is
  # switched to the image folder as a side effect.
  os.chdir(directory)

  images64 = {}
  for image_name in os.listdir(directory):
    image_path = os.path.join(directory, image_name)

    # A sub-folder cannot be opened as a file; skip it instead of crashing.
    if not os.path.isfile(image_path):
      continue

    suffix = image_name.split(".")[-1]

    # The context manager closes the handle; previously one handle per
    # image was left open.
    with open(image_path, 'rb') as image_file:
      image_read = image_file.read()

    images64[image_name] = "data:image/" + suffix +";base64, " + base64.b64encode(image_read).decode()

  return images64

# Encode all images once so later lookups are served from memory.
images = get_images()

def get_image(image_name):
  """Return the cached data URI for `image_name`.

  Raises KeyError when no image with that file name was loaded.
  """
  data_uri = images[image_name]
  return data_uri



In [None]:
#@title add colour category
def add_category_to_df():
  """Add an integer "category" column to the global `df`, one value per id.

  Each row receives the position of the last row that shares its id, so
  rows with the same id end up with the same number.
  """
  id_values = df.id.astype('category').to_numpy()

  # Later positions overwrite earlier ones, which leaves the last position
  # of every id in the mapping.
  last_position = {}
  for position, id_value in enumerate(id_values):
    last_position[id_value] = position

  categories = []
  for row_label in range(len(df["id"])):
    categories.append(last_position[df.loc[row_label].id])

  df["category"] = categories

add_category_to_df()


In [None]:
#@title draw plot
import plotly.express as px
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output, no_update
from dash import html
from dash.dependencies import Input, Output
import plotly.graph_objects as go


figure = go.Figure()

# Sorted so that trace order (and with it legend order and the default
# colour of each id) is the same on every run; iterating a bare set of
# strings gives an arbitrary order.
unique_ids = sorted(set(ids))

# One scatter trace per id: trace number i holds the rows of unique_ids[i],
# in the order those rows appear in df.
for legendrank, name in enumerate(unique_ids):
  group = df[df["id"] == name]
  figure.add_trace(go.Scatter(
      name=name,
      x=group.x.to_numpy(),
      y=group.y.to_numpy(),
      legendrank=legendrank,
      mode="markers",
  ))

figure.update_layout(legend= {'itemsizing': 'constant'})

# Turn off the default hover labels on every trace.
figure.update_traces(hoverinfo="none", hovertemplate=None)
app = JupyterDash(__name__)

# Page content: a heading, the MDS scatter plot, and a tooltip component.
mds_heading = html.H1("MDS")
mds_graph = dcc.Graph(id="graph-5", figure=figure, clear_on_unhover=True)
mds_tooltip = dcc.Tooltip(id="graph-tooltip-5", direction='bottom')

app.layout = html.Div(
    className="container",
    children=[mds_heading, mds_graph, mds_tooltip],
)

@app.callback(
    Output("graph-tooltip-5", "show"),
    Output("graph-tooltip-5", "bbox"),
    Output("graph-tooltip-5", "children"),
    Input("graph-5", "hoverData"),
)
def display_hover(hoverData):
    """Build the tooltip for the point currently hovered in the MDS plot.

    Parameters
    ----------
    hoverData : dict or None
        Hover payload of the graph; None when nothing is hovered.

    Returns
    -------
    tuple
        ``(show, bbox, children)`` for the tooltip component. When nothing
        is hovered the tooltip is hidden and the other two are left as is.
    """
    if hoverData is None:
        return False, no_update, no_update

    # Only the first hovered point is shown, although several may be reported.
    hover_data = hoverData["points"][0]
    bbox = hover_data["bbox"]

    # pointNumber counts within a single trace, and the figure has one trace
    # per id (trace i holds the rows of unique_ids[i]). Resolve the trace
    # first, then index into that trace's rows; indexing the whole df with
    # pointNumber picks the wrong image for all but one trace.
    trace_id = unique_ids[hover_data["curveNumber"]]
    trace_image_names = df.loc[df["id"] == trace_id, "image_name"].to_numpy()
    image_name = trace_image_names[hover_data["pointNumber"]]

    tooltip_parts = [html.P(image_name)]

    # Show the name alone when no image file with that name was loaded,
    # instead of failing the callback with a KeyError.
    image_url = images.get(image_name)
    if image_url is not None:
        tooltip_parts.append(
            html.Img(
                src=image_url,
                style={"width": "50px", 'display': 'block', 'margin': '0 auto'},
            )
        )

    children = [html.Div(tooltip_parts)]

    return True, bbox, children

# Run app and display result inline in the notebook
app.run_server(mode='inline', debug=True)

<IPython.core.display.Javascript object>

In [None]:
#@title draw shepard's diagram
import plotly.express as px
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output, no_update
from dash import html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from scipy import stats

# Shepard's diagram: original dissimilarity (x) against distance in the MDS
# embedding (y), one marker per item pair, labelled with the pair's names.
figure = go.Figure(
    data=go.Scatter(
        x=shepards_df["x"],
        y=shepards_df["y"],
        mode='markers',
        text=shepards_df["labels"],
    )
)
figure.update_layout(xaxis_title = "before MDS", yaxis_title = "after MDS")

# pearsonr returns the correlation coefficient first and the p-value second.
pearson = stats.pearsonr(shepards_df["x"].to_numpy(), shepards_df["y"].to_numpy())

correlation = "Pearson's R: " + str(pearson[0])
p_value = "P-value: " + str(pearson[1])

# This app gets its own port. NOTE(review): both apps would otherwise start
# on the same default port, and jupyter-dash appears to shut down a server
# already running on the requested port, which would leave the MDS plot's
# hover callback without a backend - confirm against the installed version.
SHEPARD_APP_PORT = 8051

app1 = JupyterDash(__name__)

app1.layout = html.Div(
    className="container",
    children=[
        html.H1("Shepard's Diagram"),
        html.H3(correlation + " " + p_value),
        dcc.Graph(id="graph-5", figure=figure, clear_on_unhover=True)
    ],
)
app1.run_server(mode='inline', debug=True, port=SHEPARD_APP_PORT)

<IPython.core.display.Javascript object>