# Computing Distance Between DAGs of Yeast Data

In this jupyter notebook we do the following:
* Compute the extremal DAGs for SWI4, YOX1, HCM1, and NDD1 in each of the two yeast datasets. These extremal DAGs are named DAG1 and DAG2, respectively.
* Compute the supergraph between DAG1 and DAG2.
* Compute the extremal DAG for SWI4, CLB2, HCM1, NDD1 from the second dataset. We call this DAG3.
* Compute the supergraph between DAG1 and DAG3 where we label CLB2 as YOX1. 

### Uploading datasets and necessary files

In [1]:
# import the necessary packages
import pandas as pd
import numpy as np
from min_interval_posets.curve import Curve
from min_interval_posets.extremal_DAG import get_extremalDAG
from min_interval_posets import supergraph as sg

In [2]:
# Time grid shared by every curve built below: the 265 integers 0, 1, ..., 264.
x = np.arange(265)

# First yeast dataset: read the CSV and keep both the DataFrame and its NumPy matrix form.
yeast_dataframe = pd.read_csv("WT1_WT2_microarray_interpolated/wt1_microarray_coregenes_lifepoints_interpol_trim.csv")
yeast_matrix = yeast_dataframe.to_numpy()

# Second yeast dataset, loaded the same way.
yeast_dataframe2 = pd.read_csv("WT1_WT2_microarray_interpolated/wt2_microarray_coregenes_lifepoints_interpol.csv")
yeast_matrix2 = yeast_dataframe2.to_numpy()

### Computing the extremal DAG of SWI4, YOX1, HCM1, & NDD1 from the first yeast dataset

In [3]:
# Expression profiles from the first dataset: one matrix row per gene, with
# column 0 skipped (presumably a label column -- TODO confirm against the CSV).
SWI4 = list(yeast_matrix[5, 1:])
YOX1 = list(yeast_matrix[8, 1:])
HCM1 = list(yeast_matrix[10, 1:])
NDD1 = list(yeast_matrix[12, 1:])

# For each gene, pair every time point in x with the sample at the same position,
# wrap that mapping in a Curve, and re-wrap the result of .normalize() in a Curve.
profiles = {"SWI4": SWI4, "YOX1": YOX1, "HCM1": HCM1, "NDD1": NDD1}
curves = {
    gene: Curve(Curve({float(t): samples[k] for k, t in enumerate(x)}).normalize())
    for gene, samples in profiles.items()
}
c1, c2, c3, c4 = curves.values()

# Extremal DAG of the four normalized curves from the first dataset.
DAG1 = get_extremalDAG(curves)

In [4]:
# Display DAG1. The (truncated) output below starts with a list of
# (gene, 'min'/'max') pairs followed by a list of (index, float) pairs.
DAG1

([('SWI4', 'min'),
  ('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('YOX1', 'max'),
  ('YOX1', 'min'),
  ('YOX1', 'max'),
  ('YOX1', 'min'),
  ('YOX1', 'max'),
  ('YOX1', 'min'),
  ('YOX1', 'max'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('NDD1', 'max'),
  ('NDD1', 'min'),
  ('NDD1', 'max'),
  ('NDD1', 'min'),
  ('NDD1', 'max'),
  ('NDD1', 'min'),
  ('NDD1', 'max')],
 [(0, 0.5),
  (1, 0.5),
  (2, 0.4876185302014067),
  (3, 0.24774313593371722),
  (4, 0.13870379280075362),
  (5, 0.026570005292854748),
  (6, 0.0014643291225220345),
  (7, 0.0014249522279166371),
  (8, 0.0003406779785679326),
  (9, 0.5),
  (10, 0.5),
  (11, 0.4237889664213448),
  (12, 0.16010731341393894),
  (13, 0.04864232573477517),
  (14, 3.6436526047539664e-05),
  (15, 0.5),
  (16, 0.5),
  (17, 0.4743729884372806),
  (18, 0.23429699938001103),
  (19, 0.076657

### Computing the extremal DAG of SWI4, YOX1, HCM1, & NDD1 from the second yeast dataset

In [4]:
# Expression profiles from the second dataset: same gene rows as for DAG1, with
# column 0 skipped (presumably a label column -- TODO confirm against the CSV).
SWI4 = list(yeast_matrix2[5, 1:])
YOX1 = list(yeast_matrix2[8, 1:])
HCM1 = list(yeast_matrix2[10, 1:])
NDD1 = list(yeast_matrix2[12, 1:])

# For each gene, pair every time point in x with the sample at the same position,
# wrap that mapping in a Curve, and re-wrap the result of .normalize() in a Curve.
profiles = {"SWI4": SWI4, "YOX1": YOX1, "HCM1": HCM1, "NDD1": NDD1}
curves = {
    gene: Curve(Curve({float(t): samples[k] for k, t in enumerate(x)}).normalize())
    for gene, samples in profiles.items()
}
c1, c2, c3, c4 = curves.values()

# Extremal DAG of the four normalized curves from the second dataset.
DAG2 = get_extremalDAG(curves)

In [6]:
# Display DAG2. The (truncated) output below starts with a list of
# (gene, 'min'/'max') pairs followed by a list of (index, float) pairs.
DAG2

([('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('YOX1', 'max'),
  ('YOX1', 'min'),
  ('YOX1', 'max'),
  ('YOX1', 'min'),
  ('YOX1', 'max'),
  ('YOX1', 'min'),
  ('YOX1', 'max'),
  ('YOX1', 'min'),
  ('YOX1', 'max'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('NDD1', 'max'),
  ('NDD1', 'min'),
  ('NDD1', 'max'),
  ('NDD1', 'min'),
  ('NDD1', 'max'),
  ('NDD1', 'min')],
 [(0, 0.01707515062813897),
  (1, 0.5),
  (2, 0.5),
  (3, 0.47625926261915463),
  (4, 0.19973316712445371),
  (5, 0.005186498594409866),
  (6, 0.005186498594409866),
  (7, 0.15966629961791923),
  (8, 0.02884595915399782),
  (9, 5.601526953752378e-06),
  (10, 0.5),
  (11, 0.5),
  (12, 0.4137716281029566),
  (13, 0.16787066294073283),
  (14, 0.006465615420300191),
  (15, 0.00646561542030019

### Computing the supergraph between DAG1 & DAG2

In [11]:
# Labels handed to get_supergraph: the same four gene names that were used as
# curve keys when DAG1 and DAG2 were built.
names = ['SWI4', 'YOX1', 'HCM1', 'NDD1']
# Build the supergraph of DAG1 and DAG2 with the project's supergraph module.
supergraph1 = sg.get_supergraph(names, DAG1, DAG2)
# Bare last expression so the notebook renders the result.
supergraph1

([(('SWI4', 'max'), (0, 0.01707515062813897)),
  (('SWI4', 'min'), (0.5, 0.5)),
  (('SWI4', 'max'), (0.5, 0.5)),
  (('SWI4', 'min'), (0.4876185302014067, 0.47625926261915463)),
  (('SWI4', 'max'), (0.24774313593371722, 0.19973316712445371)),
  (('SWI4', 'min'), (0, 0.005186498594409866)),
  (('SWI4', 'max'), (0, 0.005186498594409866)),
  (('SWI4', 'min'), (0.13870379280075362, 0.15966629961791923)),
  (('SWI4', 'max'), (0.026570005292854748, 0.02884595915399782)),
  (('SWI4', 'min'), (0.0014643291225220345, 0)),
  (('SWI4', 'max'), (0.0014249522279166371, 0)),
  (('YOX1', 'max'), (0.0003406779785679326, 5.601526953752378e-06)),
  (('YOX1', 'min'), (0.5, 0.5)),
  (('YOX1', 'max'), (0.5, 0.5)),
  (('YOX1', 'min'), (0.4237889664213448, 0.4137716281029566)),
  (('YOX1', 'max'), (0.16010731341393894, 0.16787066294073283)),
  (('YOX1', 'min'), (0, 0.006465615420300191)),
  (('YOX1', 'max'), (0, 0.006465615420300191)),
  (('YOX1', 'min'), (0.04864232573477517, 0.052748021470061246)),
  (('YOX

### Computing the extremal DAG of SWI4, CLB2, HCM1, and NDD1 from the second yeast dataset
To compute DAG3, we deliberately mislabel CLB2 as YOX1 so that we can test whether the distance between DAG1 and DAG3 is greater than the distance between DAG1 and DAG2.

In [7]:
# Expression profiles from the second dataset, with CLB2 (row 11) taken in place
# of YOX1. Column 0 is skipped (presumably a label column -- TODO confirm).
SWI4 = list(yeast_matrix2[5, 1:])
CLB2 = list(yeast_matrix2[11, 1:])
HCM1 = list(yeast_matrix2[10, 1:])
NDD1 = list(yeast_matrix2[12, 1:])

# The CLB2 samples are deliberately stored under the label "YOX1".
# For each label, pair every time point in x with the sample at the same position,
# wrap that mapping in a Curve, and re-wrap the result of .normalize() in a Curve.
profiles = {"SWI4": SWI4, "YOX1": CLB2, "HCM1": HCM1, "NDD1": NDD1}
curves = {
    gene: Curve(Curve({float(t): samples[k] for k, t in enumerate(x)}).normalize())
    for gene, samples in profiles.items()
}
c1, c2, c3, c4 = curves.values()

# Extremal DAG of the four normalized curves (CLB2 standing in for YOX1).
DAG3 = get_extremalDAG(curves)

In [8]:
# Display DAG3. The (truncated) output below starts with a list of
# (gene, 'min'/'max') pairs followed by a list of (index, float) pairs.
DAG3

([('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('SWI4', 'min'),
  ('SWI4', 'max'),
  ('YOX1', 'min'),
  ('YOX1', 'max'),
  ('YOX1', 'min'),
  ('YOX1', 'max'),
  ('YOX1', 'min'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('HCM1', 'min'),
  ('HCM1', 'max'),
  ('NDD1', 'max'),
  ('NDD1', 'min'),
  ('NDD1', 'max'),
  ('NDD1', 'min'),
  ('NDD1', 'max'),
  ('NDD1', 'min')],
 [(0, 0.01707515062813897),
  (1, 0.5),
  (2, 0.5),
  (3, 0.47625926261915463),
  (4, 0.19973316712445371),
  (5, 0.005186498594409866),
  (6, 0.005186498594409866),
  (7, 0.15966629961791923),
  (8, 0.02884595915399782),
  (9, 0.5),
  (10, 0.5),
  (11, 0.3428614729087075),
  (12, 0.21049008947633854),
  (13, 0.09430735430938686),
  (14, 0.5),
  (15, 0.5),
  (16, 0.3318692911088038),
  (17, 0.09470700512403982),
  (18, 0.038781620383154164),
  (19, 0.038781620383

### Compute the supergraph between DAG1 and DAG3

In [9]:
# Build the supergraph of DAG1 and DAG3.
# DAG3 was built with the CLB2 curve stored under the key "YOX1", so neither DAG
# contains a node labeled 'CLB2'. The label list therefore holds only the four
# labels that actually occur in both DAGs, matching the DAG1/DAG2 comparison.
# NOTE(review): get_supergraph is defined outside this notebook -- confirm that
# the reported dist(DAG1, DAG3) is unchanged by listing only these four labels.
names = ['SWI4', 'YOX1', 'HCM1', 'NDD1']
supergraph2 = sg.get_supergraph(names, DAG1, DAG3)
# Bare last expression so the notebook renders the result.
supergraph2

([(('SWI4', 'max'), (0, 0.01707515062813897)),
  (('SWI4', 'min'), (0.5, 0.5)),
  (('SWI4', 'max'), (0.5, 0.5)),
  (('SWI4', 'min'), (0.4876185302014067, 0.47625926261915463)),
  (('SWI4', 'max'), (0.24774313593371722, 0.19973316712445371)),
  (('SWI4', 'min'), (0, 0.005186498594409866)),
  (('SWI4', 'max'), (0, 0.005186498594409866)),
  (('SWI4', 'min'), (0.13870379280075362, 0.15966629961791923)),
  (('SWI4', 'max'), (0.026570005292854748, 0.02884595915399782)),
  (('SWI4', 'min'), (0.0014643291225220345, 0)),
  (('SWI4', 'max'), (0.0014249522279166371, 0)),
  (('YOX1', 'max'), (0.0003406779785679326, 0)),
  (('YOX1', 'min'), (0.5, 0.5)),
  (('YOX1', 'max'), (0.5, 0.5)),
  (('YOX1', 'min'), (0.4237889664213448, 0.3428614729087075)),
  (('YOX1', 'max'), (0.16010731341393894, 0.21049008947633854)),
  (('YOX1', 'min'), (0.04864232573477517, 0.09430735430938686)),
  (('YOX1', 'max'), (3.6436526047539664e-05, 0)),
  (('HCM1', 'min'), (0.5, 0.5)),
  (('HCM1', 'max'), (0.5, 0.5)),
  (('HCM1

### Summary of results
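* dist(DAG1, DAG2) = 10.34258706101124
* dist(DAG1, DAG3) = 15.482883975910779
* Hence dist(DAG1, DAG3) > dist(DAG1, DAG2): the DAG built with CLB2 mislabeled as YOX1 is farther from DAG1 than the correctly labeled DAG2 is.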