In [324]:
%pip install pandas
%pip install openpyxl
%pip install yfiles_jupyter_graphs


[0mNote: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.


In [325]:
import pandas as pd
import os
from yfiles_jupyter_graphs import GraphWidget


In [326]:
# Zero-based row positions of the header rows in the sheet. They are used as
# positional (iloc) row indices by the extract_* helpers in this notebook.
# EPOCH_ROW is not referenced by the code visible in this notebook.
EPOCH_ROW = 0
CYCLE_ROW = 1
CYCLE_START_ROW = 2
CYCLE_PERIOD_ROW = 3
CYCLE_END_RULE_ROW = 4
TIMING_ROW = 5
VISIT_LABEL_ROW = 6
VISIT_WINDOW_ROW = 7

# First row that holds activity data; every row above it is header material.
FIRST_ACTIVITY_ROW = 8

# Zero-based column positions of the leading descriptive columns.
# ACTIVITY_COL and PROFILE_COL are not referenced by the code visible here.
ACTIVITY_COL = 0
CHILD_ACTIVITY_COL = 1
BC_COL = 2
PROFILE_COL = 3
# First column that represents a visit/timepoint; timepoint indices are
# computed as (column index - FIRST_VISIT_COL).
FIRST_VISIT_COL = 4

In [327]:
def get_cycle_cell(df, row_index, col_index):
  """Read one cycle-related cell as text.

  Returns a `(text, is_null)` tuple. A null cell, or a cell whose text is
  exactly "-", yields `("", True)`; any other cell yields `(str(cell), False)`.
  """
  cell = df.iloc[row_index, col_index]
  if pd.isnull(cell):
    return "", True
  text = str(cell)
  # A lone dash is treated the same as an empty cell.
  if text.upper() == "-":
    return "", True
  return text, False

def previous_index(index):
  """Return the index before `index`, except that 0 maps to itself."""
  return 0 if index == 0 else index - 1

def build_cycle_record(df, index, col_index, condition):
  """Assemble the record describing a cycle that opens at `col_index`.

  `index` is stored as the cycle's `start_index` and `condition` as its
  `condition`. The start, period and end-rule texts are read from the
  corresponding cycle header rows of the same column; only the text part of
  each cell lookup is used.
  """
  start_text = get_cycle_cell(df, CYCLE_START_ROW, col_index)[0]
  period_text = get_cycle_cell(df, CYCLE_PERIOD_ROW, col_index)[0]
  end_rule_text = get_cycle_cell(df, CYCLE_END_RULE_ROW, col_index)[0]
  record = {}
  record['start_index'] = index
  record['condition'] = condition
  record['start'] = start_text
  record['period'] = period_text
  record['end_rule'] = end_rule_text
  return record


In [328]:
def extract_cycles(df):
  """Collect the cycles declared in the cycle header row.

  Walks the visit columns (those at or after FIRST_VISIT_COL) left to right.
  A run of consecutive columns sharing the same cycle text forms one cycle;
  a null cell (as reported by get_cycle_cell) or a different cycle text closes
  the current one.

  Returns a list of records as produced by build_cycle_record, each extended
  with an 'end_index' key holding the timepoint index of the last column in
  the cycle.
  """
  cycles = []
  timepoint_index = -1
  in_cycle = False
  prev_cycle = None
  cycle_record = None
  for col_index in range(df.shape[1]):
    if col_index >= FIRST_VISIT_COL:
      timepoint_index += 1
      cycle, cycle_is_null = get_cycle_cell(df, CYCLE_ROW, col_index)
      if cycle_is_null:
        if in_cycle:
          # The cycle ended on the column before this empty one.
          cycle_record['end_index'] = previous_index(timepoint_index)
          cycles.append(cycle_record)
          in_cycle = False
      else:
        cycle = str(cycle)
        if not in_cycle:
          in_cycle = True
          cycle_record = build_cycle_record(df, timepoint_index, col_index, cycle)
        elif prev_cycle != cycle:
          # A different cycle starts here: close the old one, open the new one.
          cycle_record['end_index'] = previous_index(timepoint_index)
          cycles.append(cycle_record)
          cycle_record = build_cycle_record(df, timepoint_index, col_index, cycle)
      prev_cycle = cycle
  if in_cycle:
    # A cycle that runs up to the last column has no following column to
    # close it, so close it here; otherwise the final cycle would be lost.
    cycle_record['end_index'] = timepoint_index
    cycles.append(cycle_record)
  return cycles


In [329]:
def get_timing_cell(df, row_index, col_index):
  """Fetch a timing cell as a `(value, is_null)` pair.

  A null cell is reported as `("", True)`; otherwise the raw cell value is
  returned unchanged together with `False`.
  """
  cell = df.iloc[row_index, col_index]
  return ("", True) if pd.isnull(cell) else (cell, False)

def get_relative_ref(part):
  """Return the numeric offset that follows the leading letter of `part`.

  A string of at most one character has no explicit offset and yields 1.
  Otherwise everything after the first character is parsed with int(); the
  parsed text is also echoed to stdout.
  """
  if len(part) <= 1:
    return 1
  suffix = part[1:]
  print("INT", part, suffix)
  return int(suffix)

def get_timing_type(df, col_index):
  """Decode the timing cell of one visit column.

  The cell text is split on ":". The first letter of the part before the
  colon selects the type: "A" -> "anchor", "P" -> "previous", "N" -> "next"
  (case-insensitive). For "previous"/"next" the remainder of that part is the
  offset, resolved by get_relative_ref and negated for "previous". When the
  text has exactly two colon-separated parts, the stripped second part is the
  timing value.

  Returns a dict with:
    'type'  - "anchor", "previous", "next", or "" when nothing was recognised
    'ref'   - timepoint index (col_index - FIRST_VISIT_COL) plus the offset
    'value' - the timing value text, or ""
  """
  timing_type = ""
  rel_ref = 0
  timing_value = ""
  timing_info, timing_info_is_null = get_timing_cell(df, TIMING_ROW, col_index)
  if not timing_info_is_null:
    # Coerce to text so a numeric cell cannot break .split().
    timing_parts = str(timing_info).split(":")
    prefix = timing_parts[0].strip()
    # Slicing (not indexing) keeps an empty prefix such as ": 3 days" from
    # raising IndexError; it simply matches no timing type.
    kind = prefix.upper()[:1]
    if kind == "A":
      timing_type = "anchor"
      rel_ref = 0
    elif kind == "P":
      timing_type = "previous"
      rel_ref = get_relative_ref(prefix) * -1
    elif kind == "N":
      timing_type = "next"
      rel_ref = get_relative_ref(prefix)
    if len(timing_parts) == 2:
      timing_value = timing_parts[1].strip()
  print("TIMING: col_index (%s) - FIRST_VISIT_COL (%s) + rel_ref (%s)" % (col_index, FIRST_VISIT_COL, rel_ref))
  return { 'type': timing_type, 'ref': col_index - FIRST_VISIT_COL + rel_ref, 'value': timing_value }

In [330]:
def extract_timepoints(df):
  """Return one timing record (see get_timing_type) per visit column.

  Visit columns are those whose index is at or beyond FIRST_VISIT_COL; the
  records are returned in column order.
  """
  return [
    get_timing_type(df, col_index)
    for col_index in range(df.shape[1])
    if col_index >= FIRST_VISIT_COL
  ]

In [331]:
def get_encounter_cell(df, row_index, col_index):
  """Look up an encounter cell and report whether it is empty.

  Returns `("", True)` for a null cell, otherwise `(cell_value, False)` with
  the value passed through untouched.
  """
  content = df.iloc[row_index, col_index]
  if not pd.isnull(content):
    return content, False
  return "", True

def get_encounter_details(df, col_index):
  """Return the visit label and visit window of one visit column.

  Both values come from get_encounter_cell, so an empty cell is reported as
  "". The result is a dict with the keys 'label' and 'window'.
  """
  label_text = get_encounter_cell(df, VISIT_LABEL_ROW, col_index)[0]
  window_text = get_encounter_cell(df, VISIT_WINDOW_ROW, col_index)[0]
  details = {}
  details['label'] = label_text
  details['window'] = window_text
  return details

In [332]:
def extract_encounters(df):
  """Return the encounter details of every visit column, in column order.

  Columns before FIRST_VISIT_COL are skipped.
  """
  visit_columns = (c for c in range(df.shape[1]) if c >= FIRST_VISIT_COL)
  return [get_encounter_details(df, c) for c in visit_columns]

In [333]:
def get_activity_cell(df, row_index, col_index):
  """Read an activity-area cell as a `(value, is_null)` tuple.

  Null cells give `("", True)`; any other cell gives its raw value and
  `False`.
  """
  raw = df.iloc[row_index, col_index]
  missing = pd.isnull(raw)
  if missing:
    raw = ""
  return raw, bool(missing)

In [334]:
def extract_activities_and_bcs(df):
  """Collect activities and their BC entries from the activity rows.

  Rows from FIRST_ACTIVITY_ROW downwards are scanned by position. A row with
  a value in CHILD_ACTIVITY_COL starts a new activity; a row with that cell
  empty is a continuation of the most recent named activity. Any value in
  BC_COL is filed under the activity the row belongs to.

  Returns a dict with:
    'activities'         - activity names in order of appearance
    'activity_bc_map'    - {activity: {'bc': [bc values...]}}
    'row_activities_map' - one entry per scanned row (index 0 corresponds to
                           FIRST_ACTIVITY_ROW) naming the row's activity
  """
  activities = []
  activities_bc_map = {}
  row_activities_map = []
  # Activity owning the current row. Stays "" until the first named activity,
  # so leading blank rows still get an entry and the map stays row-aligned.
  current_activity = ""
  for row_index in range(FIRST_ACTIVITY_ROW, df.shape[0]):
    activity, activity_is_null = get_activity_cell(df, row_index, CHILD_ACTIVITY_COL)
    if not activity_is_null:
      activities.append(activity)
      # Only a named row changes the owner; blank rows must not reset it,
      # otherwise the second and later continuation rows lose their activity.
      current_activity = activity
    row_activities_map.append(current_activity)
    bc, bc_is_null = get_activity_cell(df, row_index, BC_COL)
    if not bc_is_null:
      activities_bc_map.setdefault(current_activity, { 'bc': [] })['bc'].append(bc)
  return {
    'activities': activities,
    'activity_bc_map': activities_bc_map,
    'row_activities_map': row_activities_map
  }

def extract_timepoint_activities_map(df, timepoints, activities, row_activities_map):
  """Work out which activities are marked for each timepoint.

  Args:
    df: sheet data; visit columns start at FIRST_VISIT_COL and activity rows
      at FIRST_ACTIVITY_ROW.
    timepoints: one item per visit column; only its length is used.
    activities: activity names; each becomes a key in every result dict.
    row_activities_map: activity name for each activity row, where index 0
      corresponds to FIRST_ACTIVITY_ROW.

  Returns:
    A list with one dict per timepoint mapping activity name -> bool. An
    entry is True when the matching cell contains "X" (case-insensitive,
    surrounding whitespace ignored).
  """
  # Each timepoint needs its OWN dict. Appending one shared dict would make a
  # mark in any column show up in every timepoint.
  timepoint_activity_map = [
    {activity: False for activity in activities} for _ in timepoints
  ]
  for col_index in range(FIRST_VISIT_COL, df.shape[1]):
    tp_index = col_index - FIRST_VISIT_COL
    for row_index in range(FIRST_ACTIVITY_ROW, df.shape[0]):
      cell = df.iloc[row_index, col_index]
      if pd.isnull(cell):
        continue
      # str() guards against numeric cells, which have no .upper().
      if str(cell).strip().upper() == "X":
        activity = row_activities_map[row_index - FIRST_ACTIVITY_ROW]
        timepoint_activity_map[tp_index][activity] = True
  return timepoint_activity_map


In [335]:
class DDFJson():
  """Builds the nested dict structure describing a timeline.

  Every object created through an add_* method receives an id of the form
  "<kind>_<n>" (n counts up from 1 per kind) and is registered in
  `self.dicts` under that id.
  """

  def __init__(self):
    # Last number handed out for each kind of object.
    self.id_index = { 'entry': 0, 'exit': 0, 'timepoint': 0, 'timeline': 0, 'timing': 0 }
    # Registry of every created object, keyed by its id.
    self.dicts = {}

  def increment_index(self, name):
    """Advance the counter for the given kind of object."""
    self.id_index[name] += 1

  def build_id(self, name):
    """Allocate and return the next id for the given kind, e.g. "timing_3"."""
    self.increment_index(name)
    return "%s_%s" % (name, self.id_index[name])

  def add_entry(self, description, timepoint_id):
    """Create and register a timeline entry pointing at `timepoint_id`."""
    entry_id = self.build_id('entry')
    result = { 'entryId': entry_id, 'entryDescription': description, 'timepoint': timepoint_id }
    self.dicts[entry_id] = result
    return result

  def add_exit(self):
    """Create and register a timeline exit."""
    exit_id = self.build_id('exit')
    result = { 'exitId': exit_id }
    self.dicts[exit_id] = result
    return result

  def add_timepoint(self, previous_timepoint_id, timing):
    """Create and register a timepoint.

    When `previous_timepoint_id` is given, that (already registered)
    timepoint's 'nextTimepointId' is set to the new timepoint's id.
    """
    timepoint_id = self.build_id('timepoint')
    result = { 'timepointId': timepoint_id, 'nextTimepointId': None, 'scheduledAt': timing }
    self.dicts[timepoint_id] = result
    if previous_timepoint_id is not None:
      self.dicts[previous_timepoint_id]['nextTimepointId'] = timepoint_id
    return result

  def add_previous_timing(self, value, relative_to_from, window, to_id):
    """Create and register a timing of type "after" relative to `to_id`."""
    timing_id = self.build_id('timing')
    result = { 'timingId': timing_id, 'type': "after", 'value': value, 'relativeToFrom': relative_to_from, 'window': window, 'relativeTo': to_id }
    self.dicts[timing_id] = result
    return result

  def add_next_timing(self, value, relative_to_from, window, to_id):
    """Create and register a timing of type "next" relative to `to_id`."""
    timing_id = self.build_id('timing')
    result = { 'timingId': timing_id, 'type': "next", 'value': value, 'relativeToFrom': relative_to_from, 'window': window, 'relativeTo': to_id }
    self.dicts[timing_id] = result
    return result

  def add_anchor_timing(self, value):
    """Create and register a timing of type "anchor" (relative to nothing)."""
    timing_id = self.build_id('timing')
    result = { 'timingId': timing_id, 'type': "anchor", 'value': value, 'relativeToFrom': None, 'window': None, 'relativeTo': None }
    self.dicts[timing_id] = result
    return result

  def add_timeline(self, entry, timepoints, exit):
    """Create and register a timeline holding the given entry, timepoints and exit."""
    # Allocate the id exactly once; allocating twice skipped a number and
    # made the first timeline "timeline_2".
    timeline_id = self.build_id('timeline')
    result = { 'timelineId': timeline_id, 'timelineEntry': entry, 'timelineTimepoints': timepoints, 'timelineExit': exit }
    # Registered like every other object built by this class.
    self.dicts[timeline_id] = result
    return result

  def _relative_target_id(self, tps, ref):
    """Return the id of the timepoint at position `ref`, or None when out of range."""
    # A negative ref must not wrap round to the end of the list, and a ref
    # past the end must not raise; both mean "no such timepoint".
    if 0 <= ref < len(tps):
      return tps[ref]['timepointId']
    return None

  def _build_timing(self, timepoint, tps):
    """Build the timing object for one timing record; {} when it has no known type."""
    timing_type = timepoint['type']
    if timing_type == 'next':
      return self.add_next_timing(timepoint['value'], 'StartToStart', None, self._relative_target_id(tps, timepoint['ref']))
    if timing_type == 'previous':
      return self.add_previous_timing(timepoint['value'], 'StartToStart', None, self._relative_target_id(tps, timepoint['ref']))
    if timing_type == 'anchor':
      return self.add_anchor_timing(timepoint['value'])
    # Always return something so timings stay aligned one-to-one with timepoints.
    return {}

  def process_timepoints(self, timepoints):
    """Turn a list of timing records into a complete timeline dict.

    Each record needs the keys 'type' ("anchor", "previous", "next" or ""),
    'ref' (position of the timepoint it is relative to) and 'value'.
    One timepoint is created per record and chained to the next one; each
    gets its timing in 'scheduledAt'. The last timepoint additionally gets
    the exit under 'exit'. An empty list produces a timeline with no
    timepoints whose entry points at None.
    """
    tps = []
    previous_tp_id = None
    # First pass: create every timepoint so timings can refer to any of them.
    for _ in timepoints:
      tps.append(self.add_timepoint(previous_tp_id, None))
      previous_tp_id = tps[-1]['timepointId']
    # Second pass: attach the timing built from each record.
    for tp, timepoint in zip(tps, timepoints):
      tp['scheduledAt'] = self._build_timing(timepoint, tps)
    first_tp_id = tps[0]['timepointId'] if tps else None
    entry = self.add_entry('Main timeline', first_tp_id)
    exit = self.add_exit()
    if tps:
      tps[-1]['exit'] = exit
    return self.add_timeline(entry, tps, exit)
  


In [336]:
class DDFVisual():
  """Flattens a nested timeline dict into node and edge lists for display.

  Every dict that carries an "<kind>Id" key becomes a node; every dict (or
  list of dicts) nested under a key becomes an edge from the parent node to
  the child node, labelled with that key. Scalar values become node
  properties.
  """

  def __init__(self):
    self.nodes = []
    self.edges = []
    # Next numeric id to hand out to a node / an edge.
    self.node_index = 1
    self.edge_index = 1
    # Maps an object's own id value (e.g. "exit_1") to its numeric node id.
    self.node_id_map = {}
    # For each node kind, the key that holds the text used as its label.
    self.label_map = {
      'entry': 'entryId',
      'exit': 'exitId',
      'timeline': 'timelineId',
      'timepoint': 'timepointId',
      'timing': 'timingId'
    }

  def get_id_and_klass(self, node):
    """Return `(id_key, kind)` for a node dict.

    `id_key` is the first key ending in "Id" (in the dict's own key order)
    and `kind` is that key with "Id" removed, e.g. ("exitId", "exit").
    Raises IndexError when the dict has no such key.
    """
    id_keys = [key for key in node if key.endswith('Id')]
    key = id_keys[0]
    return key, key.replace("Id", "")

  def process_json(self, json):
    """Process the whole structure and return `(nodes, edges)`."""
    self.process_node(json)
    return self.nodes, self.edges

  def _add_edge(self, start_index, end_index, label):
    """Append one edge between two numeric node ids."""
    # The key is the literal 'id', matching the node dicts.
    self.edges.append({ 'id': self.edge_index, 'start': start_index, 'end': end_index, 'properties': { 'label': label } })
    self.edge_index += 1

  def process_node(self, node):
    """Add `node` (and everything nested in it) to the graph.

    Returns the numeric node id, or None for an empty dict. A dict whose id
    value was already processed is not added again; the existing numeric id
    is returned so that shared objects end up as a single node.
    """
    if node == {}:
      return None
    id_key, klass = self.get_id_and_klass(node)
    node_id = node[id_key]
    if node_id in self.node_id_map:
      return self.node_id_map[node_id]
    # Reserve this node's number BEFORE descending: children advance
    # self.node_index, so reading it later would give the wrong edge start.
    this_index = self.node_index
    self.node_index += 1
    self.node_id_map[node_id] = this_index
    properties = {}
    for key, value in node.items():
      if isinstance(value, list):
        children = value
      elif isinstance(value, dict):
        children = [value]
      else:
        properties[key] = value
        continue
      for child in children:
        child_index = self.process_node(child)
        if child_index is not None:
          self._add_edge(this_index, child_index, key)
    properties['node_type'] = klass
    properties['label'] = node[self.label_map.get(klass, id_key)]
    self.nodes.append( { 'id': this_index, 'properties': properties } )
    return this_index
  


In [337]:
# Locate the workbook relative to the directory the notebook is run from.
notebook_path = os.path.abspath("notebook.ipynb")
file_path = os.path.join(os.path.dirname(notebook_path), "source_data/berber_1_v2.xlsx")
#file_path = os.path.join(os.path.dirname(notebook_path), "source_data/Roche Phase 3 NCT04320615.xlsx")
df = pd.read_excel(file_path, header=None)
# Forward-fill along each row so a value also applies to the empty cells to
# its right. DataFrame.ffill replaces the deprecated fillna(method='ffill').
# NOTE(review): this fills every row, including the activity rows, so a value
# there is also copied into following empty cells - confirm this is intended.
df = df.ffill(axis=1)
cycles = extract_cycles(df)
timepoints = extract_timepoints(df)
encounters = extract_encounters(df)
activities = extract_activities_and_bcs(df)
tp_activities = extract_timepoint_activities_map(df, timepoints, activities['activities'], activities['row_activities_map'])
print("CYCLES", cycles)
print("TIMEPOINTS", timepoints)
print("ENCOUNTERS", encounters)
print("ACTIVITIES", activities)
print("TP ACTIVITIES", tp_activities)

# Build the timeline structure from the timing records.
x = DDFJson()
json = x.process_timepoints(timepoints)
print("X:", json)

# Flatten the timeline into the node/edge lists used by the graph cell below.
y = DDFVisual()
nodes, edges = y.process_json(json)

TIMING: col_index (4) - FIRST_VISIT_COL (4) + rel_ref (1)
TIMING: col_index (5) - FIRST_VISIT_COL (4) + rel_ref (0)
TIMING: col_index (6) - FIRST_VISIT_COL (4) + rel_ref (-1)
TIMING: col_index (7) - FIRST_VISIT_COL (4) + rel_ref (0)
TIMING: col_index (8) - FIRST_VISIT_COL (4) + rel_ref (-1)
INT P4 4
TIMING: col_index (9) - FIRST_VISIT_COL (4) + rel_ref (-4)
INT P1 1
TIMING: col_index (10) - FIRST_VISIT_COL (4) + rel_ref (-1)
TIMING: col_index (11) - FIRST_VISIT_COL (4) + rel_ref (0)
TIMING: col_index (12) - FIRST_VISIT_COL (4) + rel_ref (0)
TIMING: col_index (13) - FIRST_VISIT_COL (4) + rel_ref (-1)
TIMING: col_index (14) - FIRST_VISIT_COL (4) + rel_ref (-1)
CYCLES [{'start_index': 1, 'condition': '1', 'start': 'Day 1', 'period': '15 Days', 'end_rule': '', 'end_index': 2}, {'start_index': 3, 'condition': '2', 'start': 'Day 16', 'period': '15 Days', 'end_rule': '', 'end_index': 4}, {'start_index': 5, 'condition': '3', 'start': 'Day 31', 'period': '15 Days', 'end_rule': '', 'end_index': 

In [338]:

def custom_node_color(index: int, node: dict):
  """Choose the colour name for a graph node from its 'node_type' property.

  'entry', 'exit' and 'timepoint' nodes are 'black', 'timeline' nodes are
  'blue'; any other type, or a node without a 'node_type', is 'white'.
  `index` is accepted but not used.
  """
  properties = node['properties']
  if 'node_type' not in properties:
    return 'white'
  node_type = properties['node_type']
  if node_type in ('entry', 'exit', 'timepoint'):
    return 'black'
  if node_type == 'timeline':
    return 'blue'
  return 'white'

def custom_node_style(index: int, node: dict):
  """Choose the style dict for a graph node from its 'node_type' property.

  'entry' and 'exit' nodes get the pill image, 'anchor' nodes a 'hexagon2'
  shape; every other node, including one without a 'node_type', gets an
  'ellipse' shape. `index` is accepted but not used.
  """
  properties = node['properties']
  if 'node_type' not in properties:
    return { 'shape': 'ellipse' }
  node_type = properties['node_type']
  if node_type in ('entry', 'exit'):
    return {'image': 'https://raw.githubusercontent.com/data4knowledge/timepoints/main/images/pill_black.svg'}
  if node_type == 'anchor':
    return { 'shape': 'hexagon2' }
  return { 'shape': 'ellipse' }

# Create the graph widget and select its layout and edge direction.
# NOTE(review): the exact effect of these calls is defined by
# yfiles_jupyter_graphs - confirm against its documentation.
widget = GraphWidget()
widget.orthogonal_layout()
widget.set_directed(True)

# Feed in the node/edge lists produced by DDFVisual.process_json and register
# the per-node colour and style callbacks defined above in this cell.
widget.set_nodes(nodes)
widget.set_edges(edges)
widget.set_node_color_mapping(custom_node_color)
widget.set_node_styles_mapping(custom_node_style)
# Last expression of the cell, so the widget itself is displayed.
widget

GraphWidget(layout=Layout(height='500px', width='100%'))