In [None]:
import csv
import json
import numpy as np
from collections import Counter, defaultdict
import itertools
import os

# Category Setting

# Action Categories
'Add note'-
'Connection'
'Create Note'-
'Doc_open'-
'Draging'-
'Highlight'-
'Mouse_hover'-
'Reading'-
'Search'-
'Think_aloud'-
'Topic_change'-

## Exploratory
### Data Exploration
- Search
- Reading
- Doc_open
- Topic_change
### Visual Exploration
- Draging
- Mouse_hover
## Insight Action
- Think_aloud
- Highlight
- Create Note
- Add note
- Connection
## Theorizing (Data to Visual)
## Discovering (Data to Insight)
## Auditing (Insight to Data or anything to topic change)
## Organizing (Insight to Visual)
## Recognizing (Visual to Insight)
## Tracking (Visual to Data)
## Meta (Discarded)
- So far none, but add logic for easy reconfig later

Update the category lists below to accommodate datasets that encode their action labels differently.

In [None]:
# Parameterizable Category Definitions
#
# These module-level lists assign each raw interaction label to one of three
# high-level categories. mask_labels, categorize_actions_row_average and
# interaction_category_counts read them as globals, so this is the place to
# edit when a dataset uses different label names.

'''
TODO: Add a sys.args parser to populate below variables

DataExplore = []
Visual = []
Insight = []
Theorizing = []
Discover = []
Audit = []
Organize = []
Recognize = []
Track = []
'''

# For Datasets 1-3
# Labels must match the log values exactly (the functions below compare with ==).
DataExplore = ['Search', 'Reading', 'Doc_open', 'Topic_change']
Visual = ['Draging', 'Mouse_hover']
Insight = ['Think_aloud', 'Highlight', 'Create Note', 'Add note', 'Connection']
# The transition categories below are left unset: the functions in this file
# derive them from (start category, end category) pairs of the lists above.
# Theorizing = []
# Discover = []
# Audit = []
# Organize = []
# Recognize = []
# Track = []

# Data Clean-up

Note on the `'Type'` column check in `json_to_csv`:
Other datasets will likely need either a pre-processing clean-up step or an explicit column name for this check. For instance, the UK dataset has multiple column headers that contain "Type".

In [None]:
def json_to_csv(directory, filepath):
    """Convert a JSON interaction log into a CSV file in the working directory.

    The JSON file is expected to hold a sequence of flat objects (one per
    interaction). The CSV is written to ``filepath + '.csv'`` relative to the
    current working directory, not to ``directory``.

    The column holding the interaction label is exposed as ``InteractionType``:
    if no column already has that exact name, the first column whose name
    contains ``'Type'`` is renamed. Only the first match is renamed so that
    datasets with several "...Type..." columns do not end up with duplicate
    header names.

    Args:
        directory: Directory that contains the JSON log.
        filepath: File name of the JSON log inside ``directory``; also used as
            the stem of the CSV that is produced.
    """
    with open(os.path.join(directory, filepath)) as json_file:
        interaction_data = json.load(json_file)

    # newline='' is what the csv module expects for files it writes; without
    # it, platforms that translate line endings emit blank rows.
    with open(filepath + '.csv', 'w', newline='') as data_file:
        csv_writer = csv.writer(data_file)

        keys = None
        for interaction in interaction_data:
            if keys is None:
                # The first record defines the column order for the whole file.
                keys = list(interaction.keys())
                header = list(keys)
                if 'InteractionType' not in header:
                    for index, name in enumerate(header):
                        if 'Type' in name:
                            header[index] = 'InteractionType'
                            break
                csv_writer.writerow(header)

            # Look values up by key so every row lines up with the header even
            # if a later record orders (or omits) its fields differently.
            csv_writer.writerow([interaction.get(key, '') for key in keys])

# Probabilities and Markov Model Calculations

In [None]:
def ratioMaker(activities):
    """Return the share of the log taken by each distinct activity.

    The shares are ordered like the sorted distinct values that
    ``np.unique`` reports for ``activities``.
    """
    total = len(activities)
    _, occurrences = np.unique(activities, return_counts=True)
    shares = []
    for occurrence in occurrences:
        shares.append(occurrence / total)
    return shares

In [None]:
def state_collecting(csv_file):
    """Read an interaction CSV and summarise its single-step transitions.

    Args:
        csv_file: Path to a CSV file that has an ``InteractionType`` column.

    Returns:
        A 6-tuple ``(activities, state_types, state_count, transition_types,
        state_transitions, transition_counts)``:

        * activities: list of ``(current, next)`` label pairs, in log order.
        * state_types: sorted array of the distinct labels found in the pairs.
        * state_count: number of entries in ``state_types``.
        * transition_types: every ordered pair of ``state_types`` (A->B, B->A
          and A->A), one row per pair, in sorted order.
        * state_transitions: the distinct pairs that actually occur.
        * transition_counts: how often each row of ``state_transitions``
          occurs.

    Raises:
        KeyError: If the CSV has no ``InteractionType`` column.
    """
    # The context manager closes the file even if a row is malformed;
    # newline='' lets the csv module handle line endings itself.
    with open(csv_file, 'r', newline='') as data:
        interaction_labels = [row['InteractionType'] for row in csv.DictReader(data)]

    # Pair every interaction with its successor: the single-step transitions.
    activities = list(zip(interaction_labels, interaction_labels[1:]))

    # np.unique flattens the pair list, giving the distinct labels.
    state_types = np.unique(activities)
    state_count = len(state_types)

    # All transitions that are possible given the labels seen.
    transition_types = np.unique(list(itertools.product(state_types, state_types)), axis=0)

    # The transitions that are present in the log, with their frequencies.
    state_transitions, transition_counts = np.unique(activities, return_counts=True, axis=0)

    return activities, state_types, state_count, transition_types, state_transitions, transition_counts


In [None]:
def transition_matrix(transition_counts, transition_types, state_count, state_transitions, activities):
    """Build the row-normalised transition matrix and its label grid.

    Args:
        transition_counts: Frequency of each row of ``state_transitions``.
        transition_types: All ``state_count ** 2`` possible transitions, laid
            out so that index ``i * state_count + j`` is the transition from
            state ``i`` to state ``j``.
        state_count: Number of distinct states.
        state_transitions: The distinct transitions observed in the log.
        activities: Unused; kept so existing callers keep working.

    Returns:
        ``(m, labels)`` where ``m[i][j]`` is the probability of moving from
        state ``i`` to state ``j`` (rows with no observations stay all zero)
        and ``labels[i][j]`` is the matching row of ``transition_types``.
    """
    n = state_count  # number of states

    m = np.zeros((n, n))
    labels = np.empty((n, n), dtype=object)

    # Fill the label grid once, independently of which transitions were
    # observed, and remember which cell each transition belongs to.
    cell_of = {}
    for i in range(n):
        for j in range(n):
            pair = transition_types[i * n + j]
            labels[i, j] = pair
            cell_of[tuple(np.asarray(pair).tolist())] = (i, j)

    # Drop each observed count into its cell with a dictionary lookup instead
    # of scanning all n*n transition types per observation.
    for y, x in enumerate(state_transitions):
        cell = cell_of.get(tuple(np.asarray(x).tolist()))
        if cell is not None:
            m[cell] = transition_counts[y]

    # Convert counts to probabilities, leaving never-visited rows at zero.
    for row in m:
        total = row.sum()
        if total > 0:
            row /= total
    return m, labels



### Note for the next few blocks:
In the Visual Start State sections, a few branches are hard-coded to individual event names. This is because of the special handling that Topic_change needed in datasets 1-3. This can likely be modularized for the other datasets, and feel free to change the categorization rules to eliminate this special case if you want.

In [None]:
def mask_labels(labels):
    """Translate a grid of (start, end) labels into transition category names.

    Each cell of ``labels`` is read as a pair ``x`` where ``x[0]`` is the
    starting interaction and ``x[1]`` the following one. The result has the
    same shape; cells whose pair matches no rule are left as ``None``.
    Category membership comes from the module-level ``DataExplore``,
    ``Visual`` and ``Insight`` lists.
    """
    def category_of(x):
        # Data start state
        if x[0] in DataExplore:
            if x[1] in DataExplore:
                return 'DataExploration'   # Data to Data
            if x[1] in Visual:
                return 'Theorizing'        # Data to Visual
            if x[1] in Insight:
                return 'Discovering'       # Data to Insight
            return None

        # Visual start state
        if x[0] in Visual:
            if x[1] in Visual:
                return 'VisualExploration'  # Visual to Visual
            if x[1] in ('Search', 'Reading', 'Doc_open'):  # Hard-coded part
                return 'Tracking'           # Visual to Data
            if x[1] in Insight:
                return 'Recognizing'        # Visual to Insight
            if x[1] == 'Topic_change':  # Hard-coded part
                return 'Auditing'           # Visual to New Topic
            return None

        # Insight start state
        if x[0] in Insight:
            if x[1] in Insight:
                return 'InsightAction'      # Insight to Insight
            if x[1] in DataExplore:
                return 'Auditing'           # Insight to Data
            if x[1] in Visual:
                return 'Organizing'         # Insight to Visual
        return None

    categorized = np.empty(labels.shape, dtype=object)
    row_total, column_total = labels.shape[0], labels.shape[1]
    for row in range(row_total):
        for column in range(column_total):
            name = category_of(labels[row][column])
            if name is not None:
                categorized[row][column] = name
    return categorized

In [None]:
def _transition_category(start, end):
    """Return the category name for a start -> end transition, or None."""
    # Data start state
    if start in DataExplore:
        if end in DataExplore:
            return 'DataExploration'
        if end in Visual:
            return 'Theorizing'
        if end in Insight:
            return 'Discovering'
        return None

    # Visual start state
    if start in Visual:
        if end in Visual:
            return 'VisualExploration'
        if end in ('Search', 'Reading', 'Doc_open'):  # Hard-coded part
            return 'Tracking'
        if end in Insight:
            return 'Recognizing'
        if end == 'Topic_change':  # Hard-coded part
            return 'Auditing'
        return None

    # Insight start state
    if start in Insight:
        if end in Insight:
            return 'InsightAction'
        if end in DataExplore:
            return 'Auditing'
        if end in Visual:
            return 'Organizing'
    return None


def categorize_actions_row_average(activities, transition_counts, m):
    """Count usual vs. unusual transitions for each transition category.

    A transition from state ``i`` to state ``j`` is "usual" when its
    probability ``m[i][j]`` is at least the average of the non-zero
    probabilities in row ``i`` of ``m``, and "unusual" otherwise. (The
    threshold is this row average, not a fixed percentage.)

    NOTE: besides its arguments, this function reads the module-level
    ``state_count`` and ``transition_types`` (as returned by
    ``state_collecting``) and the ``DataExplore`` / ``Visual`` / ``Insight``
    category lists. They must describe the same log as ``activities``/``m``.

    Args:
        activities: Sequence of ``(start, end)`` label pairs in log order.
        transition_counts: Unused; kept so existing callers keep working.
        m: Transition probability matrix, indexed like ``transition_types``.

    Returns:
        A ``(9, 2)`` array. Rows: DataExploration[0], VisualExploration[1],
        InsightAction[2], Theorizing[3], Discovering[4], Auditing[5],
        Organizing[6], Recognizing[7], Tracking[8]. Column 0 counts usual
        transitions, column 1 unusual ones.
    """
    category_rows = {
        'DataExploration': 0,
        'VisualExploration': 1,
        'InsightAction': 2,
        'Theorizing': 3,
        'Discovering': 4,
        'Auditing': 5,
        'Organizing': 6,
        'Recognizing': 7,
        'Tracking': 8,
    }
    expectedness = np.zeros(shape=(9, 2))

    # Locate each possible transition's cell in m once, instead of scanning
    # every transition type for every logged transition.
    cell_of = {}
    for i in range(state_count):
        for j in range(state_count):
            pair = transition_types[i * state_count + j]
            cell_of[tuple(np.asarray(pair).tolist())] = (i, j)

    row_average = {}  # row index -> average of its non-zero probabilities
    for x in activities:
        category = _transition_category(x[0], x[1])
        if category is None:
            # A transition that matches no rule is skipped. Previously it was
            # tallied under whatever category the preceding transition had.
            continue

        cell = cell_of.get(tuple(np.asarray(x).tolist()))
        if cell is None:
            continue
        i, j = cell

        if i not in row_average:
            nonzero = [p for p in m[i] if p != 0]
            row_average[i] = np.average(nonzero) if nonzero else None
        threshold = row_average[i]
        if threshold is None:
            # Row without any non-zero probability: nothing to compare with.
            continue

        if m[i][j] >= threshold:
            expectedness[category_rows[category]][0] += 1
        elif m[i][j] < threshold:
            expectedness[category_rows[category]][1] += 1

    return expectedness

In [None]:
def interaction_category_counts(state_transitions):
    """Sum observed transition frequencies per transition category.

    NOTE: the frequency of ``state_transitions[h]`` is read from the
    module-level ``transition_counts[h]``; the category lists
    ``DataExplore`` / ``Visual`` / ``Insight`` are module-level as well.

    Returns a ``(9, 1)`` array whose rows are DataExploration[0],
    VisualExploration[1], InsightAction[2], Theorizing[3], Discovering[4],
    Auditing[5], Organizing[6], Recognizing[7], Tracking[8].
    """
    def bucket_for(x):
        # Topic change only considered for data to data and insight to data
        if x[0] in DataExplore:
            if x[1] in DataExplore:
                return 0
            if x[1] in Visual:
                return 3
            if x[1] in Insight:
                return 4
            return None

        if x[0] in Visual:
            if x[1] in Visual:
                return 1
            if x[1] in ('Search', 'Reading', 'Doc_open'):  # Hard-coded part
                return 8
            if x[1] in Insight:
                return 7
            if x[1] == 'Topic_change':  # Hard-coded part
                return 5
            return None

        if x[0] in Insight:
            if x[1] in Insight:
                return 2
            if x[1] in DataExplore:
                return 5
            if x[1] in Visual:
                return 6
        return None

    interaction_counts = np.zeros(shape=(9,1))
    for h, x in enumerate(state_transitions):
        bucket = bucket_for(x)
        if bucket is not None:
            interaction_counts[bucket] += transition_counts[h]

    return interaction_counts

# Run on DataSets

In [None]:
# Folder whose files are processed by the "all participants" cell below.
directory = '/content/gdrive/MyDrive/Research/INDIE_Provanance/Dataset1'
# NOTE(review): `filepath` and `csv_file` are not referenced by the cells
# visible in this notebook — presumably left over from earlier experiments.
filepath = '/content/gdrive/MyDrive/Research/INDIE_Provanance/Dataset1/Arms_P3_InteractionsLogs.json'
csv_file = 'arms1.csv'

## Perform on all participants in directory

In [None]:
# Process every file in `directory`: convert it to CSV, build its Markov
# model, and write one probabilities file per participant plus one summary
# file for the whole dataset.
FullSetProbabilities = []
for filename in sorted(os.listdir(directory)):
    print(filename + "\n")
    json_to_csv(directory, filename)
    # These names must stay module-level: categorize_actions_row_average reads
    # state_count / transition_types and interaction_category_counts reads
    # transition_counts from the global scope.
    activities, state_types, state_count, transition_types, state_transitions, transition_counts = state_collecting(filename+'.csv')

    m, labels = transition_matrix(transition_counts, transition_types, state_count, state_transitions, activities)
    print("\n")

    label_masked = mask_labels(labels)
    expected2 = categorize_actions_row_average(activities, transition_counts, m)
    inters = interaction_category_counts(state_transitions)

    print(inters)
    print(expected2)
    # Usual/unusual share per category; a category that never occurs divides
    # by zero and shows up as nan.
    percentages = expected2/inters
    percentage_Totals = (sum(expected2)/sum(inters))
    # Grow to 10 rows so the overall totals can be stored as the last row.
    percentages.resize((10,2))
    percentages[-1] = percentage_Totals
    print(percentages)

    # After transposing, row 0 holds the "usual" shares and row 1 the "unusual" ones.
    percentages = percentages.transpose()
    print(percentages)
    print(percentages.flatten())

    ParticipantData = [filename, *inters.flatten(), *percentages.flatten()]
    FullSetProbabilities.append(ParticipantData)

    # `with` closes the file even if a write fails; newline='' is what the
    # csv module expects for files it writes.
    with open('probabilities' + filename + '.csv', 'w', newline='') as data_file:
        csv_writer = csv.writer(data_file)
        csv_writer.writerow(["DataExploration", "VisualExploration", "InsightAction", "Theorizing", "Discovering", "Auditing", "Organizing", "Recognizing", "Tracking", "Total"])
        csv_writer.writerows(percentages)

print(FullSetProbabilities)

# The summary file is named after the last character of the dataset folder
# name (e.g. 'Dataset1' -> 'probabilities1.csv'). Normalising first keeps this
# working when `directory` ends with a path separator.
dataset_tag = os.path.basename(os.path.normpath(directory))[-1]
with open('probabilities' + dataset_tag + '.csv', 'w', newline='') as data_file:
    csv_writer = csv.writer(data_file)
    if FullSetProbabilities:
        header = ["FileName", "DataExplorationCounts", "VisualExplorationCounts", "InsightActionCounts", "TheorizingCounts", "DiscoveringCounts", "AuditingCounts", "OrganizingCounts", "RecognizingCounts", "TrackingCounts",
                  "DataExploringUsual", "VisualExploringUsual", "InsightActionUsual", "TheorizingUsual", "DiscoveringUsual", "AuditingUsual", "OrganizingUsual", "RecognizingUsual", "TrackingUsual", "TotalUsual",
                  "DataExploringUnusual", "VisualExploringUnusual", "InsightActionUnusual", "TheorizingUnusual", "DiscoveringUnusual", "AuditingUnusual", "OrganizingUnusual", "RecognizingUnusual", "TrackingUnusual", "TotalUnusual"]
        csv_writer.writerow(header)
        csv_writer.writerows(FullSetProbabilities)

Arms_P1_InteractionsLogs.json



[[204.]
 [144.]
 [ 49.]
 [101.]
 [ 80.]
 [ 67.]
 [ 67.]
 [ 52.]
 [114.]]
[[152.  52.]
 [144.   0.]
 [ 34.  15.]
 [ 51.  50.]
 [ 44.  36.]
 [ 35.  32.]
 [ 58.   9.]
 [ 26.  26.]
 [ 84.  30.]]
[[0.74509804 0.25490196]
 [1.         0.        ]
 [0.69387755 0.30612245]
 [0.5049505  0.4950495 ]
 [0.55       0.45      ]
 [0.52238806 0.47761194]
 [0.86567164 0.13432836]
 [0.5        0.5       ]
 [0.73684211 0.26315789]
 [0.71526196 0.28473804]]
[[0.74509804 1.         0.69387755 0.5049505  0.55       0.52238806
  0.86567164 0.5        0.73684211 0.71526196]
 [0.25490196 0.         0.30612245 0.4950495  0.45       0.47761194
  0.13432836 0.5        0.26315789 0.28473804]]
[0.74509804 1.         0.69387755 0.5049505  0.55       0.52238806
 0.86567164 0.5        0.73684211 0.71526196 0.25490196 0.
 0.30612245 0.4950495  0.45       0.47761194 0.13432836 0.5
 0.26315789 0.28473804]
Arms_P2_InteractionsLogs.json



[[192.]
 [255.]
 [ 15.]
 [115.]
 [ 50.]
 [ 41.]
 [ 

## Run on single participant for testing

In [None]:
# Run the whole pipeline for one participant and additionally dump the raw
# Markov transition matrix.
directory = '/content/gdrive/MyDrive/Research/INDIE_Provanance/Dataset1/'
filename = 'Arms_P1_InteractionsLogs.json'

print(filename + "\n")
json_to_csv(directory, filename)
# These names must stay module-level: categorize_actions_row_average reads
# state_count / transition_types and interaction_category_counts reads
# transition_counts from the global scope.
activities, state_types, state_count, transition_types, state_transitions, transition_counts = state_collecting(filename+'.csv')

m, labels = transition_matrix(transition_counts, transition_types, state_count, state_transitions, activities)
print("\n")

label_masked = mask_labels(labels)
expected2 = categorize_actions_row_average(activities, transition_counts, m)
inters = interaction_category_counts(state_transitions)

print(inters)
print(expected2)
# Usual/unusual share per category; a category that never occurs divides by
# zero and shows up as nan.
percentages = expected2/inters
percentage_Totals = (sum(expected2)/sum(inters))
# Grow to 10 rows so the overall totals can be stored as the last row.
percentages.resize((10,2))
percentages[-1] = percentage_Totals
print(percentages)

# After transposing, row 0 holds the "usual" shares and row 1 the "unusual" ones.
percentages = percentages.transpose()

ParticipantData = [*inters.flatten(), *percentages.flatten()]

header = ["DataExplorationCounts", "VisualExplorationCounts", "InsightActionCounts", "TheorizingCounts", "DiscoveringCounts", "AuditingCounts", "OrganizingCounts", "RecognizingCounts", "TrackingCounts",
            "DataExploringUsual", "VisualExploringUsual", "InsightActionUsual", "TheorizingUsual", "DiscoveringUsual", "AuditingUsual", "OrganizingUsual", "RecognizingUsual", "TrackingUsual", "TotalUsual",
            "DataExploringUnusual", "VisualExploringUnusual", "InsightActionUnusual", "TheorizingUnusual", "DiscoveringUnusual", "AuditingUnusual", "OrganizingUnusual", "RecognizingUnusual", "TrackingUnusual", "TotalUnusual"]

# `with` closes each file even if a write fails; newline='' is what the csv
# module expects for files it writes.
with open('probabilities' + filename + '.csv', 'w', newline='') as prob_file:
    csv_writer = csv.writer(prob_file)
    csv_writer.writerow(header)
    csv_writer.writerow(ParticipantData)

with open('MarkovValues' + filename + '.csv', 'w', newline='') as data_file:
    csv_writer = csv.writer(data_file)
    if len(m) > 0:
        # Column headings are the state labels, in matrix order.
        csv_writer.writerow(state_types.flatten())
        csv_writer.writerows(m)

Arms_P1_InteractionsLogs.json



[[204.]
 [144.]
 [ 49.]
 [101.]
 [ 80.]
 [ 67.]
 [ 67.]
 [ 52.]
 [114.]]
[[152.  52.]
 [144.   0.]
 [ 34.  15.]
 [ 51.  50.]
 [ 44.  36.]
 [ 35.  32.]
 [ 58.   9.]
 [ 26.  26.]
 [ 84.  30.]]
[[0.74509804 0.25490196]
 [1.         0.        ]
 [0.69387755 0.30612245]
 [0.5049505  0.4950495 ]
 [0.55       0.45      ]
 [0.52238806 0.47761194]
 [0.86567164 0.13432836]
 [0.5        0.5       ]
 [0.73684211 0.26315789]]
[0.71526196 0.28473804]


# Visualization Attempts (Ignore these, as they don't work)

In [None]:
### Attempt to Visualize States ###

import numpy as np
import matplotlib.patches as mpatches
from matplotlib.collections import PatchCollection
import matplotlib.pyplot as plt

%matplotlib inline


class Node():
    """One state of a Markov chain, rendered as a labelled circle."""

    def __init__(
        self, center, radius, label,
        facecolor='#2693de', edgecolor='#e6e6e6',
        ring_facecolor='#a3a3a3', ring_edgecolor='#a3a3a3'
        ):
        """
        Set up the geometry and colours used to draw the node.
        Inputs:
            - center         : (x, y) position of the node
            - radius         : radius of the node circle
            - label          : text written inside the circle
            - facecolor      : fill colour of the circle
            - edgecolor      : outline colour of the circle
            - ring_facecolor : fill colour of the self-loop ring and arrow
            - ring_edgecolor : outline colour of the self-loop ring and arrow
        """
        self.center, self.radius, self.label = center, radius, label

        # Shortcuts for the two coordinates of the center
        self.x, self.y = center[0], center[1]

        # Circle colours
        self.node_facecolor, self.node_edgecolor = facecolor, edgecolor

        # Self-loop appearance
        self.ring_facecolor, self.ring_edgecolor = ring_facecolor, ring_edgecolor
        self.ring_width = 0.03

        # Shared keyword arguments for every text annotation
        self.text_args = dict(ha='center', va='center', fontsize=16)


    def add_circle(self, ax):
        """
        Draw the node's circle on `ax` and write its label in the middle
        """
        disc = mpatches.Circle(self.center, self.radius)
        ax.add_collection(
            PatchCollection(
                [disc],
                edgecolor = self.node_edgecolor,
                facecolor = self.node_facecolor
            )
        )
        ax.annotate(self.label, xy = self.center, color = '#ffffff', **self.text_args)


    def add_self_loop(self, ax, prob=None, direction='up'):
        """
        Draw a ring with an arrow head for a state-to-itself transition.
        The loop sits above the node for direction 'up' and below it for any
        other value; `prob` is written next to it when truthy.
        """
        upward = direction == 'up'
        ring_x = self.x
        half_ring = self.ring_width/2

        start, angle = (-30, 180) if upward else (-210, 0)
        ring_y = self.y + self.radius if upward else self.y - self.radius
        prob_y = self.y + 1.3*self.radius if upward else self.y - 1.4*self.radius
        x_cent = ring_x - self.radius + half_ring if upward else ring_x + self.radius - half_ring
        y_cent = ring_y - 0.15 if upward else ring_y + 0.15

        # The ring itself
        ring = mpatches.Wedge(
            (ring_x, ring_y),
            self.radius,
            start,
            angle,
            width = self.ring_width
        )

        # The arrow head: a small triangle whose tip is at y_cent
        offset = 0.2
        left = [x_cent - offset, ring_y]
        right = [x_cent + offset, ring_y]
        tip = [(left[0]+right[0])/2., y_cent]
        arrow = plt.Polygon([left, right, tip, left])

        ax.add_collection(
            PatchCollection(
                [ring, arrow],
                edgecolor = self.ring_edgecolor,
                facecolor = self.ring_facecolor
            )
        )

        # Only annotate when a (non-zero) probability was supplied
        if prob:
            ax.annotate(str(prob), xy=(self.x, prob_y), color='#000000', **self.text_args)


class MarkovChain:
    """Drawable Markov chain: one Node per state plus arrows for transitions."""

    def __init__(self, M, labels):
        """
        Initializes a Markov Chain (for drawing purposes)
        Inputs:
            - M         Transition Matrix (square, at least 2x2)
            - labels    State Labels, one per row of M
        Raises:
            ValueError if M is smaller than 2x2, is not square, or the number
            of labels does not match the number of states.
        """

        if M.shape[0] < 2:
            raise ValueError("There should be at least 2 states")
        if M.shape[0] != M.shape[1]:
            raise ValueError("Transition matrix should be square")
        if M.shape[0] != len(labels):
            raise ValueError("There should be as many labels as states")

        self.M = M
        self.n_states = M.shape[0]
        self.labels = labels

        # Colors
        self.arrow_facecolor = '#a3a3a3'
        self.arrow_edgecolor = '#a3a3a3'

        self.node_facecolor = '#2693de'
        self.node_edgecolor = '#e6e6e6'

        # Drawing config
        self.node_radius = 0.5
        self.arrow_width = 0.03
        self.arrow_head_width = 0.20
        self.text_args = {
            'ha': 'center',
            'va': 'center',
            'fontsize': 16
        }

        # Build the network
        self.build_network()


    def set_node_centers(self):
        """
        Positions the node centers given the number of states.

        Two, three and four states use hand-tuned layouts. Any other count is
        laid out on a 3-column grid, filled row by row from the top; the
        vertical axis limit grows when the grid needs more than four rows.
        """
        # Node positions
        self.node_centers = []

        if self.n_states == 2:
            self.figsize = (10, 4)
            self.xlim = (-5, 5)
            self.ylim = (-2, 2)
            self.node_centers = [[-4,0], [4,0]]
        elif self.n_states == 3:
            self.figsize = (10, 6)
            self.xlim = (-5, 5)
            self.ylim = (-3, 3)
            self.node_centers = [[-3,-2], [3,-2], [-3,2]]
        elif self.n_states == 4:
            self.figsize = (8, 8)
            self.xlim = (-5, 5)
            self.ylim = (-5, 5)
            self.node_centers = [[-4,4], [4,4], [4,-4], [-4,-4]]
        else:
            self.figsize = (self.n_states * 2, self.n_states * 2)
            self.xlim = (-20,20)

            columns, spacing = 3, 10
            # Always produce at least the 11 positions of the original fixed
            # layout, and as many more as there are states, so any number of
            # states gets a position.
            count = max(self.n_states, 11)
            self.node_centers = [
                [-10 + spacing * (k % columns), 15 - spacing * (k // columns)]
                for k in range(count)
            ]
            # Keep the bottom row inside the axes when the grid grows.
            lowest_y = self.node_centers[-1][1]
            self.ylim = (min(-20, lowest_y - 5), 20)


    def build_network(self):
        """
        Loops through the matrix, add the nodes
        """
        # Position the node centers
        self.set_node_centers()

        # Set the nodes
        self.nodes = []
        for i in range(self.n_states):
            node = Node(
                self.node_centers[i],
                self.node_radius,
                self.labels[i]
            )
            self.nodes.append(node)


    def add_arrow(self, ax, node1, node2, prob=None):
        """
        Add a directed arrow between two nodes; `prob` is written along the
        arrow when truthy.
        """
        # x,y start of the arrow
        x_start = node1.x + np.sign(node2.x-node1.x) * node1.radius
        y_start = node1.y + np.sign(node2.y-node1.y) * node1.radius

        # arrow length
        dx = abs(node1.x - node2.x) - 2.5* node1.radius
        dy = abs(node1.y - node2.y) - 2.5* node1.radius

        # we don't want xoffset and yoffset to both be non-null
        yoffset = 0.4 * self.node_radius * np.sign(node2.x-node1.x)
        if yoffset == 0:
            xoffset = 0.4 * self.node_radius * np.sign(node2.y-node1.y)
        else:
            xoffset = 0

        arrow = mpatches.FancyArrow(
            x_start + xoffset,
            y_start + yoffset,
            dx * np.sign(node2.x-node1.x),
            dy * np.sign(node2.y-node1.y),
            width = self.arrow_width,
            head_width = self.arrow_head_width
        )
        p = PatchCollection(
            [arrow],
            edgecolor = self.arrow_edgecolor,
            facecolor = self.arrow_facecolor
        )
        ax.add_collection(p)

        # Probability to add?
        x_prob = x_start + xoffset + 0.2*dx*np.sign(node2.x-node1.x)
        y_prob = y_start + yoffset + 0.2*dy*np.sign(node2.y-node1.y)
        if prob:
            ax.annotate(str(prob), xy=(x_prob, y_prob), color='#000000', **self.text_args)


    def draw(self):
        """
        Draw the Markov Chain
        """
        fig, ax = plt.subplots(figsize=self.figsize)

        # Set the axis limits
        plt.xlim(self.xlim)
        plt.ylim(self.ylim)

        # Draw the nodes
        for node in self.nodes:
            node.add_circle(ax)

        # Add the transitions. Probabilities come from this instance's own
        # matrix (self.M), not from a global named M.
        for i in range(self.M.shape[0]):
            for j in range(self.M.shape[1]):
                # self loops
                if i == j:
                    # Loop direction
                    if self.nodes[i].y >= 0:
                        self.nodes[i].add_self_loop(ax, prob = self.M[i,j], direction='up')
                    else:
                        self.nodes[i].add_self_loop(ax, prob = self.M[i,j], direction='down')
                # directed arrows
                elif self.M[i,j] > 0:
                    self.add_arrow(ax, self.nodes[i], self.nodes[j], prob = self.M[i,j])

        plt.axis('off')

In [None]:
# Build the drawable chain from the transition matrix `m` and the state labels
# (both presumably left in scope by the run cells above — confirm they have
# been executed). The constructor only lays out the nodes; call mc.draw() to
# render the figure.
mc = MarkovChain(m, state_types)

NameError: ignored