In [12]:
import os
import csv
import codecs
import copy

import ontoutils
from ontoutils.lucid_chart import Relation

# Change the working directory so that the relative paths used by the later cells
# (e.g. "theories_2025") resolve.
# NOTE(review): this absolute path is specific to one user's machine; anyone else
# running the notebook has to edit it first.
os.chdir('C:/Users/maybra/OneDrive - Universität Zürich UZH/git/theory-database')


In [13]:
# Load every theory diagram CSV found in `directory` and normalise its rows in place.
#
# Results (module-level, used by the later cells):
#   theory_row_dicts  model_num -> {row id -> CSV row dict}
#   model_filenames   model_num -> CSV file name
#   junctions         "<model_num>:<row id>:<shape name as found in the CSV>"
#   containers        "<model_num>:<row id>" for every 'Rectangle Container'
junctions = []
containers = []
theory_row_dicts = {}

directory = "theories_2025"

theory_files = [file for file in os.listdir(directory) if file.endswith(".csv")]
model_filenames = {}

# Shape names (the CSV 'Name' column) grouped by how they are treated.
CONSTRUCT_SHAPES = ['Process', 'Text', 'Rectangle', 'TerminatorBlock', 'Block', 'Terminator']
JUNCTION_SHAPES = ['Summing Junction', 'Summing junction', 'Connector', 'Or', 'Merge',
                   'Isosceles Triangle', 'Isosceles triangle', 'Circle']
TRIANGLE_SHAPES = ['Isosceles Triangle', 'Isosceles triangle']
IGNORED_SHAPES = ['Document', 'Page']

for f in theory_files:
    with codecs.open(os.path.join(directory, str(f)), mode='r', encoding="utf-8") as csv_file:
        # The model number is everything before the first '.' of the file name.
        model_num = str(f).split('.')[0]
        model_filenames[model_num] = str(f)

        model_rows = theory_row_dicts[model_num] = {}

        for row in csv.DictReader(csv_file):
            # Local names deliberately avoid `id` / `type`, which would shadow the
            # builtins for the rest of the kernel session.
            row_id = row['Id']
            shape = row['Name']
            label = str(row['Text Area 1']).strip()
            line_source = row['Line Source']
            line_dest = row['Line Destination']
            source_arrow = row['Source Arrow']
            dest_arrow = row['Destination Arrow']

            # Every row is stored, including the ones skipped below; the edits
            # made to `row` afterwards are visible through this reference.
            model_rows[row_id] = row

            if shape in CONSTRUCT_SHAPES:
                if shape == 'Text' and label.startswith(model_num):
                    # A text box starting with the model number is the model name. Ignore.
                    continue

                if shape != 'Process':
                    # All construct shapes are unified to 'Process'.
                    print ("In model",model_num,"updating construct entity",row_id,"of type",shape,"to Process")
                    row['Name'] = 'Process'
                    row['Shape Library'] = 'Flowchart Shapes'

            elif shape == 'Line':
                # Some CSVs contain lines whose source and destination are the wrong
                # way around: if 'Source Arrow' is Arrow and 'Destination Arrow' is
                # None, both the endpoints and the arrow heads are swapped. Arrow/Arrow
                # is permitted. None/None is an error and is flagged (with the line id)
                # so a researcher can check the diagram.
                if source_arrow == 'Arrow' and dest_arrow == 'None':
                    print("In model",model_num,"Reversing direction of arrow id",row_id)
                    row['Line Source'] = line_dest
                    row['Line Destination'] = line_source
                    row['Source Arrow'] = 'None'
                    row['Destination Arrow'] = 'Arrow'
                elif source_arrow == 'None' and dest_arrow == 'None':
                    print("Model:",model_num,"ERROR: NO-DIRECTION ARROW","id",row_id)

            elif shape in JUNCTION_SHAPES:
                # Circles and triangles are renamed to their flowchart equivalents.
                if shape == 'Circle':
                    row['Name'] = 'Connector'
                    row['Shape Library'] = 'Flowchart Shapes'
                elif shape in TRIANGLE_SHAPES:
                    row['Name'] = 'Merge'
                    row['Shape Library'] = 'Flowchart Shapes'
                # Note: the junction key keeps the shape name as found in the CSV
                # (e.g. 'Circle'), not the renamed one.
                junctions.append(str(model_num)+":"+row_id+":"+shape)

            elif shape == 'Rectangle Container':
                # Container rectangles hold other constructs (their 'Contained By'
                # field points at the container). They are only recorded here and
                # expanded later in the notebook.
                containers.append(str(model_num)+":"+row_id)

            elif shape in IGNORED_SHAPES:
                continue

            else:
                print("ERROR: UNKNOWN TYPE: ",shape, "in model", model_num)
            


In model 1 updating construct entity 3 of type Block to Process
In model 1 updating construct entity 4 of type Block to Process
In model 1 updating construct entity 6 of type Block to Process
In model 1 updating construct entity 7 of type Block to Process
In model 1 updating construct entity 8 of type Block to Process
In model 1 updating construct entity 10 of type Block to Process
In model 1 updating construct entity 11 of type Block to Process
In model 1 updating construct entity 12 of type Block to Process
In model 1 updating construct entity 14 of type Block to Process
In model 1 updating construct entity 15 of type Block to Process
In model 1 updating construct entity 16 of type Block to Process
In model 1 updating construct entity 18 of type Block to Process
In model 1 updating construct entity 19 of type Block to Process
In model 1 updating construct entity 20 of type Block to Process
In model 1 updating construct entity 21 of type Block to Process
In model 1 updating construct 

In [14]:
# Process the junctions out of the relations list by connecting all sources to all
# targets. Each new relation is labelled according to the type of the junction.
# The junction itself and the lines touching it are queued in `ids_to_remove`
# (model_num -> list of row ids); the actual deletion happens in a later cell.

# Relation label per junction shape. The junction keys carry the shape name as it
# appeared in the CSV, so the pre-rename spellings ('Circle' -> Connector,
# 'Isosceles Triangle' -> Merge) and both capitalisations are listed explicitly.
JUNCTION_RELATION_LABELS = {
    'Merge': 'Type of',
    'Isosceles Triangle': 'Type of',
    'Isosceles triangle': 'Type of',
    'Summing junction': '*',
    'Summing Junction': '*',
    'Connector': 'Part of',
    'Circle': 'Part of',
    'Or': '+',
}

ids_to_remove = {}

for junction_key in junctions:
    model_num, conn_id, junction_type = junction_key.split(":")
    ids_to_remove.setdefault(model_num, []).append(conn_id)  # Remove the connector
    print (f"Processing theory {model_num}")

    model_rows = theory_row_dicts[model_num]
    nextId = max(map(int, model_rows.keys())) + 1

    if junction_type not in JUNCTION_RELATION_LABELS:
        # Fail loudly instead of silently reusing the previous junction's label.
        raise ValueError(f"No relation label defined for junction type {junction_type!r} in model {model_num}")
    type_str = JUNCTION_RELATION_LABELS[junction_type]

    jnc_srcs = []   # sources of the lines that end on the junction
    map_srcs = {}   # source id -> id of its line into the junction, if other arrows end on that line
    jnc_tars = []   # destinations of the lines that start on the junction
    map_tars = {}   # target id -> id of its line out of the junction, if other arrows end on that line

    # This works per model: row id -> destination, for every row that has a destination.
    all_dests = {row_id: row['Line Destination'] for row_id, row in model_rows.items() if row['Line Destination']}
    # Everything that is the destination of some row; built once, not once per line.
    dest_ids = set(all_dests.values())

    # NOTE(review): the template for the new relations is simply the last 'Line' row
    # of the model, whether or not it touches this junction - confirm that is intended.
    line_to_copy = None
    for row_id, row in model_rows.items():
        if row['Name'] != 'Line':
            continue
        line_to_copy = row
        source_id = row['Line Source']
        target_id = row['Line Destination']
        if source_id == conn_id:
            jnc_tars.append(target_id)
            ids_to_remove[model_num].append(row_id)
            # Check if any rels end on this rel
            if row_id in dest_ids:
                map_tars[target_id] = row_id
        if target_id == conn_id:
            jnc_srcs.append(source_id)
            ids_to_remove[model_num].append(row_id)
            if row_id in dest_ids:
                map_srcs[source_id] = row_id

    if len(map_srcs)>0:
        print ("Sources to be mapped: ",map_srcs)
    if len(map_tars)>0:
        print ("Targets to be mapped: ",map_tars)

    # One new relation for every (source, target) pair around the junction.
    rels_to_add = [(s, t) for s in jnc_srcs for t in jnc_tars]

    for (s, t) in rels_to_add:
        rel = copy.deepcopy(line_to_copy)
        rel['Id'] = str(nextId)
        rel['Line Source'] = s
        rel['Line Destination'] = t
        rel['Text Area 1'] = type_str
        model_rows[str(nextId)] = rel

        # Arrows that ended on one of the removed junction lines are re-pointed at
        # the new relation (source side first, then target side).
        for endpoint, remap in ((s, map_srcs), (t, map_tars)):
            if endpoint not in remap:
                continue
            oldId = remap[endpoint]
            for arrowToOldId in [k for (k, v) in all_dests.items() if v == oldId]:
                if arrowToOldId not in model_rows:
                    print("ERROR! Can't find original arrow",arrowToOldId,"In model ",model_num)
                    continue
                model_rows[arrowToOldId]['Line Destination'] = str(nextId)
                print ("In model",model_num,"Mapped arrow",arrowToOldId," that ended on ",oldId,"to end on",str(nextId))

        nextId = nextId+1
        print("Added expanded arrow ",rel['Id'],"to model",model_num,"relating",s,"to",t)

Processing theory 1
Added expanded arrow  106 to model 1 relating 3 to 6
Added expanded arrow  107 to model 1 relating 3 to 25
Added expanded arrow  108 to model 1 relating 4 to 6
Added expanded arrow  109 to model 1 relating 4 to 25
Processing theory 1
Added expanded arrow  110 to model 1 relating 8 to 7
Processing theory 1
Added expanded arrow  111 to model 1 relating 10 to 25
Added expanded arrow  112 to model 1 relating 10 to 12
Added expanded arrow  113 to model 1 relating 11 to 25
Added expanded arrow  114 to model 1 relating 11 to 12
Processing theory 1
Added expanded arrow  115 to model 1 relating 14 to 16
Added expanded arrow  116 to model 1 relating 14 to 25
Added expanded arrow  117 to model 1 relating 14 to 12
Added expanded arrow  118 to model 1 relating 15 to 16
Added expanded arrow  119 to model 1 relating 15 to 25
Added expanded arrow  120 to model 1 relating 15 to 12
Processing theory 1
Added expanded arrow  121 to model 1 relating 12 to 26
Added expanded arrow  122 to

In [15]:
# Expand every container: each row contained by it gets its own copy of every
# relation that starts on the container. The container and its outgoing lines are
# queued in `ids_to_remove` for deletion.
for container_key in containers:
    model_num, con_id = container_key.split(":")
    ids_to_remove.setdefault(model_num, []).append(con_id)
    print("Got container",con_id,"in model",model_num)

    model_rows = theory_row_dicts[model_num]

    # New ids continue after the highest existing id (never below 2).
    maxTheoryId = max([1] + [int(row_id) for row_id in model_rows])
    nextId = maxTheoryId + 1

    contained = []   # ids of the rows that sit inside this container
    rel_tars = []    # destinations of the lines leaving this container

    for row_id, row in model_rows.items():
        source_id = row['Line Source']
        target_id = row['Line Destination']
        contained_by = row['Contained By']

        if contained_by == con_id:
            contained.append(row_id)
            # The container is going away, so drop the reference to it.
            row['Contained By'] = ''

        if source_id == con_id: # only ever this direction
            line_to_copy = row
            rel_tars.append(target_id)
            ids_to_remove[model_num].append(row_id)

    # Every contained row inherits every outgoing relation of the container.
    for s in contained:
        for t in rel_tars:
            rel = copy.deepcopy(line_to_copy)
            rel['Id'] = str(nextId)
            rel['Line Source'] = s
            rel['Line Destination'] = t
            model_rows[str(nextId)] = rel
            nextId += 1
            print("Added expanded arrow ",rel['Id'],"to model",model_num,"relating",s,"to",t)

Got container 15 in model 30
Added expanded arrow  46 to model 30 relating 7 to 4
Added expanded arrow  47 to model 30 relating 8 to 4
Added expanded arrow  48 to model 30 relating 9 to 4
Added expanded arrow  49 to model 30 relating 10 to 4
Added expanded arrow  50 to model 30 relating 11 to 4
Added expanded arrow  51 to model 30 relating 12 to 4
Added expanded arrow  52 to model 30 relating 13 to 4
Added expanded arrow  53 to model 30 relating 14 to 4
Got container 20 in model 30
Added expanded arrow  54 to model 30 relating 16 to 5
Added expanded arrow  55 to model 30 relating 17 to 5
Added expanded arrow  56 to model 30 relating 18 to 5
Added expanded arrow  57 to model 30 relating 19 to 5
Got container 11 in model 5
Added expanded arrow  63 to model 5 relating 7 to 3
Added expanded arrow  64 to model 5 relating 8 to 3
Added expanded arrow  65 to model 5 relating 9 to 3
Added expanded arrow  66 to model 5 relating 10 to 3
Got container 23 in model 5
Added expanded arrow  67 to mode

In [16]:
# Delete everything queued in ids_to_remove (junctions, containers and the lines
# attached to them) from each model. Ids are de-duplicated first because the same
# line can be queued more than once.
for model_num, queued_ids in ids_to_remove.items():
    ids = set(queued_ids)
    print ("Going to remove ",ids," from model ",model_num)
    model_rows = theory_row_dicts[model_num]
    for row_id in ids:
        del model_rows[row_id]

Going to remove  {'5', '119', '95', '83', '17', '47', '49', '35', '78', '71', '63', '25', '88', '113', '41', '72', '45', '50', '13', '62', '46', '75', '48', '53', '107', '92', '74', '54', '43', '111', '79', '77', '76', '87', '116', '44', '64', '42', '58', '65', '73', '9', '109', '57'}  from model  1
Going to remove  {'30', '38', '31', '47', '49', '35', '33', '26', '25', '29', '41', '45', '39', '36', '46', '48', '34', '43', '32', '44', '37', '24', '42', '40'}  from model  10
Going to remove  {'29', '30', '28', '31', '14', '32'}  from model  11
Going to remove  {'38', '17', '47', '49', '26', '41', '45', '50', '39', '36', '20', '22', '43', '15', '51', '44', '37', '42', '40'}  from model  12
Going to remove  {'129', '170', '169', '83', '159', '168', '29', '173', '136', '197', '93', '158', '205', '202', '113', '97', '85', '131', '172', '145', '190', '124', '164', '51', '188', '110', '147', '56', '42', '127', '185', '184', '171', '191', '94', '99', '182', '60', '144', '146', '195', '179', '1

In [17]:
# Find arrows whose source is itself an arrow and re-attach them to the source of
# that arrow (one level only). Lines with an empty or missing source are reported
# and left unchanged.
for model_num, model_rows in theory_row_dicts.items():
    for line_id, line in model_rows.items():
        if line['Name'] != 'Line':
            continue

        line_source = line['Line Source']
        if line_source == '':
            print ("Error! No line source found for line ",line_id," in theory ",model_num)
            continue
        if line_source not in model_rows:
            print ("Error! Line source no longer exists for line ",line_id,"in theory ",model_num)
            continue

        source_row = model_rows[line_source]
        if source_row['Name'] == 'Line':
            orig_source = source_row['Line Source']
            print("Arrow starting on arrow: ",line_id," starts on ",line_source," in theory ",model_num, "...Updating to original source",orig_source)
            line['Line Source'] = orig_source
            


Error! Line source no longer exists for line  124 in theory  1
Error! Line source no longer exists for line  125 in theory  1
Error! Line source no longer exists for line  126 in theory  1
Arrow starting on arrow:  79  starts on  65  in theory  29 ...Updating to original source 3
Arrow starting on arrow:  89  starts on  40  in theory  66 ...Updating to original source 3


In [18]:
# Check for arrows that end on other arrows and collect them for correction.
# Result: tofix[model_num][id of the arrow being pointed at] = [ids of the arrows
# that end on it]. Lines with an empty or missing destination are reported and
# skipped. Nothing is modified here.
tofix = {}
for model_num, model_rows in theory_row_dicts.items():
    for row, line in model_rows.items():
        if line['Name'] != 'Line':
            continue

        line_dest = line['Line Destination']
        if line_dest == '':
            print ("Error! No line destination found for line ",row," in theory ",model_num)
            continue
        if line_dest not in model_rows:
            print ("Error! Line destination no longer exists for line ",row,"in theory ",model_num)
            continue

        if model_rows[line_dest]['Name'] == 'Line':
            tofix.setdefault(model_num, {}).setdefault(line_dest, []).append(row)

In [19]:
# Replace every arrow that other arrows point at with a reified entity and two arrows:
#   source --"relates through"--> [the 'X' to 'Y' <type> relationship] --"relates to"--> dest
# The arrows that used to end on the original arrow then end on the new entity.
for model_num in tofix.keys():
    model_rows = theory_row_dicts[model_num]
    for line_dest, rows in tofix[model_num].items():
        # `rows` is the list of arrows that end on the arrow `line_dest`.
        # Two fresh ids are needed: one for the entity, one for the extra relation.
        nextId = max(map(int, model_rows.keys())) + 1
        entity_id = str(nextId)
        relation_id = str(nextId + 1)

        target_line = model_rows[line_dest]
        orig_source_id = target_line['Line Source']
        orig_dest_id = target_line['Line Destination']

        # The endpoints may be empty or already removed; report and skip instead
        # of failing with a KeyError half-way through the notebook.
        missing = [end for end in (orig_source_id, orig_dest_id) if end not in model_rows]
        if missing:
            print ("Error! Cannot fix line ",line_dest," in theory ",model_num,": endpoint(s) not found: ",missing)
            continue

        print ("Modifying line from ",orig_source_id,"to",orig_dest_id)
        orig_source_name = model_rows[orig_source_id]['Text Area 1'].strip()
        orig_dest_name = model_rows[orig_dest_id]['Text Area 1'].strip()
        orig_rel_type = target_line['Text Area 1'].strip()

        # 1. Copy the original source to make the new entity,
        #    named "the 'X' to 'Y' <type> relationship".
        rel_entity = copy.deepcopy(model_rows[orig_source_id])
        rel_entity['Id'] = entity_id
        rel_entity['Name'] = 'Terminator' # rounded box
        # Use the full relation label when one is returned, else keep the label as written.
        relLabel = Relation.getFullLabelForShortLabel(orig_rel_type)
        if relLabel is None:
            relLabel = orig_rel_type
        rel_entity['Text Area 1'] = f"the '{orig_source_name}' to '{orig_dest_name}' {relLabel} relationship"
        print(rel_entity['Text Area 1'])
        model_rows[entity_id] = rel_entity

        # 2. Map all the arrows that ended on the arrow onto the new entity.
        for row in rows:
            model_rows[row]['Line Destination'] = entity_id

        # 3. Create one more relation to connect the new entity to the original
        #    destination. The copy must be taken before the original line is
        #    re-pointed in step 4.
        onward_rel = copy.deepcopy(target_line)
        onward_rel['Id'] = relation_id
        onward_rel['Line Source'] = entity_id
        onward_rel['Text Area 1'] = "relates to"

        # 4. Map the original relation onto the new entity.
        target_line['Line Destination'] = entity_id
        target_line['Text Area 1'] = "relates through"

        model_rows[relation_id] = onward_rel
        print ("Fixed ",line_dest,"in",model_num)

Modifying line from  27 to 29
the 'Performance of goal-directed behaviours' to 'Goal attainment / failure' Influences relationship
Fixed  89 in 1
Modifying line from  21 to 24
the 'Implementation intentions' to 'Trying' Positively influences relationship
Fixed  56 in 1
Modifying line from  19 to 21
the 'Desire' to 'Implementation intentions' Influences relationship
Fixed  70 in 1
Modifying line from  21 to 22
the 'Behavioural intention' to 'Behaviour' Influences relationship
Fixed  40 in 14
Modifying line from  14 to 17
the 'Individual differences' to 'Message acceptance' Influences (*) relationship
Fixed  79 in 16
Modifying line from  14 to 11
the 'Individual differences' to 'Fear' Influences (*) relationship
Fixed  80 in 16
Modifying line from  18 to 13
the 'Feedback-standard discrepancies' to 'Performance / behaviour' Influences relationship
Fixed  57 in 17
Modifying line from  9 to 6
the 'Norms' to 'Behaviour' Influences relationship
Fixed  11 in 18
Modifying line from  5 to 8
the 

In [20]:
# Write the files out again, fixed: one CSV per model in theories_2025/processed/.
# The file name is the original file name with "MODIFIED.csv" appended
# (the original ".csv" suffix is kept in the middle of the name).
output_dir = "theories_2025/processed/"
os.makedirs(output_dir, exist_ok=True)  # do not rely on the folder already existing

for model_num, model_rows in theory_row_dicts.items():
    if not model_rows:
        # No rows means no header to write.
        print("Skipping model", model_num, "- no rows to write")
        continue

    model_filename = model_filenames[model_num]+"MODIFIED"

    # The column order is taken from row '1' when present; otherwise from the first
    # remaining row, so a model without a row '1' no longer raises a KeyError.
    header_row = model_rows['1'] if '1' in model_rows else next(iter(model_rows.values()))
    fieldNames = list(header_row.keys())

    with codecs.open(output_dir+model_filename+".csv", mode='w', encoding="utf-8") as outfile:
        writer = csv.DictWriter(outfile, fieldNames)
        writer.writeheader()
        for row in model_rows.values():
            writer.writerow(row)