# Model 1

This is a first attempt at modelling the Iron Hypothesis with graphs. Here the nodes are Increase/Decrease events for specific variables.

In [3]:
from py2neo import Graph, Node, Relationship
from scripts.vis import draw

In [4]:
# Per-label choice of the node property shown as caption in the visualisation:
# both event labels display their "var" property.
options = dict.fromkeys(("Increase", "Decrease"), "var")

# Open the graph with py2neo's default connection settings and wipe it, so the
# notebook always starts from an empty database.
graph = Graph()
graph.delete_all()

# Event nodes: each is an "Increase" of the variable named by `var`.
iron = Node("Increase", var="iron to seawater")
phytoplankton = Node("Increase", var="phytoplankton")
producer = Node("Increase", var="primary producers")
production = Node("Increase", var="productivity")

# Relations between the events.
cause1 = Relationship(iron, "CAUSES", phytoplankton)      # causal relation
isa = Relationship(phytoplankton, "ISA", producer)        # isa relation
cause2 = Relationship(producer, "CAUSES", production)     # second causal relation

# Persist step by step: each call adds only the entities not yet in the graph.
graph.create(iron, phytoplankton, cause1)
graph.create(producer, isa)
graph.create(production, cause2)

# Render the resulting graph.
draw(graph, options)

In [5]:
# Infer a new causal relation by propagating effects up the ISA hierarchy:
# if X CAUSES Y and Y ISA Z, then X CAUSES Z (marked with type 'inferred').
#
# MERGE is used instead of CREATE so the cell is idempotent: re-running it does
# not add a second, identical inferred edge, which would otherwise duplicate
# every causal chain returned by variable-length CAUSES path queries.

query = """
MATCH (cause) -[:CAUSES]-> (effect), (effect) -[:ISA]-> (superclass)
MERGE (cause) -[:CAUSES {type:'inferred'}]-> (superclass)
"""

graph.cypher.execute(query)

# Redraw to show the inferred edge.
draw(graph, options)

In [6]:
# List the causal chains that start at the "iron to seawater" Increase node:
# follow one or more CAUSES edges and, for every matching path, return the
# `var` property of each node along it.
query = """
MATCH path = (:Increase {var:'iron to seawater'})-[:CAUSES*]->()
WITH extract(node IN nodes(path) | node.var) AS variables
RETURN variables
"""

# Last expression of the cell, so the result table is shown as the cell output.
graph.cypher.execute(query)

   | variables                                                
---+-----------------------------------------------------------
 1 | ['iron to seawater', 'phytoplankton']                    
 2 | ['iron to seawater', 'primary producers']                
 3 | ['iron to seawater', 'primary producers', 'productivity']

In [7]:
# Follow one or more CAUSES edges outward from the "iron to seawater" Increase
# node and render every node on each path as "<first label> of <var>",
# e.g. "Increase of phytoplankton".
query = """
MATCH path = (:Increase {var:'iron to seawater'})-[:CAUSES*]->()
WITH extract(node IN nodes(path) | head(Labels(node)) + ' of ' + node.var) AS chains
RETURN chains
"""

# Last expression of the cell, so the result table is shown as the cell output.
graph.cypher.execute(query)

   | chains                                                                                       
---+-----------------------------------------------------------------------------------------------
 1 | ['Increase of iron to seawater', 'Increase of phytoplankton']                                
 2 | ['Increase of iron to seawater', 'Increase of primary producers']                            
 3 | ['Increase of iron to seawater', 'Increase of primary producers', 'Increase of productivity']

In [8]:
# New event nodes: one Increase and two Decrease events.
net_high_lat_prod = Node("Increase", var="the net high latitude productivity")
pco2_ice_age = Node("Decrease", var="PCO2 of the last ice age")
pco2 = Node("Decrease", var="PCO2")

# Causal relation between the two specific events.
cause3 = Relationship(net_high_lat_prod, "CAUSES", pco2_ice_age)

# Generalisations: the general event points at its more specific counterpart.
# `production` is the "productivity" node created earlier in the notebook.
gen1 = Relationship(production, "GENERALISES", net_high_lat_prod)
gen2 = Relationship(pco2, "GENERALISES", pco2_ice_age)

# Persist step by step, adding only entities that are not yet in the graph.
graph.create(net_high_lat_prod, pco2_ice_age, cause3)
graph.create(gen1)
graph.create(pco2, gen2)

# Render the extended graph.
draw(graph, options)

In [9]:
# Infer that a generalisation of a cause also causes the effect:
# if X CAUSES Y and G GENERALISES X, then G CAUSES Y (marked type 'inferred').
# In this notebook's graph that links "productivity" to
# "PCO2 of the last ice age".
#
# MERGE is used instead of CREATE so the cell is idempotent: re-running it does
# not add duplicate inferred edges, which would otherwise duplicate the causal
# chains returned by variable-length CAUSES path queries.

query = """
MATCH (cause) -[:CAUSES]-> (effect), (generalisation) -[:GENERALISES]-> (cause)
MERGE (generalisation) -[:CAUSES {type:'inferred'}]-> (effect)
"""

graph.cypher.execute(query)

# Redraw to show the inferred edge.
draw(graph, options)

In [10]:
# Infer that a cause also causes any generalisation of its effect:
# if X CAUSES Y and G GENERALISES Y, then X CAUSES G (marked type 'inferred').
# In this notebook's graph that links every node that CAUSES
# "PCO2 of the last ice age" to the more general "PCO2".
#
# MERGE is used instead of CREATE so the cell is idempotent: re-running it does
# not add duplicate inferred edges, which would otherwise duplicate the causal
# chains returned by variable-length CAUSES path queries.

query = """
MATCH (cause) -[:CAUSES]-> (effect), (generalisation) -[:GENERALISES]-> (effect)
MERGE (cause) -[:CAUSES {type:'inferred'}]-> (generalisation)
"""

graph.cypher.execute(query)

# Redraw to show the inferred edges.
draw(graph, options)

In [11]:
# Extract the causal chains that start at the "iron to seawater" Increase node,
# rendering every node on a path as "<first label> of <var>".
query = """
MATCH path = (:Increase {var:'iron to seawater'})-[:CAUSES*]->()
WITH extract(node IN nodes(path) | head(Labels(node)) + ' of ' + node.var) AS chains
RETURN chains
"""

# Print each chain as one hypothesis, followed by a blank line.
result = graph.cypher.execute(query)
for record in result:
    chain = record[0]  # the only returned column, `chains`
    print("{}\n".format(" ==> ".join(chain)))

Increase of iron to seawater ==> Increase of phytoplankton

Increase of iron to seawater ==> Increase of primary producers

Increase of iron to seawater ==> Increase of primary producers ==> Increase of productivity

Increase of iron to seawater ==> Increase of primary producers ==> Increase of productivity ==> Decrease of PCO2 of the last ice age

Increase of iron to seawater ==> Increase of primary producers ==> Increase of productivity ==> Decrease of PCO2



This approach works for a simple example like this one, but it does not scale well. For instance, the ISA and GENERALISES relations hold between __events__ rather than __variables__, so a single fact about two variables needs three edges, one per event type:

- change in phytoplankton ISA change in primary producers
- increase in phytoplankton ISA increase in primary producers
- decrease in phytoplankton ISA decrease in primary producers

However, what you actually want to state is a single relation between the variables themselves:

- phytoplankton ISA primary producers

This can only be done if the variables become separate nodes, connected to events.  
