In [1]:
import pandas as pd
import numpy as np
import spacy

In [2]:
# Sample hypothesis sentence used to probe what the spaCy pipeline extracts.
h = 'if temperature increases then sound decreases'

# Load the small English pipeline and parse the sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp(h)

# Sentence-level view: noun chunks, and lemmas of the tokens tagged as verbs.
print("Noun phrases:", [span.text for span in doc.noun_chunks])
print("Verbs:", [tok.lemma_ for tok in doc if tok.pos_ == "VERB"])

# Token-level view, one list per attribute: token, lemma, dependency label, POS tag.
print(list(doc))
print([tok.lemma_ for tok in doc])
print([tok.dep_ for tok in doc])
print([tok.pos_ for tok in doc])

# Last expression of the cell, so the vector is rendered as the cell output.
doc.vector

Noun phrases: ['temperature increases']
Verbs: []
[if, temperature, increases, then, sound, decreases]
['if', 'temperature', 'increase', 'then', 'sound', 'decrease']
['mark', 'compound', 'nsubj', 'advmod', 'nsubj', 'ROOT']
['SCONJ', 'NOUN', 'NOUN', 'ADV', 'ADJ', 'NOUN']


array([ 0.99722916,  1.1348454 ,  0.582306  , -1.4066116 ,  0.6502101 ,
       -0.6840294 , -0.6691726 ,  1.1938537 ,  1.2112681 ,  0.6967559 ,
       -0.6239522 ,  0.49588263, -1.3142248 ,  0.3996046 ,  1.1564846 ,
       -0.27339533, -0.55502516,  0.14758466, -0.6036237 , -0.5917361 ,
        1.0621302 ,  0.02475539,  0.9418297 ,  0.13122554,  0.8650777 ,
        1.1480318 , -1.301257  , -1.3851271 , -2.6814482 , -1.6239945 ,
        0.12802956,  0.21847264,  0.8066585 , -0.2579772 ,  0.18061757,
        0.22107458,  0.40674683, -0.38718352, -0.898751  ,  0.18371415,
        1.1692206 ,  0.05243275,  0.41899836, -1.1361624 , -0.09325043,
        0.76633126,  1.0684794 ,  2.4825003 ,  0.13061416,  0.15378147,
       -1.5021211 , -1.3664193 ,  0.7509009 ,  0.34877333, -0.59150773,
        1.4339224 ,  0.7099588 ,  0.89903474, -0.8322229 ,  0.36417747,
        0.07456248, -0.6681313 ,  0.17610611, -1.9105297 , -0.50984687,
        1.3061296 ,  0.41460586, -0.50937027,  0.40833613,  0.42

In [4]:
# Vocabulary from which synthetic actions and hypotheses are assembled.

# Quantities an action can talk about.
variables = [
    'temperature',
    'radiation',
    'brightness',
    'light',
    'heat',
]

# Verbs describing a change of a variable.
modifiers = [
    'increases',
    'decreases',
]

# Comparison phrases relating a variable to a value or to another variable.
interactors = [
    'is greater than',
    'is smaller than',
    'is equal to',
]

# Phrases that pin a variable to a place or an object.
qualifiers = [
    'in point A',
    'in point B',
    'of the object',
]

In [5]:
def generate_actions(variables, modifers, interactors, qualifiers):
    """Generate action dicts for every combination of the given vocabulary.

    Each action is a dict whose insertion order is the word order of the
    rendered sentence. Structures produced:

        1. Variable + Qualifier + Modifier
        2. Modifier + Variable + Qualifier
        3. Variable + Qualifier + Interactor + Value
        4. Variable + Qualifier + Interactor + Variable + Qualifier
           (same variable, a different qualifier)
        5. Variable + Qualifier + Interactor + Qualifier
           (the second qualifier refers to the first variable)
        6. Action and Action
           (two of the simple actions above, about different variables)

    Args:
        variables: names of the quantities (e.g. 'temperature').
        modifers: change verbs (e.g. 'increases'). The misspelled parameter
            name is kept so existing keyword callers keep working.
        interactors: comparison phrases (e.g. 'is greater than').
        qualifiers: qualifying phrases (e.g. 'in point A').

    Returns:
        A list with all simple actions (types 1-5) followed by one composed
        action (type 6) per simple action. Simple actions use the keys
        'variable', 'qualifier', 'modifier', 'interactor', 'value',
        'variable_' and 'qualifier_'; a composed action holds the first
        action's keys, then 'and', then the second action's keys with a
        numeric suffix (e.g. 'variable1').

    Types 4 and 5 are skipped when there is no second distinct qualifier, and
    type 6 is skipped when the simple actions do not cover at least two
    different variables, because no valid sample exists in those cases.
    """
    actions = []
    for vi in variables:
        for qi in qualifiers:
            # Use the parameter, not a same-named global, as the modifier source.
            for mi in modifers:
                # actions type 1
                actions.append({'variable': vi, 'qualifier': qi, 'modifier': mi})
                # actions type 2
                actions.append({'modifier': mi, 'variable': vi, 'qualifier': qi})
            # Qualifiers that may serve as the second side of a comparison.
            other_qualifiers = [q for q in qualifiers if q != qi]
            for ii in interactors:
                # actions type 3: compare against a random integer in [0, 100)
                actions.append({'variable': vi, 'qualifier': qi, 'interactor': ii,
                                'value': np.random.randint(100)})
                if not other_qualifiers:
                    # Nothing to compare against; sampling would never succeed.
                    continue
                qj = np.random.choice(other_qualifiers)
                # actions type 4
                actions.append({'variable': vi, 'qualifier': qi, 'interactor': ii,
                                'variable_': vi, 'qualifier_': qj})
                # actions type 5
                actions.append({'variable': vi, 'qualifier': qi, 'interactor': ii,
                                'qualifier_': qj})

    composed_actions = []
    # A pair with different variables exists only if the variables are not all equal.
    can_compose = any(a['variable'] != actions[0]['variable'] for a in actions)
    if can_compose:
        for _ in range(len(actions)):
            # Sample two distinct simple actions about different variables.
            # Composed actions are not part of the pool being sampled.
            while True:
                ai, aj = np.random.choice(actions, 2, replace=False)
                if ai['variable'] != aj['variable']:
                    break
            # Numeric suffix keeps aj's keys from overriding ai's keys; it is
            # 1 for simple actions because none of them has an 'and' key.
            c = sum('and' in k for k in {**ai, **aj}) + 1
            aj = {f'{k}{c}': v for k, v in aj.items()}
            # actions type 6
            composed_actions.append({**ai, 'and': 'and', **aj})
    actions += composed_actions
    return actions


class Action:
    """Wraps one action dict, renders it as text and derives altered copies of it.

    The wrapped dict is never modified: every method returns a new dict.
    """

    def __init__(self, a):
        # `a` maps part names (e.g. 'variable', 'qualifier') to their words, in sentence order.
        self.a = a
        # Rendered sentence, cached at construction time.
        self.text = repr(self)

    def __repr__(self):
        # Join the values in insertion order, separated by single spaces.
        return ' '.join(str(part) for part in self.a.values())

    def _without(self, key):
        """Return a shallow copy of the action dict that lacks `key`."""
        return {k: v for k, v in self.a.items() if k != key}

    def remove_variable(self):
        """Return a copy with one randomly chosen variable key dropped, leaving the action incomplete.

        Candidate keys contain 'variable' but not 'variable_'.
        """
        candidates = [k for k in self.a if 'variable' in k and 'variable_' not in k]
        dropped = np.random.choice(candidates)
        return self._without(dropped)

    def remove_modifier(self):
        """Return a copy without the 'modifier' key, so nothing is changing."""
        assert 'modifier' in self.a, \
            "Not possible, there is not modifiers on this action."
        return self._without('modifier')

    def change_second_variable(self, variables):
        """Return a copy whose 'variable_' differs from 'variable', so two unlike variables are compared.

        The replacement is drawn at random from `variables` until it differs from 'variable'.
        """
        assert 'variable_' in self.a, \
            "Not possible, there is not another variable to change."
        vj = np.random.choice(variables)
        while vj == self.a['variable']:
            vj = np.random.choice(variables)
        return {**self.a, 'variable_': vj}

    def remove_qualifier(self):
        """Return a copy without the 'qualifier' key, leaving the action incomplete."""
        assert 'qualifier' in self.a, \
            "Not possible, there is not qualifiers on this action."
        return self._without('qualifier')
        
    
# Wrap every generated action dict so it can be rendered as text and altered.
actions = list(map(Action, generate_actions(variables, modifiers, interactors, qualifiers)))
# Disabled example of deriving incomplete actions:
# [a.remove_qualifier() for a in actions if 'qualifier' in a.a]
# Preview ten distinct actions picked at random.
list(np.random.choice(actions, size=10, replace=False))

[light in point A decreases,
 increases heat in point A,
 brightness of the object increases,
 radiation of the object increases,
 decreases brightness in point A and temperature in point B is smaller than of the object,
 light in point B is smaller than in point A,
 heat in point B increases and increases radiation in point A,
 heat in point A is smaller than heat of the object and light of the object is greater than 81,
 heat in point A increases and decreases brightness in point A,
 light in point B is greater than 56]

In [6]:
def generate_valid_hypotheses(actions):
    """Build hypothesis sentences from ordered pairs of distinct actions.

    Intended to follow the hypothesis grammar in Kroeze et al., 2019. For a
    pair (x, y), x is the condition and y the consequence. A pair is used when:

        * x and y render to different text,
        * x has exactly one key containing 'variable' or exactly one key
          containing 'interactor', and
        * x's 'variable' does not occur in y's text.

    Every accepted pair yields 'if x then y' and 'y if x'. When y's text starts
    with its own variable, 'if x, the y' is added as well, so the article
    always precedes a noun.

    NOTE(review): an earlier description asked for x to hold one variable
    *and* a modifier, while the check is "one variable *or* one interactor".
    The check is kept as is; confirm which of the two is intended.

    Args:
        actions: re-iterable collection of objects exposing `.a` (the action
            dict, with a 'variable' key), `.text` (the rendered action) and a
            string rendering used inside the sentences.

    Returns:
        List of hypothesis strings, grouped by x and then by y in input order.
    """
    h = []
    for ax in actions:
        # These checks depend on x only, so evaluate them once per x.
        ax_one_var = sum('variable' in k for k in ax.a) == 1
        ax_one_interactor = sum('interactor' in k for k in ax.a) == 1
        if not (ax_one_var or ax_one_interactor):
            continue
        for ay in actions:
            # Skip identical actions and pairs where y already mentions x's variable.
            if ax.text == ay.text or ax.a['variable'] in ay.text:
                continue
            h.append(f'if {ax} then {ay}')
            h.append(f'{ay} if {ax}')
            # "the" only reads correctly in front of y's variable, so require
            # y to start with it (and not with a modifier).
            if ay.text.startswith(ay.a['variable']):
                h.append(f'if {ax}, the {ay}')
    return h

def generate_error1_hypotheses(actions):
    """Placeholder for hypotheses that are invalid under criterion 1 (Kroeze et al., 2019).

    Target error: the hypothesis does not contain two variable names.

    Not implemented yet: `actions` is ignored and a new empty list is returned.
    """
    # TODO: derive the invalid hypotheses from `actions`.
    return []

def generate_error2_hypotheses(actions):
    """Placeholder for hypotheses that are invalid under criterion 2 (Kroeze et al., 2019).

    Target error: the hypothesis does not contain a modifier in action x.

    NOTE(review): the criterion number is taken from the function name;
    confirm it against the paper.

    Not implemented yet: `actions` is ignored and a new empty list is returned.
    """
    # TODO: derive the invalid hypotheses from `actions`.
    return []
    
def generate_error3_hypotheses(actions):
    """Placeholder for hypotheses that are invalid under criterion 3 (Kroeze et al., 2019).

    Target error: the hypothesis is not syntactically correct.

    NOTE(review): the criterion number is taken from the function name;
    confirm it against the paper.

    Not implemented yet: `actions` is ignored and a new empty list is returned.
    """
    # TODO: derive the invalid hypotheses from `actions`.
    return []
    
def generate_error4_hypotheses(actions):
    """Placeholder for hypotheses that are invalid under criterion 4 (Kroeze et al., 2019).

    Target error: the hypothesis manipulates more than one variable in action x.

    NOTE(review): the criterion number is taken from the function name;
    confirm it against the paper.

    Not implemented yet: `actions` is ignored and a new empty list is returned.
    """
    # TODO: derive the invalid hypotheses from `actions`.
    return []

def generate_error5_hypotheses(actions):
    """Placeholder for hypotheses that are invalid under criterion 5 (Kroeze et al., 2019).

    Target error: the variables of the hypothesis do not carry a qualifier.

    NOTE(review): the criterion number is taken from the function name;
    confirm it against the paper.

    Not implemented yet: `actions` is ignored and a new empty list is returned.
    """
    # TODO: derive the invalid hypotheses from `actions`.
    return []


# All hypothesis sentences that can be formed from the generated actions.
hypothesis = generate_valid_hypotheses(actions)
# Preview ten distinct hypotheses picked at random.
list(np.random.choice(hypothesis, size=10, replace=False))

['heat in point B is smaller than 88 if brightness of the object is equal to 24',
 'if brightness in point A is smaller than of the object and decreases light in point A then decreases temperature of the object',
 'heat in point B increases and increases radiation in point A if brightness of the object is smaller than 5',
 'if heat of the object is greater than in point A then brightness of the object is greater than in point B',
 'if light of the object is smaller than 70 and heat of the object increases then radiation in point A is equal to of the object and decreases temperature in point A',
 'if temperature of the object decreases and brightness in point A is smaller than brightness of the object then heat of the object is equal to 49',
 'heat in point B is greater than heat in point A if radiation in point A decreases',
 'heat in point B is smaller than in point A and temperature of the object is equal to temperature in point A if brightness in point A is smaller than 8',
 'bright