# # Random grammar generation

# Creates a scale-free random grammar with specified parameters

In [4]:
from ipywidgets import interact, interactive, fixed, interact_manual

In [14]:
# Grammar parameters
def Slider1(x):
    return x
vocab_size = interact(Slider1, x = 10)

def Slider2(y):
    return y
num_classes = interact(Slider2, y = 2)

interactive(children=(IntSlider(value=10, description='x', max=30, min=-10), Output()), _dom_classes=('widget-…

interactive(children=(IntSlider(value=2, description='y', max=6, min=-2), Output()), _dom_classes=('widget-int…

In [227]:
# Grammar parameters
num_words = 50 # vocabulary size
num_classes = 5 # number of grammar classes
num_class_connectors = 8 # number of connectors between grammar classes (direction is important)
connectors_limit = 2 # max number of connectors a word can have

In [228]:
# Populate grammar classes following a Zipf distribution
from fractions import Fraction
import numpy as np

harmonic_number = sum(Fraction(1, d) for d in range(1, num_classes + 1))

zipf_fracs = [1 / x / harmonic_number for x in range(1, num_classes + 1)]

words_per_class = np.round(np.array(zipf_fracs) * num_words)

In [229]:
# Plot vocab fraction vs rank for grammar classes
from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *

init_notebook_mode(connected=True)         # initiate notebook for offline plot

class_freqs = Scatter(
    x = np.array(range(1, num_classes + 1)),
    y = words_per_class / num_words
)
Zipf_law = Scatter(
    x = np.array(range(1, num_classes + 1)),
    y = zipf_fracs
)
data = [class_freqs, Zipf_law]
iplot(data)

In [230]:
# Create random connectors between grammar classes
import random as rand
connectors = set()
for i in range(0, num_class_connectors - 1):
    randint1 = rand.randint(0, num_classes - 1)
    randint2 = rand.randint(0, num_classes - 1)
    if randint1 != randint2: # avoid classes connecting to themselves; may cause less connections than param
        connectors.add((randint1, randint2))

#connectors = set([(rand.randint(0, num_classes - 1), rand.randint(0, num_classes - 1)) for i in range(0, num_class_connectors)])
print(connectors)

{(1, 2), (1, 0), (3, 1), (0, 3), (4, 1), (4, 0)}


In [231]:
# Translate connectors into connector labels
connectors_dict = {k:[] for k in range(num_classes)}
for connector in connectors:
    connector_string = "C" + str(connector[0]) + "_" + str(connector[1])
    connectors_dict[connector[0]].append(connector_string + "+")
    connectors_dict[connector[1]].append(connector_string + "-")
    
print(connectors_dict)

{0: ['C1_0-', 'C0_3+', 'C4_0-'], 1: ['C1_2+', 'C1_0+', 'C3_1-', 'C4_1-'], 2: ['C1_2-'], 3: ['C3_1+', 'C0_3-'], 4: ['C4_1+', 'C4_0+']}


In [233]:
# Build the disjuncts randomly, with some directives
dict_disjuncts = {}
for gramm_class, connects in connectors_dict.items():
     # don't conjunct more connectors than available ones, nor than limit
    max_connectors = min(connectors_limit, len(connects))
    disjuncts = []
    
    for connector in connects: # create one conjunct per connector; arbitrary choice
        num_connectors = rand.randint(1, max_connectors) # determine how many connectors for this conjunct
        conjunct = [connector] # current connector always goes in conjunct
        
        diff_connects = connects[:] # make independent copy
        diff_connects.remove(connector) # don't repeat connector in a conjunct
        conjunct.extend(rand.sample(diff_connects, num_connectors - 1)) # add random connectors to conjunct; no repeats
            
        disjuncts.append(conjunct)
        
    dict_disjuncts[gramm_class] = set(tuple(d) for d in disjuncts) # set eliminates duplicate disjuncts

print(dict_disjuncts)

{0: {('C1_0-',), ('C4_0-',), ('C0_3+',)}, 1: {('C4_1-', 'C1_0+'), ('C3_1-', 'C1_0+'), ('C1_2+',), ('C1_0+',)}, 2: {('C1_2-',)}, 3: {('C0_3-', 'C3_1+'), ('C3_1+',)}, 4: {('C4_1+', 'C4_0+'), ('C4_0+',)}}
