# # Random grammar generation

# Creates a scale-free random grammar with specified parameters

We start by obtaining the parameters of the grammar from a set of interactive sliders.

In [277]:
# Sliders for grammar parameters
from ipywidgets import interact, interactive, fixed, interact_manual

def GetParams(num_words, num_classes, num_class_connectors, connectors_limit):
    """Return the four grammar parameters, unchanged, packed into one tuple.

    The tuple order is (num_words, num_classes, num_class_connectors,
    connectors_limit), i.e. the same order as the arguments.
    """
    params = (
        num_words,
        num_classes,
        num_class_connectors,
        connectors_limit,
    )
    return params
# (min, max) range for each slider; one entry per GetParams parameter
slider_ranges = dict(
    num_words=(10, 50),  # vocabulary size
    num_classes=(3, 10),  # number of grammar classes
    num_class_connectors=(5, 40),  # connectors between grammar classes (directed)
    connectors_limit=(1, 5),  # max connectors a word can have
)
grammar_params = interactive(GetParams, **slider_ranges)
display(grammar_params)

interactive(children=(IntSlider(value=30, description='num_words', max=50, min=10), IntSlider(value=6, descrip…

In [284]:
# Read the current slider values; the widget's result is the tuple returned by
# GetParams, so it unpacks in the same order as that function's arguments
(
    num_words,
    num_classes,
    num_class_connectors,
    connectors_limit,
) = grammar_params.result

In [285]:
# Populate grammar classes following a Zipf distribution
from fractions import Fraction
import numpy as np

# Ranks 1..num_classes, shared by the normalising constant and the fractions
class_ranks = range(1, num_classes + 1)

# Normalising constant: sum of 1/rank over all classes, kept exact as a Fraction
harmonic_number = sum(Fraction(1, rank) for rank in class_ranks)

# Share of the vocabulary assigned to the class of each rank (proportional to 1/rank)
zipf_fracs = [1 / rank / harmonic_number for rank in class_ranks]

# Word count per class, rounded to whole words.
# NOTE(review): each class is rounded independently, so the counts need not
# add up to exactly num_words -- confirm this is acceptable downstream.
words_per_class = np.round(np.array(zipf_fracs) * num_words)

In [286]:
# Plot vocab fraction vs rank for grammar classes
from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *

# Prepare the notebook for offline plotting
init_notebook_mode(connected=True)

# Both traces are drawn against the same class ranks 1..num_classes
ranks = np.array(range(1, num_classes + 1))

# Vocabulary fraction actually assigned to each class after rounding
class_freqs = Scatter(x=ranks, y=words_per_class / num_words)

# Unrounded Zipf fractions, for comparison
Zipf_law = Scatter(x=ranks, y=zipf_fracs)

data = [class_freqs, Zipf_law]
iplot(data)

In [290]:
# Create random connectors between grammar classes
import random as rand
# Draw random directed connectors between grammar classes.
# Each draw picks an ordered pair of class indices; the pair is kept only when
# the two indices differ, and the set discards repeated pairs. The final number
# of connectors can therefore be smaller than num_class_connectors.
connectors = set()
# Make exactly num_class_connectors draws (the previous upper bound of
# num_class_connectors - 1 made one draw fewer than requested)
for attempt in range(num_class_connectors):
    randint1 = rand.randint(0, num_classes - 1)
    randint2 = rand.randint(0, num_classes - 1)
    if randint1 != randint2:  # avoid classes connecting to themselves
        connectors.add((randint1, randint2))

print(connectors)

{(0, 1), (3, 1), (2, 3), (4, 2), (4, 1), (4, 0)}


In [291]:
# Turn every (first, second) class pair into a pair of connector labels
connectors_dict = {class_id: [] for class_id in range(num_classes)}
for first_class, second_class in connectors:
    label = "C{}_{}".format(first_class, second_class)
    # the "+" end is listed under the first class of the pair,
    # the "-" end under the second
    connectors_dict[first_class].append(label + "+")
    connectors_dict[second_class].append(label + "-")

print(connectors_dict)

{0: ['C0_1+', 'C4_0-'], 1: ['C0_1-', 'C3_1-', 'C4_1-'], 2: ['C2_3+', 'C4_2-'], 3: ['C3_1+', 'C2_3-'], 4: ['C4_2+', 'C4_1+', 'C4_0+']}


In [292]:
# Assemble random disjuncts for every grammar class
dict_disjuncts = {}
for class_id, class_connectors in connectors_dict.items():
    # a conjunct holds at most connectors_limit connectors, and never more
    # than the class actually has
    largest_conjunct = min(connectors_limit, len(class_connectors))
    class_disjuncts = set()  # a set, so identical conjuncts are stored only once

    # one conjunct is seeded by each connector of the class (arbitrary choice)
    for seed_connector in class_connectors:
        # how many connectors this conjunct will contain, seed included
        conjunct_size = rand.randint(1, largest_conjunct)

        # candidates for the remaining slots: every connector except the seed
        others = list(class_connectors)
        others.remove(seed_connector)

        # fill the remaining slots with distinct random connectors
        extras = rand.sample(others, conjunct_size - 1)
        class_disjuncts.add(tuple([seed_connector] + extras))

    dict_disjuncts[class_id] = class_disjuncts

print(dict_disjuncts)

{0: {('C0_1+',), ('C4_0-',)}, 1: {('C3_1-', 'C4_1-'), ('C0_1-',), ('C4_1-',)}, 2: {('C4_2-',), ('C2_3+', 'C4_2-')}, 3: {('C3_1+', 'C2_3-'), ('C2_3-', 'C3_1+')}, 4: {('C4_2+',), ('C4_1+',), ('C4_0+', 'C4_2+')}}
