<a href="https://colab.research.google.com/github/javiervz/zipf/blob/master/zipf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**A decentralized route to the origins of scaling in human language**

First, we create a population: a square grid of agents, each holding a random binary lexical matrix.

In [0]:
import networkx as nx
import numpy as np
import random

In [0]:
def grid(L, periodic=False):
  """Build the two-dimensional L x L grid graph on which the agents live.

  Args:
    L: number of nodes along each side of the grid.
    periodic: forwarded to ``nx.grid_graph``. The default ``False`` keeps
      the open-boundary lattice this function has always returned; pass
      ``True`` to get wrap-around (periodic) boundaries.

  Returns:
    The graph produced by ``nx.grid_graph`` for dimensions ``[L, L]``.
  """
  # nx.grid_graph is NOT periodic unless asked, so the flag is passed
  # explicitly instead of relying on the library default.
  return nx.grid_graph([L, L], periodic=periodic)

In [0]:
def matrix(n):
  """Return an n x n numpy array whose entries are 0 or 1, chosen at random.

  Each entry is decided by one call to ``random.random()``: a draw below 0.5
  gives 0, any other draw gives 1. Entries are drawn in row-major order.
  """
  cells = []
  for _ in range(n**2):
    draw = random.random()
    cells.append(1 if draw >= 0.5 else 0)
  # flat list of n*n cells -> square matrix
  return np.array(cells).reshape(n, n)

In [0]:
def population(L,n):
  """Create a population: a grid graph plus one random lexical matrix per agent.

  Args:
    L: side length of the grid (passed to ``grid``).
    n: size of every lexical matrix (passed to ``matrix``). Rows stand for
      words and columns for meanings; the same n is used for both, so only
      square matrices are produced.

  Returns:
    A two-element list ``[G, lexical_dict]`` where ``G`` is the graph from
    ``grid(L)`` and ``lexical_dict`` maps each integer ``0 .. len(G)-1`` to
    its own independently drawn ``matrix(n)``.
  """
  G = grid(L)
  # NOTE(review): keys are consecutive integers, not the node labels of G --
  # confirm how callers map an agent's grid location to its dictionary key.
  lexical_dict = {agent: matrix(n) for agent in range(len(G))}
  return [G, lexical_dict]

In [0]:
# example: grid of side 5 with 2x2 lexical matrices
population(L=5, n=2)

[<networkx.classes.graph.Graph at 0x7f6308beea90>, {0: array([[0, 1],
         [1, 0]]), 1: array([[1, 0],
         [1, 1]]), 2: array([[0, 1],
         [0, 1]]), 3: array([[1, 0],
         [1, 0]]), 4: array([[1, 0],
         [1, 0]]), 5: array([[1, 0],
         [1, 0]]), 6: array([[1, 1],
         [1, 0]]), 7: array([[1, 0],
         [1, 1]]), 8: array([[1, 1],
         [0, 0]]), 9: array([[1, 0],
         [0, 1]]), 10: array([[1, 0],
         [1, 0]]), 11: array([[1, 0],
         [1, 0]]), 12: array([[0, 1],
         [1, 0]]), 13: array([[1, 1],
         [0, 1]]), 14: array([[0, 1],
         [0, 1]]), 15: array([[0, 0],
         [0, 0]]), 16: array([[1, 0],
         [1, 1]]), 17: array([[0, 1],
         [0, 0]]), 18: array([[0, 0],
         [0, 0]]), 19: array([[0, 0],
         [1, 1]]), 20: array([[1, 1],
         [0, 1]]), 21: array([[1, 1],
         [0, 0]]), 22: array([[1, 1],
         [0, 1]]), 23: array([[0, 0],
         [1, 0]]), 24: array([[1, 0],
         [0, 1]])}]

Second, we define some functions that model the agents' lexical interests.

In [0]:
def select_word(matrix,meaning,speaker_interest):
  """Pick a word (row index) according to the speaker's lexical interest.

  Args:
    matrix: lexical matrix of the speaker (rows are words).
    meaning: selected column. It is accepted but not consulted by the
      current implementation.
    speaker_interest: if 0 the speaker selects among the most ambiguous
      words; otherwise among the least ambiguous ones.

  Returns:
    The index of one row whose row sum equals the chosen extreme; ties are
    broken with ``random.choice``.
  """
  # ambiguity of a word = its row sum (how many meanings it is linked to)
  meanings_per_word = matrix.sum(axis=1)
  pick_extreme = max if speaker_interest == 0 else min
  target = pick_extreme(meanings_per_word)
  # every word that reaches the target ambiguity is a candidate
  candidates = [row for row, count in enumerate(meanings_per_word) if count == target]
  return random.choice(candidates)
  

In [0]:
def interaction(speaker,hearer,lexical_dict,speaker_interest,meaning):
  """Simple speaker-hearer interaction (work in progress).

  Args:
    speaker: the speaking agent; must be a key of ``lexical_dict``.
    hearer: the listening agent. Not used yet.
    lexical_dict: dictionary of lexical matrices, one per agent.
    speaker_interest: if 0 the speaker selects the most ambiguous word;
      otherwise the least ambiguous word (see ``select_word``).
    meaning: the topic of the conversation.

  Returns:
    None. Only the speaker's word selection is implemented so far.
  """
  # The speaker selects a word from its OWN lexical matrix. The bare name
  # `matrix` must not be passed here: at module level it is the matrix()
  # factory function, not an array, so select_word would fail on `.sum`.
  word=select_word(lexical_dict[speaker],meaning,speaker_interest)
  # TODO: simple naming game rules (the hearer's update) are not implemented yet
  

2