<a href="https://colab.research.google.com/github/javiervz/zipf/blob/master/zipf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**A decentralized route to the origins of scaling in human language**

First, we create a population 

In [0]:
import networkx as nx
import numpy as np
import random

In [0]:
# periodic grid graph of dim 2 and length L
def grid(L):
  G = nx.grid_graph([L,L])
  return G

In [0]:
# matrix of ones and zeros (at random)
def lexical_matrix(n):
  M=np.array([0 if random.random()<0.5 else 1 for i in range(n**2)]).reshape(n,n)
  return M

In [0]:
# a population is defined by a (i) grid; and (ii) a dict of numpy matrices (representing vocabularies)
# L -> lenght of the grid 
# n -> number of words (rows of the lexical matrices)
# n -> number of meanings (columns of lexical matrices) (that is, only squared matrices)
def population(L,n):
  # grid graph
  G=grid(L)
  nodes=G.nodes()
  lexical_dict={node:lexical_matrix(n) for node in nodes}
  return [G,lexical_dict]

In [22]:
population(5,2)

[<networkx.classes.graph.Graph at 0x7fcbd9ad95c0>, {(0, 0): array([[1, 1],
         [1, 1]]), (0, 1): array([[0, 0],
         [1, 0]]), (0, 2): array([[1, 0],
         [0, 0]]), (0, 3): array([[0, 1],
         [0, 1]]), (0, 4): array([[1, 0],
         [0, 1]]), (1, 0): array([[1, 1],
         [0, 0]]), (1, 1): array([[1, 0],
         [1, 1]]), (1, 2): array([[1, 0],
         [1, 0]]), (1, 3): array([[0, 0],
         [0, 1]]), (1, 4): array([[0, 0],
         [0, 1]]), (2, 0): array([[0, 0],
         [0, 1]]), (2, 1): array([[1, 0],
         [1, 1]]), (2, 2): array([[0, 0],
         [1, 0]]), (2, 3): array([[0, 0],
         [0, 0]]), (2, 4): array([[1, 1],
         [0, 0]]), (3, 0): array([[1, 0],
         [1, 1]]), (3, 1): array([[0, 1],
         [1, 1]]), (3, 2): array([[1, 1],
         [0, 0]]), (3, 3): array([[0, 1],
         [0, 0]]), (3, 4): array([[1, 0],
         [1, 1]]), (4, 0): array([[0, 0],
         [1, 0]]), (4, 1): array([[0, 0],
         [0, 0]]), (4, 2): array([[1, 1],
 

Second, we define some functions on lexical interests

In [0]:
# this function allows us to consider the level of lexical compromise of agents
# matrix -> lexical matrix of the speaker
# meaning -> selected column
# speaker_interest -> if 0 the speaker selects the most ambiguous word; else, the speaker selects the least ambiguous word
def select_word(matrix,meaning,speaker_interest):
  # sum of rows (number of meanings!)
  sum_rows=matrix.sum(axis=1)
  # select the words with the most ambiguity if speaker==0
  if speaker_interest==0:
    ambiguity=max(sum_rows)
  else:
    ambiguity=min(sum_rows)
  # indices associated to ambiguity
  indices_ambiguity=[index for index in range(len(sum_rows)) if sum_rows[index]==ambiguity]
  # select only one word!
  word=random.choice(indices_ambiguity)
  return word
  

In [0]:
# Now, we define a simple speaker-hearer interaction
# speaker, hearer -> agent's locations (nodes!)
# lexical_dict -> dictionary of lexical metrices
# speaker_interest -> if 0 the speaker selects the most ambiguous word; else, the speaker selects the least ambiguous word
# meaning -> the topic of the conversation
def interaction(speaker,hearer,lexical_dict,speaker_interest,meaning):
  # is there some word for the meaning?
  # speaker's lexical matrix
  speaker_matrix=lexical_dict[speaker]
  if sum(speaker_matrix[meaning,:])==0:
    # add some word
    speaker_matrix[random.choice(range(len(speaker_matrix)))][meaning]=1
    # update speaker's lexical matrix
    lexical_dict[speaker]=speaker_matrix
  # lexical matrices
  hearer_matrix=lexical_dict[hearer]
  speaker_matrix=lexical_dict[speaker]
  # the speaker selects one word according to its lexical compromise
  word=select_word(lexical_dict[speaker],meaning,speaker_interest)
  # now, simple naming game rules
  # the hearer does not know the word (failure!)
  if hearer_matrix[word][meaning]==0:
    hearer_matrix[word][meaning]=1
  lexical_dict[hearer]=hearer_matrix
  # else (success!)
  if hearer_matrix[word][meaning]==1:
    n=len(speaker_matrix)
    for i in range(n):
      # all words are set to 0
      hearer_matrix[i][meaning]=0
      speaker_matrix[i][meaning]=0
    # except one!
    hearer_matrix[word][meaning]=1
    speaker_matrix[word][meaning]=1
  # update dict of lexical matrices
  lexical_dict[hearer]=hearer_matrix
  lexical_dict[speaker]=speaker_matrix
  return lexical_dict
  
  
  
  

Third, preparing the simulation

In [0]:
# simulation of the agent-based model
# time -> number of interactions
# P -> population
def simulation(time,P,speaker_interest,n):
  # nodes 
  nodes=list(P[0].nodes())
  # lexical_matrices
  lexical_matrices=P[1]
  # loop!
  for i in range(time):
    # topic of the conversation
    meaning=random.choice(list(range(n)))
    # speaker
    speaker=random.choice(nodes)
    # hearer
    hearer=random.choice(list(P[0].neighbors(speaker)))
    P[1]=interaction(speaker,hearer,P[1],speaker_interest,meaning)
  
  return P

In [0]:
## measuring the effective vocabulary
def effective_vocabulary(lexical_matrices,n): 
  a=0
  for node in lexical_matrices.keys():
    for i in range(n):
      if sum(lexical_matrices[node][i,:])>0:
        a+=1

  return a/float(len(lexical_matrices.keys())*n)

In [72]:
P=population(32,3)
print(effective_vocabulary(P[1],3))
print(effective_vocabulary(simulation(100000,P,0,3)[1],2))

0.8792317708333334
0.3134765625
