## NK Model

In [5]:
#import necessary packages
import numpy as np
import random as rd
#set values of N and K here
N = 4
K = 2
B = 2**(K+1) #=total fitness contributions per gene (= columns in fitness matrix)
M = 4 #total number of individuals (= # average genomes or groups)

Creating 3 matrices with versions of NK model:
- NK - without neutrality
- NKp - with neutrality (probabilistic)
- NKq - with neutrality (quantised)

In [6]:
#create fitness matrix without neutrality: simply array of shape (N,X), filled with random decimals 
fmk = np.random.rand(N, B)

#NKp: reduce fraction of fitness contributions in fm to 0
# p = probability a value is set to 0
# Verel et al.: p ∈ {0.5, 0.8, 0.9}; Geard et al. p = ((N-1)/N) -  but explore options
p = ((N-1)/N) #is it? :)
fmp = np.where(np.random.rand(*fmk.shape) < p, 0, fmk) 
#takes array of shape fmk and filled with random decimals, and if decimals in that array are below p, corresponding decimal from fmk becomes 0 in new array fmp

#NKq: fitness contribution < 0.5 == 0, > 0.5 == 1
# q = number of quantiles (or levels). q > 1 - i.e. setting q as 2 will divide the decimals in two groups of integers: 0 and 1. 
# Verel et al. (2011) - q ∈ {2, 4, 10} - but explore options - Higher q -> lower neutrality 
q = 4
fmq = np.digitize(fmk, bins=np.linspace(0, 1, q+1), right=True) - 1
#decimals from fmk split into q bins numbered from 0 to q-1, based on their decimal value, new values in array fmq

Choose neutrality version:

In [7]:
#replace fmk in following line with "fmp" or "fmq" when adding probabilistic or quantized neutrality to model
#note: maybe for final code: one function to create model versions and choose 
##als het lukt?
fm = np.copy(fmk) 
print(fm)

[[0.33129837 0.69195667 0.48252751 0.72554683 0.63444559 0.97270456
  0.38264242 0.52557461]
 [0.91749514 0.62603447 0.86754398 0.49760774 0.85065911 0.93576974
  0.03285659 0.16766977]
 [0.02408884 0.17248762 0.06859054 0.03385047 0.06570673 0.15214895
  0.08675703 0.42333469]
 [0.71643893 0.90442738 0.61668358 0.61218754 0.31482263 0.11946488
  0.2239394  0.60994687]]


Creating corresponding epistasis matrix

In [8]:
#creates "identity matrix": array with genome id's
im0 = np.arange(0, B, 1)
im1 = im0[np.newaxis, :]
im = np.repeat(im1, N, axis=0)
print("identity matrix")
print(im)

#Binary representation of im (just for visualisation)
imbin = np.vectorize(np.binary_repr)(im, 4) #increase 4 to to 8/16/32 with larger N 
print("binary identity matrix")
print(imbin)
#maybe useful later

identity matrix
[[0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7]
 [0 1 2 3 4 5 6 7]]
binary identity matrix
[['0000' '0001' '0010' '0011' '0100' '0101' '0110' '0111']
 ['0000' '0001' '0010' '0011' '0100' '0101' '0110' '0111']
 ['0000' '0001' '0010' '0011' '0100' '0101' '0110' '0111']
 ['0000' '0001' '0010' '0011' '0100' '0101' '0110' '0111']]


In [9]:
#important: in this version each gene influenced by K others, but genes 
# can influence >2 other genes, so some are (way) more influential than others
# print a few times to see
val = list(range(0, N))
em1 = []

for row in range(N):
    rd.shuffle(val)  #shuffle the values
    em1.append(val[:2] + [row])  #epistatic genes as pair and row number added

em1_inc = np.array(em1) #with own gene referenced
print(em1_inc)

em1 = (em1_inc[:, :-1]) #without own gene referenced


[[1 0 0]
 [2 1 1]
 [2 1 2]
 [1 3 3]]


In [10]:
#important: in this version each gene influenced by K others, and influences K others, so all equally influential
#help from stackoverflow user
#generates tree (nested dictionary) of all possible permutations of list of numbers except for a specified level
def generate_all_moved_perm_tree(level, nums):
    if len(nums) == 1:
        if level == nums[0]:
            return None
        else:
            return {nums[0]: {}}
    allowed_n = list(nums)
    if level in allowed_n:
        allowed_n.remove(level)
    result = {}
    for n in allowed_n:
        sublevel_n = list(nums)
        if n in sublevel_n:
            sublevel_n.remove(n)
        subtree = generate_all_moved_perm_tree(level + 1, sublevel_n)
        if subtree is not None:
            result[n] = subtree
    if len(result) == 0:
        return None
    return result

#picks permutation of numbers from previously generated tree, with each number selected only once
def pick_all_moved_perm(all_moved_perm_tree, picked=None):
    if picked is None:
        picked = set()
    allowed_ns = set(all_moved_perm_tree.keys()) - picked
    if not allowed_ns:
        return []
    n = rd.choice(list(allowed_ns))
    picked.add(n)
    l = [n]
    sub_tree = all_moved_perm_tree[n]
    if len(sub_tree) > 0:
        l.extend(pick_all_moved_perm(sub_tree, picked))
    return l

#generates array of unique pairs of numbers, with no number repeated in row 
def generate_unique_r(t, num_rows):
    result = []
    for _ in range(num_rows):
        row = list(zip(pick_all_moved_perm(t), pick_all_moved_perm(t)))
        while any(x[0] == x[1] for x in row):
            row = list(zip(pick_all_moved_perm(t), pick_all_moved_perm(t)))
        result.extend(row)
    return np.array(result[:num_rows])

t = generate_all_moved_perm_tree(1, range(1, N+1))
em2 = generate_unique_r(t, N)

#for comparison: (can be removed)
print("Epistasis matrix with repetition")
print(em1_inc)
print("Epistasis matrix without repetition")
em2 -= 1 #without own gene referenced

em2_inc = np.hstack((em2, np.arange(em2.shape[0]).reshape(-1, 1)))
print(em2_inc) #with own gene referenced 

##er moet een manier zijn om dit makkelijker te doen???

Epistasis matrix with repetition
[[1 0 0]
 [2 1 1]
 [2 1 2]
 [1 3 3]]
Epistasis matrix without repetition
[[1 2 0]
 [2 3 1]
 [3 0 2]
 [0 1 3]]


Calculating coefficients ai0 to aij

In [11]:
def calc_a(K, fm): 
    a_coef = []
    for r in fm:
        a = [0.0] * B  # creates list with zeros as floats for each row & X cols
        a[0] = r[0] #because ai0=Fi0 # Calculate ai0 for i = 0
        for j in range(1, B): 
            sum = 0.0 
            for l in range(0, j): #only already calculated coefficients
                if l == (l & j): #if l equal to bitwise AND of l and j (ex: 001&101->001 so TRUE, 001&100->000 so FALSE)
                    sum += a[l] 
            a[j] = r[j] - sum 
        a_coef.append(a) # append new a's into a_values array
    return a_coef

a_coef = calc_a(K, fm)
a_shape = np.reshape(a_coef, (N, B))

if np.array_equal(fm, fmk): 
    print("Coefficient matrix fmk")
elif np.array_equal(fm, fmp):
    print("Coefficient matrix fmp")
else: 
    print("Coefficient matrix fmq")
print(a_shape) 

Coefficient matrix fmk
[[ 0.33129837  0.3606583   0.15122914 -0.11763898  0.30314721 -0.02239932
  -0.4030323  -0.07768781]
 [ 0.91749514 -0.29146067 -0.04995116 -0.07847557 -0.06683602  0.37657129
  -0.76785136  0.12817812]
 [ 0.02408884  0.14839878  0.04450169 -0.18313884  0.04161789 -0.06195656
  -0.02345139  0.43327428]
 [ 0.71643893  0.18798846 -0.09975534 -0.1924845  -0.4016163  -0.3833462
   0.00887211  0.77384972]]


Compute model for genome fitness

In [12]:
#array with all individuals
coords = np.random.rand(M, N) #randomly generates coordinates within hypercube for all individuals
print("coordinates within hypercube/avg. gene values (dimension N) of individual 1 to A")
print(coords)
#replace by actual averages of individual genomes in each group in final integrated code!

ind = 3 #pick specific individual based on id - can change final function to just look at one specific individual
select = coords[ind-1,:] 
#for reference/checking, can be removed
print(a_shape)
print(coords)
print(em2_inc)


coordinates within hypercube/avg. gene values (dimension N) of individual 1 to A
[[0.39095908 0.38397755 0.61223727 0.89663243]
 [0.05493094 0.00825215 0.09704813 0.0726054 ]
 [0.68840634 0.35167659 0.75519214 0.07741936]
 [0.31803689 0.19888339 0.45866887 0.31894073]]
[[ 0.33129837  0.3606583   0.15122914 -0.11763898  0.30314721 -0.02239932
  -0.4030323  -0.07768781]
 [ 0.91749514 -0.29146067 -0.04995116 -0.07847557 -0.06683602  0.37657129
  -0.76785136  0.12817812]
 [ 0.02408884  0.14839878  0.04450169 -0.18313884  0.04161789 -0.06195656
  -0.02345139  0.43327428]
 [ 0.71643893  0.18798846 -0.09975534 -0.1924845  -0.4016163  -0.3833462
   0.00887211  0.77384972]]
[[0.39095908 0.38397755 0.61223727 0.89663243]
 [0.05493094 0.00825215 0.09704813 0.0726054 ]
 [0.68840634 0.35167659 0.75519214 0.07741936]
 [0.31803689 0.19888339 0.45866887 0.31894073]]
[[1 2 0]
 [2 3 1]
 [3 0 2]
 [0 1 3]]


In [13]:
#choose preferred epistasis version (em1: with repetition, em2: without)
em = em2_inc

#Calculate fitness components fi based on relevant coefficients and coordinates
def calculate_fitness(coefficients, epistasis, genomes):
    fit_val = np.zeros((M, N))

    for group in range(M):
        genome = genomes[group]

        for gene in range(N):
            result = 0  # Initialize with 0

            for j in range(coefficients.shape[1]): #summation & multiplication,bitwise 
                contribution = coefficients[gene, j] * genome[gene] ** (1 & j)

                for k in range(epistasis.shape[1]): 
                    epi_index = epistasis[gene, k]
                    epi_value = genome[epi_index]
                    product_term = epi_value ** ((2**k & j) / 2**k)
                    contribution *= product_term
                result += contribution
                ##aaah eindelijk?!
            fit_val[group, gene] = result

    return fit_val

fitness = calculate_fitness(a_shape, em, coords)
#M groups&avg genomes so M rows, N genes so N cols with fitness components Fi-
print("Fitness components:")
print(fitness)

#print fitness of avg. genome
fit = np.mean(fitness, axis=1,)
final = fit.reshape(-1, 1)
print("Avg. fitness of each genotype (group):")
print(final)




Fitness components:
[[0.48515858 0.54198959 0.13962084 0.33398207]
 [0.36063606 0.91262138 0.03139596 0.6870958 ]
 [0.49696112 0.82629505 0.08570096 0.65645846]
 [0.4565148  0.81826765 0.0721871  0.5768329 ]]
Avg. fitness of each genotype (group):
[[0.37518777]
 [0.4979373 ]
 [0.5163539 ]
 [0.48095061]]
