https://github.com/acmater/NK_Benchmarking/blob/master/utils/nk_utils/NK_landscape.py

In [1]:
import numpy as np
import itertools
import csv
np.random.seed(42)

In [2]:
import numpy as np
import itertools

def collapse_single(protein):
    """Join the items of one sequence into a single string.

    Every item of ``protein`` is converted with ``str`` and the pieces are
    concatenated without a separator, in iteration order.
    """
    return "".join(map(str, protein))

def hamming(str1, str2):
    """Count the positions at which two sequences differ.

    The inputs are paired up with ``zip``, so only positions present in both
    are compared; trailing items of a longer input are ignored.
    """
    mismatches = (left != right for left, right in zip(str1, str2))
    return sum(mismatches)

def all_genotypes(N, AAs):
    """Enumerate every length-N sequence over the alphabet ``AAs``.

    Returns a NumPy array built from the Cartesian product of ``AAs`` with
    itself ``N`` times, one row per genotype, in ``itertools.product`` order.
    """
    combos = itertools.product(AAs, repeat=N)
    return np.array([*combos])

def neighbors(sequence, sequence_space):
    """Return the members of ``sequence_space`` one mutation away from ``sequence``.

    A member is kept when its Hamming distance to ``sequence`` (as computed
    by ``hamming``) equals 1. Members are returned in the order in which
    ``sequence_space`` yields them.
    """
    return [
        candidate
        for candidate in sequence_space
        if hamming(sequence, candidate) == 1
    ]

def custom_neighbors(sequence, sequence_space, d):
    """Return the members of ``sequence_space`` at Hamming distance ``d``.

    Each member is compared with ``sequence`` via ``hamming`` and kept when
    the distance equals ``d`` exactly. Iteration order of ``sequence_space``
    is preserved in the result.
    """
    return [
        candidate
        for candidate in sequence_space
        if hamming(sequence, candidate) == d
    ]

def genEpiNet(N, K):
    """Build a random epistatic network for a sequence of length N.

    For every position ``i`` in ``range(N)``, ``K`` distinct other positions
    are drawn with ``np.random.choice`` (without replacement). The result
    maps ``i`` to the sorted list of those positions plus ``i`` itself, so
    each entry holds exactly ``K + 1`` positions.
    """
    network = {}
    for site in range(N):
        # Candidate partners are all positions except the site itself.
        others = [pos for pos in range(N) if pos != site]
        partners = np.random.choice(others, K, replace=False).tolist()
        partners.append(site)
        partners.sort()
        network[site] = partners
    return network

def genEpiNet_local(N, K=1):
    """
    Build a local epistatic network for a sequence of length N.

    Position ``i`` is linked to itself and to the next ``K`` positions,
    without wrapping around the end of the sequence:
      K=1 -> [i, i+1]
      K=2 -> [i, i+1, i+2]
    Near the end of the sequence the list is cut off at position N-1, so the
    last entries are shorter.
    """
    return {
        start: list(range(start, min(start + K + 1, N)))
        for start in range(N)
    }


def fitness_i(sequence, i, epi, mem):
    """Return the memoised random fitness contribution of position ``i``.

    The cache key is the tuple of ``(position, residue)`` pairs for the
    positions listed in ``epi[i]``. On the first request for a key a value
    is drawn from ``np.random.uniform(0, 1)`` and stored in ``mem``; later
    requests return the stored value.

    ``sequence`` is indexed with the whole list ``epi[i]`` at once, so it
    must support that kind of indexing (presumably a NumPy array).

    NOTE(review): the key does not contain ``i`` itself, so two positions
    whose ``epi`` entries are equal share one cached value.
    """
    linked = epi[i]
    key = tuple(zip(linked, sequence[linked]))
    if key in mem:
        return mem[key]
    # Draw only for unseen keys so the random stream is consumed once per key.
    drawn = np.random.uniform(0, 1)
    mem[key] = drawn
    return drawn

def fitness(sequence, epi, mem):
    """Return the fitness of a whole sequence.

    The per-position contributions from ``fitness_i`` are collected for
    positions ``0 .. len(sequence) - 1`` in order and averaged with
    ``np.mean``. ``mem`` is the shared cache passed through to ``fitness_i``.
    """
    contributions = []
    for site in range(len(sequence)):
        contributions.append(fitness_i(sequence, site, epi, mem))
    return np.mean(contributions)

def makeNK_local(N, K, AAs):
    """Build an NK landscape whose interactions come from ``genEpiNet_local``.

    Returns a 4-tuple ``(land, sequenceSpace, epi_net, f_mem)``:
      land          -- list of ``(genotype, fitness)`` pairs, one per row of
                       ``sequenceSpace`` and in the same order
      sequenceSpace -- all genotypes of length ``N`` over ``AAs``
      epi_net       -- the interaction network used
      f_mem         -- the cache of contributions filled while scoring
    """
    epi_net = genEpiNet_local(N, K)
    sequenceSpace = all_genotypes(N, AAs)
    f_mem = {}
    land = []
    for genotype in sequenceSpace:
        score = fitness(genotype, epi=epi_net, mem=f_mem)
        land.append((genotype, score))
    return land, sequenceSpace, epi_net, f_mem

In [3]:
genEpiNet(5, K=2)

{0: [0, 2, 4], 1: [1, 2, 4], 2: [0, 2, 4], 3: [0, 1, 3], 4: [1, 2, 4]}

In [4]:
genEpiNet_local(5, K=2)

{0: [0, 1, 2], 1: [1, 2, 3], 2: [2, 3, 4], 3: [3, 4], 4: [4]}

In [45]:
if __name__ == "__main__":
    # The guard wraps the whole export. Previously only the landscape
    # computation was guarded, so importing this file still opened (and
    # truncated) the CSV and then failed on the undefined `genotypes`.
    for i in range(1, 4):
        # One landscape per K in 1..3 over 10 sites and the alphabet "ATCG".
        land_K2, seq, _, f_mem = makeNK_local(10, i, "ATCG")
        genotypes = ["".join(sublist) for sublist in seq]
        phenotypes = [sublist[1] for sublist in land_K2]

        # newline='' lets the csv module control line endings itself.
        with open(f'NK_local{i}.csv', 'w', newline='') as file:
            writer = csv.writer(file)

            # Header row, then one (genotype, fitness) row per sequence.
            writer.writerow(['Genotype', 'Phenotype'])
            writer.writerows(zip(genotypes, phenotypes))