# Instructions

Before running, copy the files `allele_dic_with_WT.pkl`, `aminotonumber.pkl`, `translate.pkl`, and `wt_seq.txt` into the `data/` subdirectory of this directory.

# Load data

In [2]:
import pickle

### Barcodes

In [3]:
# Unpickle the barcode table. Further down it is consumed as a mapping whose
# values are (residue number, codon) pairs.
# Only load pickle files from a trusted source: unpickling can run arbitrary code.
with open('data/allele_dic_with_WT.pkl', 'rb') as barcode_file:
    barcodes = pickle.load(barcode_file)

### AA ordering

In [4]:
# Unpickle the mapping from amino acid to its integer index (used later as the
# column index of the count matrix).
# Only load pickle files from a trusted source: unpickling can run arbitrary code.
with open('data/aminotonumber.pkl', 'rb') as index_file:
    aa_to_index = pickle.load(index_file)

In [5]:
# Invert aa_to_index: slot i of aas_ordered holds the amino acid whose index is i.

aas_ordered = [None for _ in aa_to_index]

for aa, i in aa_to_index.items():
    # An index may be claimed by at most one amino acid.
    assert aas_ordered[i] is None
    aas_ordered[i] = aa

# Every slot must have been filled, i.e. the indices have no gaps.
assert all(entry is not None for entry in aas_ordered)

### Genetic code

In [6]:
# Genetic code as stored on disk: codon -> amino acid, with codons written in
# the RNA alphabet (U rather than T).
# Only load pickle files from a trusted source: unpickling can run arbitrary code.
with open('data/translate.pkl', 'rb') as code_file:
    codon_to_aa = pickle.load(code_file)

In [7]:
# DNA version of the codon table: same amino acids, with every 'U' in a codon
# key rewritten as 'T'.
dna_codon_to_aa = dict(
    (rna_codon.replace('U', 'T'), amino)
    for rna_codon, amino in codon_to_aa.items()
)

### Sequence

In [8]:
# Read the whole sequence file as text (presumably the wild-type protein
# sequence — confirm).
# NOTE(review): the contents are not stripped, so any trailing newline or other
# whitespace in the file becomes part of wt_seq and counts toward its length.
with open('data/wt_seq.txt') as seq_file:
    wt_seq = seq_file.read()

In [9]:
# Number of characters in wt_seq; used below to size the count matrix.
prot_len = len(wt_seq)

# Build matrix

In [10]:
import numpy as np

In [11]:
# Integer count matrix, initially all zero: prot_len + 1 rows by 21 columns.
# Rows are indexed by residue number minus one and columns by aa_to_index
# (see the fill loop below).
# NOTE(review): 21 presumably equals len(aa_to_index) — confirm.
# NOTE(review): the reason for the extra "+ 1" row is not evident here — confirm.
counts = np.zeros((prot_len + 1, 21), dtype=int)

In [12]:
# Tally one count per non-wild-type entry of `barcodes`, at
# (residue row, amino-acid column) of the count matrix.

# Reset first so this cell is idempotent: without it, re-running the cell
# (without re-running the allocation cell) would add every entry a second time.
counts.fill(0)

for res, codon in barcodes.values():
    if codon != 'WT':  # wild-type entries are not counted
        # Translate the DNA codon, then look up the amino acid's column.
        aa = dna_codon_to_aa[codon]
        aa_index = aa_to_index[aa]

        # res starts at 1, rows start at 0
        counts[res - 1, aa_index] += 1

# Save

In [13]:
# Write the count matrix in NumPy's binary .npy format; the relative path
# resolves against the current working directory.
np.save('barcode_counts.npy', counts)