In [1]:
import re
import os
import Bio
import Bio.SeqIO
from Bio.Seq import Seq
import time

# Get human ZFs

In [2]:
# Zinc-finger motif: two leading residues, C, 2-4 residues, C, 12 residues, H,
# 3-7 residues, then a final H or C (the character class accepts both).
zf_re = re.compile('..C.{2,4}C.{12}H.{3,7}[HC]') # i.e. xxC-x(2,4)-C-x(12)-H-x(3,7)-[H/C]
# The C-x(2,4)-C pair on its own; not referenced by any of the cells visible in this file.
zf_cterm = re.compile('C.{2,4}C')

In [3]:
# Parse the FASTA file into {accession: sequence}. The accession is the second
# '|'-separated field of each header line. Sequence lines are concatenated with
# only their line terminator removed, so a final line without a trailing newline
# keeps its last residue. The file is closed even if parsing fails.
seq_chunks = {}
name = None
with open('full_zf.fasta','r') as infile:
    for line in infile:
        line = line.rstrip('\r\n')
        if line.startswith('>'):
            name = line.split('|')[1]
            # A repeated accession restarts its sequence (the last record wins).
            seq_chunks[name] = []
        elif name is not None:
            seq_chunks[name].append(line)
full_protein_lib = {name: ''.join(chunks) for name, chunks in seq_chunks.items()}

In [4]:
# Collect every distinct zinc-finger sequence found in the proteins.
#   allzf_lib : '<accession> finger <n>' -> finger sequence
#   zf_order  : finger names in discovery order (later cells rely on this order
#               to line predictor output up with fingers)
# <n> counts every match within a protein, including matches skipped as duplicates.
zf_order = []
allzf_lib = {}
seen_zf_seqs = set()  # O(1) duplicate test instead of rescanning allzf_lib for each match
for protein, seq in full_protein_lib.items():
    # finditer yields the same non-overlapping left-to-right matches as
    # repeatedly searching the remainder of the sequence after each hit.
    for fing_num, zf_match in enumerate(zf_re.finditer(seq), start=1):
        zf = zf_match.group()
        if zf in seen_zf_seqs:
            continue
        seen_zf_seqs.add(zf)
        name = protein + ' finger ' + str(fing_num)
        allzf_lib[name] = zf
        zf_order.append(name)
# Manual sequence overrides for two fingers. They are NOT appended to zf_order.
# NOTE(review): presumably both names already exist from the scan above; if not,
# cells that index per-finger results by allzf_lib keys will miss them - confirm.
allzf_lib['Q9Y2K1 finger 1']='FTCDSCGFGFSCEKLLDEHVLTC'
allzf_lib['Q9GZU2 finger 6']='YECEDCGLGFVDLTDLTDHQKVH'

## Generate input files for MHC predictors with individual ZFs

In [5]:
# Write the MARIA input table: one row per finger, whose peptide is the last
# 14 residues of the finger followed by the TGERP linker.
# The context manager closes the file even if a write fails.
with open('maria_nterm_link_zfs_final.txt','w') as outfile:
    outfile.write('Allele1\tAllele2 (Same as Allele1 if analyzing a single allele)\tGene Symbol\tPeptide Sequence\tTPM (Optional)\n')
    for zf, seq in allzf_lib.items():
        outfile.write('HLA-DRB1*01:01\tHLA-DRB1*01:01\t{}\t{}TGERP\t\n'.format(zf,seq[-14:]))

In [6]:
# Write a FASTA file with one record per finger: the last 14 residues of the
# finger followed by the TGERP linker.
# The context manager closes the file even if a write fails.
with open('netmhc_nterm_link_zfs_final.txt','w') as outfile:
    for zf, seq in allzf_lib.items():
        outfile.write('>{}\n{}TGERP\n'.format(zf,seq[-14:]))

In [7]:
# Write a FASTA file with one record per finger: the TGERP linker followed by
# the first 14 residues of the finger.
# The context manager closes the file even if a write fails.
with open('netmhc_cterm_link_zfs_final.txt','w') as outfile:
    for zf, seq in allzf_lib.items():
        outfile.write('>{}\nTGERP{}\n'.format(zf,seq[:14]))

## Read MHC output files for individual ZFs

In [8]:
# Read the MARIA result table and keep the fingers whose score (column index 7)
# is below 63. Column index 2 holds the finger name used as the key in allzf_lib.
maria_accepted_zfs = {}
with open('maria_nterm_final.txt','r') as infile:
    infile.readline()  # skip the header row
    for line in infile:
        if not line.strip():
            continue  # tolerate blank lines (e.g. a trailing newline at end of file)
        data = line.split('\t')
        name = data[2]
        score = data[7]
        if float(score) <63:
            maria_accepted_zfs[name] = allzf_lib[name]

In [9]:
# Parse the NetMHCIIpan report for the N-terminal-linker peptides.
# State machine over the report lines:
#   ready   - counts lines starting with '-'; a result table begins once the
#             count reaches 4 (it starts at 2, so the first table needs two).
#   reading - 0 outside a table, otherwise the 1-based row being examined.
# Tables are matched to fingers purely by position in zf_order.
# A finger is accepted only if six consecutive rows all have a value >= 37 in
# whitespace-separated column index 8; the first row below 37 rejects it.
nterm_accepted_netmhc_zfs = {}
ready = 2
reading = 0
index = 0
with open('allzf_netmhc_nterm.txt') as infile:  # closed on exit; the handle used to be left open
    for line in infile:
        if reading:
            data = line.split()
            score = float(data[8])
            if score < 37:
                reading = 0
            elif reading == 6:
                nterm_accepted_netmhc_zfs[zf] = allzf_lib[zf]
                reading = 0
            else:
                reading = reading + 1
        elif line[0] == '-':
            ready += 1
            if ready == 4:
                reading = 1
                ready = 0
                zf = zf_order[index]
                index = index + 1

In [10]:
# Parse the NetMHCIIpan report for the C-terminal-linker peptides.
# State machine over the report lines:
#   ready   - counts lines starting with '-'; a result table begins once the
#             count reaches 4 (it starts at 2, so the first table needs two).
#   reading - 0 outside a table, otherwise the 1-based row being examined.
# Tables are matched to fingers purely by position in zf_order.
# A finger is accepted only if six consecutive rows all have a value >= 37 in
# whitespace-separated column index 8; the first row below 37 rejects it.
cterm_accepted_netmhc_zfs = {}
ready = 2
reading = 0
index = 0
with open('allzf_netmhc_cterm.txt') as infile:  # closed on exit; the handle used to be left open
    for line in infile:
        if reading:
            data = line.split()
            score = float(data[8])
            if score < 37:
                reading = 0
            elif reading == 6:
                cterm_accepted_netmhc_zfs[zf] = allzf_lib[zf]
                reading = 0
            else:
                reading = reading + 1
        elif line[0] == '-':
            ready += 1
            if ready == 4:
                reading = 1
                ready = 0
                zf = zf_order[index]
                index = index + 1

In [11]:
# A finger passes NetMHCIIpan screening only if it was accepted in both the
# N-terminal and the C-terminal linker context.
netmhc_accepted_zfs = {
    accepted: allzf_lib[accepted]
    for accepted in nterm_accepted_netmhc_zfs
    if accepted in cterm_accepted_netmhc_zfs
}

## Generate FASTA-format file of ZFs for DeepZF and ZifRC input

In [12]:
# Export every finger as a FASTA-style record. The handle was previously never
# closed, so buffered records could be missing from the file when it was handed
# to the downstream tools; the context manager flushes and closes it.
# NOTE(review): records are separated by a bare '\r' rather than '\n' -
# presumably what the downstream tool expects; confirm before changing.
with open('allzf_fasta.txt','w') as outfile:
    for zf, zf_seq in allzf_lib.items():
        outfile.write('>{}\r{}\r'.format(zf,zf_seq))

## Read DeepZF and ZifRC output

In [13]:
# Read the DeepZF output: one number per line, consumed as four values
# (A, C, G, T) for each of three positions, for every finger in zf_order order.
#   deepzf_codons[zf] - the highest-valued base at each position
#   deepzf_scores[zf] - the sum of those three winning values
# Ties go to the first base in A, C, G, T order.
# Fix: when T won a position, the value for A was added to the score instead
# of the value for T.
deepzf_codons = {}
deepzf_scores = {}
with open('allzf_deepzf_output.csv','r') as infile:
    for zf in zf_order:
        codon = ''
        score = 0
        for position in range(3):
            base_values = [float(infile.readline()) for base in 'ACGT']
            best = base_values.index(max(base_values))  # first maximum wins ties
            codon += 'ACGT'[best]
            score += base_values[best]
        deepzf_codons[zf]=codon
        deepzf_scores[zf] = score

In [14]:
# Read the ZifRC report. For each record:
#   - the finger name is the second tab-separated field of the record's first
#     line, minus its final character (presumably the newline - confirm);
#   - five lines further on come three rows, one per position, whose
#     whitespace-separated fields after the first are the A, C, G, T values;
#   - five more lines after those rows lead to the next record's first line.
#   zifrc_codons[name] - the highest-valued base at each position
#   zifrc_scores[name] - the sum of the three winning values
# Fix: the maximum used to be taken over the raw strings, which compares them
# lexicographically (e.g. '9.5e-05' > '0.9', '10.2' < '9.1'). Values are now
# converted to float before comparison.
zifrc_codons = {}
zifrc_scores = {}
with open('all_zifrc.txt','r') as infile:
    for skipped in range(3):  # the third line read is the first record's first line
        line = infile.readline()
    while line:
        data = line.split('\t')
        name = data[1][:-1]
        for skipped in range(5):
            line = infile.readline()
        codon = ''
        score = 0
        for position in range(3):
            base_values = [float(value) for value in line.split()[1:]]
            top = max(base_values)
            score = score + top
            # The first of A, C, G, T holding the maximum wins.
            for base, value in zip('ACGT', base_values):
                if value == top:
                    codon = codon + base
                    break
            line = infile.readline()
        for skipped in range(5):
            line = infile.readline()
        zifrc_codons[name] = codon
        zifrc_scores[name] = score

## Export ZF affinity summary files

In [15]:
# Write one tab-separated row per finger (name, sequence, predicted codon,
# score) to each summary file. ZifRC rows are written only for fingers that
# have a ZifRC prediction; a DeepZF row is written for every finger.
# Both files are closed even if a lookup or write fails part-way.
with open('ZifRC_data.txt','w') as zifrc_file, open('DeepZF_data.txt','w') as deepzf_file:
    for zf in allzf_lib:
        if zf in zifrc_codons:
            zifrc_file.write('{}\t{}\t{}\t{}\n'.format(zf,allzf_lib[zf],zifrc_codons[zf],zifrc_scores[zf]))
        deepzf_file.write('{}\t{}\t{}\t{}\n'.format(zf,allzf_lib[zf],deepzf_codons[zf],deepzf_scores[zf]))

## Determine ZF specificity for MARIA-accepted and NetMHCIIpan-accepted ZFs

In [16]:
# All 64 DNA triplets, ordered with the first base varying slowest (AAA, AAC, ..., TTT).
possible_codons = [
    base1 + base2 + base3
    for base1 in ['A', 'C', 'G','T']
    for base2 in ['A', 'C', 'G','T']
    for base3 in ['A', 'C', 'G','T']
]

In [17]:
# Group the MARIA-accepted fingers by their ZifRC-predicted codon; every
# possible codon gets an entry, and fingers without a ZifRC prediction are skipped.
maria_zifrc_codons = {triplet: [] for triplet in possible_codons}
for accepted in maria_accepted_zfs:
    if accepted not in zifrc_codons:
        continue
    maria_zifrc_codons[zifrc_codons[accepted]].append(accepted)

In [18]:
# Group the MARIA-accepted fingers by their DeepZF-predicted codon; every
# possible codon gets an entry, and fingers without a DeepZF prediction are skipped.
maria_deepzf_codons = {triplet: [] for triplet in possible_codons}
for accepted in maria_accepted_zfs:
    if accepted not in deepzf_codons:
        continue
    maria_deepzf_codons[deepzf_codons[accepted]].append(accepted)

In [19]:
# Group the NetMHCIIpan-accepted fingers by their ZifRC-predicted codon; every
# possible codon gets an entry, and fingers without a ZifRC prediction are skipped.
netmhc_zifrc_codons = {triplet: [] for triplet in possible_codons}
for accepted in netmhc_accepted_zfs:
    if accepted not in zifrc_codons:
        continue
    netmhc_zifrc_codons[zifrc_codons[accepted]].append(accepted)

In [20]:
# Group the NetMHCIIpan-accepted fingers by their DeepZF-predicted codon; every
# possible codon gets an entry, and fingers without a DeepZF prediction are skipped.
netmhc_deepzf_codons = {triplet: [] for triplet in possible_codons}
for accepted in netmhc_accepted_zfs:
    if accepted not in deepzf_codons:
        continue
    netmhc_deepzf_codons[deepzf_codons[accepted]].append(accepted)

In [21]:
# For each screen/predictor combination, list the codons that no accepted
# finger is predicted to bind (codons whose finger list is empty).
maria_zifrc_missing_codons = [
    triplet for triplet, fingers in maria_zifrc_codons.items() if len(fingers) == 0
]

netmhc_zifrc_missing_codons = [
    triplet for triplet, fingers in netmhc_zifrc_codons.items() if len(fingers) == 0
]

maria_deepzf_missing_codons = [
    triplet for triplet, fingers in maria_deepzf_codons.items() if len(fingers) == 0
]

netmhc_deepzf_missing_codons = [
    triplet for triplet, fingers in netmhc_deepzf_codons.items() if len(fingers) == 0
]

## Generate input files for ZF pairs

In [22]:
# Write every ordered pair of MARIA-accepted fingers as a MARIA input row:
# last 14 residues of the first finger + TGERP linker + first 14 residues of
# the second finger. Output is split into numbered files of 20000 rows, each
# starting with the header row. A new file is opened as soon as the current
# one is full, so the final file may contain only the header.
os.makedirs('maria input pairs final', exist_ok=True)  # a fresh checkout has no output directory
file_num =1
zf_count = 1
outfile = open('maria input pairs final/final_pair_{}.txt'.format(file_num), 'w')
try:
    outfile.write('Allele1\tAllele2 (Same as Allele1 if analyzing a single allele)\tGene Symbol\tPeptide Sequence\tTPM (Optional)\n')
    for zf_1 in maria_accepted_zfs.keys():
        for zf_2 in maria_accepted_zfs.keys():
            outfile.write('HLA-DRB1*01:01\tHLA-DRB1*01:01\t{}-{}\t{}TGERP{}\t\n'.format(zf_1,zf_2,maria_accepted_zfs[zf_1][-14:],maria_accepted_zfs[zf_2][:14]))
            zf_count += 1
            if zf_count >20000:
                outfile.close()
                file_num += 1
                zf_count = 1
                outfile = open('maria input pairs final/final_pair_{}.txt'.format(file_num), 'w')
                outfile.write('Allele1\tAllele2 (Same as Allele1 if analyzing a single allele)\tGene Symbol\tPeptide Sequence\tTPM (Optional)\n')
finally:
    # Close whichever file is current, including when a write fails.
    outfile.close()

In [23]:
# Write every ordered pair of NetMHCIIpan-accepted fingers as a FASTA record:
# last 14 residues of the first finger + TGERP linker + first 14 residues of
# the second finger. Output is split into numbered files of 5000 records. A new
# file is opened as soon as the current one is full, so the final file may be empty.
# NOTE(review): each sequence line ends with a tab before the newline, as in
# the tab-separated MARIA rows; presumably harmless to the predictor - confirm.
os.makedirs('netmhc input pairs final', exist_ok=True)  # a fresh checkout has no output directory
file_num =1
zf_count = 1
outfile = open('netmhc input pairs final/final_pair_{}.txt'.format(file_num), 'w')
try:
    for zf_1 in netmhc_accepted_zfs.keys():
        for zf_2 in netmhc_accepted_zfs.keys():
            outfile.write('>{}-{}\n{}TGERP{}\t\n'.format(zf_1,zf_2,netmhc_accepted_zfs[zf_1][-14:],netmhc_accepted_zfs[zf_2][:14]))
            zf_count += 1
            if zf_count >5000:
                outfile.close()
                file_num += 1
                zf_count = 1
                outfile = open('netmhc input pairs final/final_pair_{}.txt'.format(file_num), 'w')
finally:
    # Close whichever file is current, including when a write fails.
    outfile.close()

## Read output files from ZF pairs

In [24]:
# Build maria_zf_transitions: finger -> list of fingers that may directly
# follow it, from the MARIA results for finger pairs (files numbered 1 to 44).
# A pair is kept when its score (column index 7) is below 63 and both fingers
# are MARIA-accepted. The pair label (column index 2) is '<first>-<second>'.
# NOTE(review): the label is split on '-', which assumes that finger names
# never contain a hyphen themselves - confirm for the accessions in use.
maria_zf_transitions = {}
for file_num in range(1,45): #cycling through all MARIA output files for zf-zf pairs
    with open('maria final pairs/maria final {}.txt'.format(file_num),'r') as infile:
        infile.readline()  # skip the header row
        for line in infile:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline at end of file)
            data = line.split('\t')
            pair = data[2]
            score = float(data[7])
            if score >= 63: #threshold for predicted immunogenicity
                continue
            zfs = pair.split('-')
            zf1 = zfs[0]
            zf2 = zfs[1]
            if zf1 in maria_accepted_zfs and zf2 in maria_accepted_zfs:
                # map each finger to all fingers that can follow it
                maria_zf_transitions.setdefault(zf1, []).append(zf2)

In [25]:
# Collect the finger pairs accepted by NetMHCIIpan, then turn them into
# netmhc_zf_transitions: finger -> list of fingers that may directly follow it.
#
# Each report '<n>.txt' in 'netmhc final pairs' is read alongside the matching
# input file 'final_pair_<n>.txt'; result tables are matched to input records
# purely by position. The report state machine:
#   ready   - counts lines starting with '-'; a result table begins once the
#             count reaches 4 (it starts at 2, so the first table needs two).
#   reading - 0 outside a table, otherwise the 1-based row being examined.
# A pair is accepted only if 19 consecutive rows all have a value >= 37 in
# whitespace-separated column index 8; the first row below 37 rejects it.
netmhc_pair_list = []
# Sorted for a reproducible pair order; entries that are not '.txt' reports
# (hidden files and the like) are skipped instead of aborting the run.
for report_name in sorted(os.listdir('netmhc final pairs')):
    if not report_name.endswith('.txt'):
        continue
    file_num = report_name[:-4]
    with open('netmhc input pairs final/final_pair_{}.txt'.format(file_num), 'r') as input_file, \
         open('netmhc final pairs/{}.txt'.format(file_num),'r') as output_file:
        ready = 2
        reading = 0
        for out_line in output_file:
            if reading:
                data = out_line.split()
                score = float(data[8])
                if score < 37:
                    reading = 0
                elif reading == 19:
                    netmhc_pair_list.append(pair_name)
                    reading = 0
                else:
                    reading = reading + 1
            elif out_line[0] == '-':
                ready += 1
                if ready == 4:
                    reading = 1
                    ready = 0
                    # Header line '>first-second' -> 'first\tsecond'.
                    # NOTE(review): assumes finger names contain no '-' - confirm.
                    in_line = input_file.readline()
                    pair_name = in_line[1:-1].replace('-','\t')
                    in_line = input_file.readline()  # consume the sequence line

netmhc_zf_transitions = {}
for pair in netmhc_pair_list:
    zfs = pair.split('\t')
    netmhc_zf_transitions.setdefault(zfs[0], []).append(zfs[1])

## Generate files with acceptable ZF pairs

In [28]:
# Write one '<finger> <following finger>' line per accepted MARIA pair.
# The context manager closes the file even if a write fails.
with open('MARIA_transitions.txt','w') as outfile:
    for zf1, followers in maria_zf_transitions.items():
        for zf2 in followers:
            outfile.write('{} {}\n'.format(zf1,zf2))

In [29]:
# Write one '<finger> <following finger>' line per accepted NetMHCIIpan pair.
# The context manager closes the file even if a write fails.
with open('NetMHCII_transitions.txt','w') as outfile:
    for zf1, followers in netmhc_zf_transitions.items():
        for zf2 in followers:
            outfile.write('{} {}\n'.format(zf1,zf2))

## Generate data sets collating DNA-binding and MHC results

In [30]:
# Lookup tables combining the MARIA pair screen with ZifRC codon predictions.
#
# maria_zifrc_zf_possible_per_codon['<finger><codon>'] lists the fingers that
# may follow <finger> and are predicted to bind <codon>.
maria_zifrc_zf_possible_per_codon = {}
for leading_zf, followers in maria_zf_transitions.items():
    for triplet in possible_codons:
        maria_zifrc_zf_possible_per_codon[leading_zf + triplet] = [
            follower for follower in followers
            if follower in zifrc_codons and zifrc_codons[follower] == triplet
        ]

# maria_zifrc_codon_transitions[codon of a following finger] lists (without
# repeats) the codons of the fingers it may follow. Fingers lacking a ZifRC
# prediction are ignored, and every possible codon ends up with an entry.
maria_zifrc_codon_transitions = {}
for leading_zf, followers in maria_zf_transitions.items():
    if leading_zf not in zifrc_codons:
        continue
    leading_codon = zifrc_codons[leading_zf]
    for follower in followers:
        if follower not in zifrc_codons:
            continue
        preceding = maria_zifrc_codon_transitions.setdefault(zifrc_codons[follower], [])
        if leading_codon not in preceding:
            preceding.append(leading_codon)
for triplet in possible_codons:
    maria_zifrc_codon_transitions.setdefault(triplet, [])

In [31]:
# Lookup tables combining the NetMHCIIpan pair screen with ZifRC codon predictions.
#
# netmhc_zifrc_zf_possible_per_codon['<finger><codon>'] lists the fingers that
# may follow <finger> and are predicted to bind <codon>.
netmhc_zifrc_zf_possible_per_codon = {}
for leading_zf, followers in netmhc_zf_transitions.items():
    for triplet in possible_codons:
        netmhc_zifrc_zf_possible_per_codon[leading_zf + triplet] = [
            follower for follower in followers
            if follower in zifrc_codons and zifrc_codons[follower] == triplet
        ]

# netmhc_zifrc_codon_transitions[codon of a following finger] lists (without
# repeats) the codons of the fingers it may follow. Fingers lacking a ZifRC
# prediction are ignored, and every possible codon ends up with an entry.
netmhc_zifrc_codon_transitions = {}
for leading_zf, followers in netmhc_zf_transitions.items():
    if leading_zf not in zifrc_codons:
        continue
    leading_codon = zifrc_codons[leading_zf]
    for follower in followers:
        if follower not in zifrc_codons:
            continue
        preceding = netmhc_zifrc_codon_transitions.setdefault(zifrc_codons[follower], [])
        if leading_codon not in preceding:
            preceding.append(leading_codon)
for triplet in possible_codons:
    netmhc_zifrc_codon_transitions.setdefault(triplet, [])

In [32]:
# Lookup tables combining the MARIA pair screen with DeepZF codon predictions.
# Every finger involved must have a DeepZF prediction (plain dict indexing).
#
# maria_deepzf_zf_possible_per_codon['<finger><codon>'] lists the fingers that
# may follow <finger> and are predicted to bind <codon>.
maria_deepzf_zf_possible_per_codon = {}
for leading_zf, followers in maria_zf_transitions.items():
    for triplet in possible_codons:
        maria_deepzf_zf_possible_per_codon[leading_zf + triplet] = [
            follower for follower in followers if deepzf_codons[follower] == triplet
        ]

# maria_deepzf_codon_transitions[codon of a following finger] lists (without
# repeats) the codons of the fingers it may follow; every possible codon ends
# up with an entry.
maria_deepzf_codon_transitions = {}
for leading_zf, followers in maria_zf_transitions.items():
    leading_codon = deepzf_codons[leading_zf]
    for follower in followers:
        preceding = maria_deepzf_codon_transitions.setdefault(deepzf_codons[follower], [])
        if leading_codon not in preceding:
            preceding.append(leading_codon)
for triplet in possible_codons:
    maria_deepzf_codon_transitions.setdefault(triplet, [])

In [33]:
# Lookup tables combining the NetMHCIIpan pair screen with DeepZF codon predictions.
# Every finger involved must have a DeepZF prediction (plain dict indexing).
#
# netmhc_deepzf_zf_possible_per_codon['<finger><codon>'] lists the fingers that
# may follow <finger> and are predicted to bind <codon>.
netmhc_deepzf_zf_possible_per_codon = {}
for leading_zf, followers in netmhc_zf_transitions.items():
    for triplet in possible_codons:
        netmhc_deepzf_zf_possible_per_codon[leading_zf + triplet] = [
            follower for follower in followers if deepzf_codons[follower] == triplet
        ]

# netmhc_deepzf_codon_transitions[codon of a following finger] lists (without
# repeats) the codons of the fingers it may follow; every possible codon ends
# up with an entry.
netmhc_deepzf_codon_transitions = {}
for leading_zf, followers in netmhc_zf_transitions.items():
    leading_codon = deepzf_codons[leading_zf]
    for follower in followers:
        preceding = netmhc_deepzf_codon_transitions.setdefault(deepzf_codons[follower], [])
        if leading_codon not in preceding:
            preceding.append(leading_codon)
for triplet in possible_codons:
    netmhc_deepzf_codon_transitions.setdefault(triplet, [])

In [38]:
# Flag every 9-mer (three codons) with 1 if seq_first finds a finger array for
# it using the MARIA/ZifRC tables, otherwise 0.
# Note: seq_first is defined in a cell that appears further down this file, so
# that cell has to be run before this one.
maria_zifrc_9mers = {}
for first_codon in possible_codons:
    for second_codon in possible_codons:
        for third_codon in possible_codons:
            ninemer = third_codon + second_codon + first_codon
            found = seq_first(ninemer,maria_zifrc_codons,maria_zifrc_missing_codons,
                              maria_zifrc_zf_possible_per_codon,maria_zifrc_codon_transitions,maria_zf_transitions)
            maria_zifrc_9mers[ninemer] = 1 if found else 0

In [39]:
# Flag every 9-mer (three codons) with 1 if seq_first finds a finger array for
# it using the NetMHCIIpan/ZifRC tables, otherwise 0.
# Note: seq_first is defined in a cell that appears further down this file, so
# that cell has to be run before this one.
netmhc_zifrc_9mers = {}
for first_codon in possible_codons:
    for second_codon in possible_codons:
        for third_codon in possible_codons:
            ninemer = third_codon + second_codon + first_codon
            found = seq_first(ninemer,netmhc_zifrc_codons,netmhc_zifrc_missing_codons,
                              netmhc_zifrc_zf_possible_per_codon,netmhc_zifrc_codon_transitions,netmhc_zf_transitions)
            netmhc_zifrc_9mers[ninemer] = 1 if found else 0

In [40]:
# Flag every 9-mer (three codons) with 1 if seq_first finds a finger array for
# it using the MARIA/DeepZF tables, otherwise 0.
# Note: seq_first is defined in a cell that appears further down this file, so
# that cell has to be run before this one.
maria_deepzf_9mers = {}
for first_codon in possible_codons:
    for second_codon in possible_codons:
        for third_codon in possible_codons:
            ninemer = third_codon + second_codon + first_codon
            found = seq_first(ninemer,maria_deepzf_codons,maria_deepzf_missing_codons,
                              maria_deepzf_zf_possible_per_codon,maria_deepzf_codon_transitions,maria_zf_transitions)
            maria_deepzf_9mers[ninemer] = 1 if found else 0

In [41]:
# Flag every 9-mer (three codons) with 1 if seq_first finds a finger array for
# it using the NetMHCIIpan/DeepZF tables, otherwise 0.
# Note: seq_first is defined in a cell that appears further down this file, so
# that cell has to be run before this one.
netmhc_deepzf_9mers = {}
for first_codon in possible_codons:
    for second_codon in possible_codons:
        for third_codon in possible_codons:
            ninemer = third_codon + second_codon + first_codon
            found = seq_first(ninemer,netmhc_deepzf_codons,netmhc_deepzf_missing_codons,
                              netmhc_deepzf_zf_possible_per_codon,netmhc_deepzf_codon_transitions,netmhc_zf_transitions)
            netmhc_deepzf_9mers[ninemer] = 1 if found else 0

## Functions supporting ZF array creation

In [34]:
def split_seq(sequence):
    """Split a DNA string into triplets, returned LAST triplet first.

    Trailing bases that do not fill a whole triplet are dropped, so
    'ABCDEFG' gives ['DEF', 'ABC']. Fewer than three bases gives [].
    """
    usable = len(sequence) - len(sequence) % 3
    return [sequence[end-3:end] for end in range(usable, 0, -3)]

def seq_first(sequence,codon_zfs,missing_codons,zf_possible_per_codon,codon_transitions,zf_transitions):
    """Return the first finger array found for `sequence`, or None.

    The returned list has one finger per triplet of `sequence`. Its first
    finger binds the LAST triplet, and each later finger binds the triplet
    before the previous one (the order produced by split_seq).

    Parameters
    ----------
    sequence : DNA string; bases beyond the last complete triplet are ignored.
    codon_zfs : codon -> list of fingers predicted to bind it.
    missing_codons : codons that no finger binds.
    zf_possible_per_codon : '<finger><codon>' -> fingers that may follow
        <finger> and bind <codon>.
    codon_transitions : codon bound by a FOLLOWING finger -> codons bound by
        the fingers it may follow.
    zf_transitions : finger -> fingers that may follow it.

    Returns None for a sequence shorter than one triplet.
    """
    codon_list = split_seq(sequence)
    if not codon_list:
        return None
    # Cheap codon-level rejection before the finger-level search.
    for i, codon in enumerate(codon_list):
        if codon in missing_codons:
            return None
        if i < len(codon_list) - 1:
            # The finger on codon_list[i+1] FOLLOWS the finger on codon_list[i],
            # and codon_transitions is keyed by the following finger's codon.
            # The table used to be consulted the other way round, which
            # rejected targets that the search below can actually build.
            if codon not in codon_transitions[codon_list[i+1]]:
                return None
    for zf_1 in codon_zfs[codon_list[0]]:
        # The recursive helper also covers a single-codon sequence, which used
        # to raise IndexError on codon_list[1].
        zf_array = seq_first_recursive(codon_list,zf_1,zf_transitions,zf_possible_per_codon)
        if zf_array:
            return zf_array
    return None

def seq_first_recursive(codon_list,zf_1,zf_transitions,zf_possible_per_codon):
    """Depth-first search for an array that starts with `zf_1` on codon_list[0].

    Returns the list of fingers (zf_1 first) covering every codon in
    `codon_list`, or None when no permitted chain of following fingers exists.
    """
    if len(codon_list) == 1:
        return [zf_1]
    if zf_1 not in zf_transitions:
        return None
    remaining = codon_list[1:]
    for zf_2 in zf_possible_per_codon[zf_1+remaining[0]]:
        zf_array = seq_first_recursive(remaining,zf_2,zf_transitions,zf_possible_per_codon)
        if zf_array:
            return [zf_1] + zf_array
    return None

In [35]:
class zf_tree:
    """Tree of zinc fingers that bind the same DNA sequence.

    Each node holds one zinc-finger domain; its children are themselves trees
    of the fingers that can directly follow the parent's finger. Every
    root-to-leaf path is one candidate array.
    """
    def __init__(self, zf):
        self.zf = zf
        self.children = []

    def add_child(self, child):
        """Attach `child` (a zf_tree) as a finger that may follow this one."""
        self.children.append(child)

    def return_arrays(self):
        """Return every root-to-leaf path as a list of fingers."""
        if not self.children:
            return [[self.zf]]
        output = []
        for child in self.children:
            for array in child.return_arrays():
                output.append([self.zf] + array)
        return output

    def return_best_array(self,zf_score):
        """Return the path whose sub-array below this node grades highest.

        Sub-arrays are graded with grade_array(array, zf_score).
        NOTE(review): grade_array is not defined in the visible cells; it is
        assumed to return a comparable score for a list of fingers - confirm.
        The first child wins ties.
        """
        if not self.children:
            return [self.zf]
        best_score = None
        best_child = []
        for child in self.children:
            # zf_score must be passed down; the recursive call used to omit it
            # and raised TypeError on any node that has children.
            child_best = child.return_best_array(zf_score)
            child_score = grade_array(child_best,zf_score)
            # Starting from None rather than 0 keeps a child even when every
            # score is zero or negative, so the array is never cut short.
            if best_score is None or child_score > best_score:
                best_child = child_best
                best_score = child_score
        return [self.zf] + best_child

    def return_best_score(self,zf_score):
        """Return the largest sum of zf_score values along any root-to-leaf path."""
        if not self.children:
            return zf_score[self.zf]
        # max() over the children avoids flooring the result at 0 when all
        # child scores are negative.
        best_score = max(child.return_best_score(zf_score) for child in self.children)
        return best_score + zf_score[self.zf]

    def return_array_nums(self):
        """Return the number of root-to-leaf paths (candidate arrays)."""
        if not self.children:
            return 1
        return sum(child.return_array_nums() for child in self.children)
        

## Find number of targetable sites across promoter set

In [42]:
class missing_dict(dict):
    """dict whose lookup of an absent key yields 1; the key is not stored."""
    def __missing__(self, absent_key):
        default_value = 1
        return default_value

In [45]:
def check_promoters(codon_zfs,missing_codons,zf_possible_per_codon,codon_transitions,zf_transitions,enna_dic,
                    promoter_file='epd_promoters.txt',total_promoters=29598):
    """Count, for each promoter, the 24-nt sites for which a finger array exists.

    Every 24-nt window of the promoter sequence and of complement_DNA(sequence)
    is examined. A window is skipped without a full search when one of its five
    12-mers (offsets 0, 3, 6, 9, 12) is already known to have no array, or when
    one of its six 9-mers (offsets 0, 3, ..., 15) contains 'N' or is falsy in
    `enna_dic`. Otherwise seq_first decides; if it fails, each of the window's
    12-mers is tested and the failing ones are remembered.

    Parameters
    ----------
    codon_zfs, missing_codons, zf_possible_per_codon, codon_transitions,
    zf_transitions : passed through unchanged to seq_first.
    enna_dic : 9-mer -> truthy when the 9-mer is targetable.
    promoter_file : FASTA file of promoter sequences.
    total_promoters : expected number of promoters; used only for the ETA printout.

    Returns
    -------
    list of int, one count per promoter, in file order.

    NOTE(review): complement_DNA is not defined in the visible cells; it must
    be defined before this function is called.
    """
    site_len = 24
    promoters_checked = 0
    start = time.time()
    promoter_site_nums = []
    # 12-mer -> 0 once it is known to have no array; unknown 12-mers read as 1.
    checked_12nucs = missing_dict()
    for promoter in Bio.SeqIO.parse(promoter_file, 'fasta'):
        promoter_site = 0
        promoter_seq = str(promoter.seq)
        strands= (promoter_seq,complement_DNA(promoter_seq))
        for sequence in strands:
            # + 1 so that the final window, ending on the last base, is
            # included; it used to be skipped.
            for site_start in range(len(sequence) - site_len + 1):
                site = sequence[site_start:site_start+site_len]
                twelvemers = [site[offset:offset+12] for offset in range(0,13,3)]
                if not all(checked_12nucs[twelvemer] for twelvemer in twelvemers):
                    continue
                subsites = (site[k*3:k*3+9] for k in range(6))
                if any('N' in subsite or not enna_dic[subsite] for subsite in subsites):
                    continue
                if seq_first(site,codon_zfs,missing_codons,zf_possible_per_codon,codon_transitions,zf_transitions):
                    promoter_site += 1
                else:
                    # Remember every 12-mer of this window that cannot be
                    # targeted. This loop already covers the first and last
                    # 12-mers, so they are not tested a second time.
                    for twelvemer in twelvemers:
                        if not seq_first(twelvemer,codon_zfs,missing_codons,zf_possible_per_codon,codon_transitions,zf_transitions):
                            checked_12nucs[twelvemer] = 0
        promoter_site_nums.append(promoter_site)
        promoters_checked += 1
        if promoters_checked %10==0:
            current = time.time()
            print(promoters_checked)
            rate = (current - start)/ promoters_checked
            togo = rate * (total_promoters-promoters_checked)
            print('ETA {:.2f} minutes'.format(togo/60))
    return promoter_site_nums

In [None]:
# Per-promoter targetable-site counts for the MARIA screen with DeepZF codons.
maria_deepzf_promoter_site_nums = check_promoters(
    codon_zfs=maria_deepzf_codons,
    missing_codons=maria_deepzf_missing_codons,
    zf_possible_per_codon=maria_deepzf_zf_possible_per_codon,
    codon_transitions=maria_deepzf_codon_transitions,
    zf_transitions=maria_zf_transitions,
    enna_dic=maria_deepzf_9mers,
)

In [None]:
# Per-promoter targetable-site counts for the MARIA screen with ZifRC codons.
maria_zifrc_promoter_site_nums = check_promoters(
    codon_zfs=maria_zifrc_codons,
    missing_codons=maria_zifrc_missing_codons,
    zf_possible_per_codon=maria_zifrc_zf_possible_per_codon,
    codon_transitions=maria_zifrc_codon_transitions,
    zf_transitions=maria_zf_transitions,
    enna_dic=maria_zifrc_9mers,
)

In [None]:
# Per-promoter targetable-site counts for the NetMHCIIpan screen with DeepZF codons.
netmhc_deepzf_promoter_site_nums = check_promoters(
    codon_zfs=netmhc_deepzf_codons,
    missing_codons=netmhc_deepzf_missing_codons,
    zf_possible_per_codon=netmhc_deepzf_zf_possible_per_codon,
    codon_transitions=netmhc_deepzf_codon_transitions,
    zf_transitions=netmhc_zf_transitions,
    enna_dic=netmhc_deepzf_9mers,
)

In [None]:
# Per-promoter targetable-site counts for the NetMHCIIpan screen with ZifRC codons.
netmhc_zifrc_promoter_site_nums = check_promoters(
    codon_zfs=netmhc_zifrc_codons,
    missing_codons=netmhc_zifrc_missing_codons,
    zf_possible_per_codon=netmhc_zifrc_zf_possible_per_codon,
    codon_transitions=netmhc_zifrc_codon_transitions,
    zf_transitions=netmhc_zf_transitions,
    enna_dic=netmhc_zifrc_9mers,
)