### Overview
This is a collection of anti-fungal peptide sequences. All the sequences in this notebook have been run through the IDT codon optimizer. 

#### Preparing DNA Sequences
Import the sequences from a csv file into an array and generate the complement strands. 

In [2]:
import csv

### Import AMP Array
# Index corresponds to index in "Anti-Fungal Payloads" Google sheet

AMPs = []
with open('antifungal_payloads.csv', 'r') as file:
    ampreader = csv.reader(file)
    for row in ampreader:
        if row[4] != 'NT Sequence':
            row[4] = row[4].replace(" ", "") # removes spaces from sequence
            AMPs.append(row[4])

### Generate complementary DNA strand
def comp(DNA):
    
    DNA_comp = ""
    for i in range(len(DNA)):
        if DNA[i].upper() == "A":
            DNA_comp += "T"
        if DNA[i].upper() == "T":
            DNA_comp += "A"
        if DNA[i].upper() == "C":
            DNA_comp += "G"
        if DNA[i].upper() == "G":
            DNA_comp += "C"
    return DNA_comp.upper()

AMP_comp = []
for seq in AMPs:
    AMP_comp.append(comp(seq))


#### Screening
Check all the sequences for illegal restriction sites, and if found, makes appropriate edits.

In [6]:
### Codon Maps
# Maps codons to amino acids
nt_aa = {
    'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M',
    'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
    'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K',
    'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
    'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
    'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
    'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q',
    'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
    'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V',
    'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
    'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E',
    'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
    'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S',
    'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
    'TAC':'Y', 'TAT':'Y', 'TAA':'*', 'TAG':'*',
    'TGC':'C', 'TGT':'C', 'TGA':'*', 'TGG':'W',
    }
# Maps amino acids to codons
aa_nt = {'I': ['ATA', 'ATC', 'ATT'],
             'M': ['ATG'],
             'T': ['ACA', 'ACC', 'ACG', 'ACT'],
             'N': ['AAC', 'AAT'],
             'K': ['AAA', 'AAG'],
             'S': ['AGC', 'AGT', 'TCA', 'TCC', 'TCG', 'TCT'],
             'R': ['AGA', 'AGG'],
             'L': ['CTA', 'CTC', 'CTT', 'TTA', 'TTG'],
             'P': ['CCA', 'CCC', 'CCG', 'CCT'],
             'H': ['CAC', 'CAT'],
             'Q': ['CAA', 'CAG'],
             'R': ['CGA', 'CGC', 'CGG', 'CGT'],
             'V': ['GTA', 'GTC', 'GTG', 'GTT'],
             'A': ['GCA', 'GCC', 'GCG', 'GCT'],
             'D': ['GAC', 'GAT'],
             'E': ['GAA', 'GAG'],
             'G': ['GGA', 'GGC', 'GGG', 'GGT'],
             'F': ['TTC', 'TTT'],
             'Y': ['TAC', 'TAT'],
             'C': ['TGC', 'TGT'],
             'W': ['TGG']}


### Restriction Sites
# MoClo restriction sites - BBF RFC 94
bbsi = 'gaagac'
bsai = 'ggtctc'

### Restriction sites from BBF RFC 10
ecori = 'gaattc'
psti = 'ctgcag'
noti = 'gcggccgc'
xbai = 'tctaga'
spei = 'actagt'

# Restriction sites from BBF RFC 25
ngomiv = 'gccgcc'
agei = 'accggt'

# Dictionary of restriction sites to check
rsites = {'bbsi': bbsi, 'bsai': bsai, 'ecori': ecori, 'psti': psti, 
          'noti': noti, 'xbai': xbai, 'spei': spei, 'ngomiv': ngomiv, 
          'agei': agei}

### Functions

# Screens for restriction sites and makes appropriate changes
def rsite_screen(rsites, seq):
    for site in rsites:
        if (site in seq):
            pos = seq.find(site) # position of the cut site
            print(pos)
            def replace(pos):
                cod1 = seq[pos:pos+3] 
                print(cod1)
                aa = nt_aa[cod1]
                print(aa)
                ind = aa_nt[aa].index(cod1)
                print(ind)
                cod2 = aa_nt[aa][(ind + 1) % len(aa_nt[aa])]
                print(cod2)
                seq = seq[0:pos-1] + cod2 + seq[pos + 3:]
                print(seq)
            if (pos + 1) % 3 == 0:
                replace(pos)
            elif (pos + 1) % 3 == 1:
                replace(pos - 1)
            else:
                replace(pos - 2)
            rsite_screen(rsites, seq)

test_seq = 'aaagccgcc'

rsite_screen(rsites, test_seq)
test_seq

# Check coding and complementary strands for restriction sites
#for seq in AMPs:
   # rsite_screen(rsites, seq)
            

'aaagccgcc'

#### Writing the sequences back to a csv file