In [None]:
### This code will take in a TF list as an excel file and return a pd dataframe of the relevant features and gene region sequences for each start and stop codon of interest.
# Contact scro4473@ox.ac.uk for questions and queries

In [None]:
#Import packages
!pip install biopython
!pip install primer3-py


import pandas as pd
from Bio import SeqIO
import primer3 as p3
from tqdm import tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Mount Google Drive in the Colab runtime; every input file read below is
# addressed through the '/content/drive' mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Functions

#ReverseComplement
def revComp(inputSeq):
  """
  This function takes an input sequence and returns the reverse complement.

  Input: inputSeq in str format (upper or lower case; IUPAC ambiguity
         codes such as N are accepted)
  Output: reverse complement in str format, always upper case

  Raises KeyError if inputSeq contains a character that is not an IUPAC
  nucleotide code.

  """
  #Complement of every IUPAC nucleotide code, so that genomic sequence
  #containing N (or another ambiguity code) no longer raises KeyError
  complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A',
                'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W',
                'K': 'M', 'M': 'K', 'B': 'V', 'V': 'B',
                'D': 'H', 'H': 'D', 'N': 'N'}

  #Walk the sequence from its last base to its first and join once at the end
  return "".join(complement[base] for base in reversed(inputSeq.upper()))

In [None]:
#This is the input file containing the TFs we want to query
#Imported as a pandas dataframe
queryTFsdf = pd.read_excel('/content/drive/My Drive/bioinformatics project/TFs.xlsx')
#This contains 765 TFs total

#This is the .gtf file with annotations for each gene on the reference genome
#Note that I'll use these for the info categories of the final pandas df (rather than the transgenic genome annotations)
refAnnotationsHeaders = ["Chromosome", "Source", "Gene_Region", "Start", "Stop", "Score", "Strand", "Frame", "Attribute"]
refGenomeAnnotation = pd.read_csv(r'/content/drive/My Drive/bioinformatics project/dmel-all-r6.48.gtf', sep = "\t", header = None, index_col = False, names = refAnnotationsHeaders)


def loadFastaByChromosome(fastaPath):
  """
  Read a FASTA file into a dictionary of record id -> record sequence.

  Input: fastaPath, path of the FASTA file in str format
  Output: dict mapping each record's id to its seq attribute

  """
  #The with-block closes the file as soon as every record has been read
  with open(fastaPath) as fastaHandle:
    return {record.id: record.seq for record in SeqIO.parse(fastaHandle, 'fasta')}


#This is the FASTA file of the reference genome sequence
refSeqPerChromosome = loadFastaByChromosome('/content/drive/My Drive/bioinformatics project/dmel-all-chromosome-r6.48.fasta')

#This is the nos-Cas9 on 2 sequence - use for chromosome 3 only
on2SeqPerChromosome = loadFastaByChromosome('/content/drive/My Drive/bioinformatics project/dmel6-nos-Cas9_on_2.fasta')

#This is the nos-Cas9 on 3 sequence - use for all other chromosomes
on3SeqPerChromosome = loadFastaByChromosome('/content/drive/My Drive/bioinformatics project/dmel6-nos-Cas9_on_3.fasta')

In [None]:
#This is to reformat the "Attribute" category in refGenomeAnnotation, to extract Gene_ID, Gene_Symbol, and Transcript ID
#The whole column is processed at once instead of looking rows up one by one

#Strip the ; and " characters, then split every attribute string on single spaces
attributeTokens = (
    refGenomeAnnotation["Attribute"]
    .str.replace(";", "", regex = False)
    .str.replace('"', "", regex = False)
    .str.split(" ")
)

#Tokens 1, 3 and 5 are the gene ID, gene symbol and transcript ID
#The transcript ID is only kept for rows that split into exactly 8 tokens; all other rows get ""
hasTranscript = attributeTokens.str.len() == 8

refGenomeAnnotation = refGenomeAnnotation.assign(
    Gene_ID = attributeTokens.str[1],
    Gene_Symbol = attributeTokens.str[3],
    Transcript_ID = attributeTokens.str[5].where(hasTranscript, ""),
)

#Delete Attributes category
refGenomeAnnotation = refGenomeAnnotation.drop(columns = "Attribute")

display(refGenomeAnnotation)

Unnamed: 0,Chromosome,Source,Gene_Region,Start,Stop,Score,Strand,Frame,Gene_ID,Gene_Symbol,Transcript_ID
0,X,FlyBase,gene,19961297,19969323,.,+,.,FBgn0031081,Nep3,
1,X,FlyBase,mRNA,19961689,19968479,.,+,.,FBgn0031081,Nep3,FBtr0070000
2,X,FlyBase,5UTR,19961689,19961845,.,+,.,FBgn0031081,Nep3,FBtr0070000
3,X,FlyBase,exon,19961689,19961845,.,+,.,FBgn0031081,Nep3,FBtr0070000
4,X,FlyBase,exon,19963955,19964071,.,+,.,FBgn0031081,Nep3,FBtr0070000
...,...,...,...,...,...,...,...,...,...,...,...
549156,2L,FlyBase,CDS,814243,815221,.,-,0,FBgn0031281,Saf6,FBtr0332974
549157,2L,FlyBase,CDS,813951,814184,.,-,2,FBgn0031281,Saf6,FBtr0332974
549158,2L,FlyBase,CDS,813460,813893,.,-,2,FBgn0031281,Saf6,FBtr0332974
549159,2L,FlyBase,stop_codon,813457,813459,.,-,0,FBgn0031281,Saf6,FBtr0332974


In [None]:
#Keep only the start and stop codon rows of the annotation, then keep only the rows of the queried TFs

codonRegions = ["start_codon", "stop_codon"]
isCodonRow = refGenomeAnnotation["Gene_Region"].isin(codonRegions)
refGenomeAnnotation = refGenomeAnnotation.loc[isCodonRow]

#Columns carried over into the TF table
tfColumns = ["Gene_ID", "Transcript_ID", "Chromosome", "Gene_Region", "Start", "Stop", "Strand"]
isQueriedTF = refGenomeAnnotation["Gene_ID"].isin(queryTFsdf["Flybase_ID"])
TFsdf = refGenomeAnnotation[tfColumns].loc[isQueriedTF]

display(TFsdf)

Unnamed: 0,Gene_ID,Transcript_ID,Chromosome,Gene_Region,Start,Stop,Strand
141,FBgn0031086,FBtr0070032,X,start_codon,20092383,20092385,-
143,FBgn0031086,FBtr0070032,X,stop_codon,20091603,20091605,-
453,FBgn0000022,FBtr0070072,X,start_codon,370094,370096,+
455,FBgn0000022,FBtr0070072,X,stop_codon,370697,370699,+
461,FBgn0004170,FBtr0070073,X,start_codon,396177,396179,+
...,...,...,...,...,...,...,...
546480,FBgn0031375,FBtr0330200,2L,stop_codon,1951056,1951058,-
546486,FBgn0031375,FBtr0330201,2L,start_codon,1954323,1954325,-
546489,FBgn0031375,FBtr0330201,2L,stop_codon,1950927,1950929,-
546495,FBgn0031375,FBtr0330202,2L,start_codon,1954323,1954325,-


In [None]:
#Add reference and transgenic genome sequence per gene region
#Each sequence is the codon row plus 1600 bp on either side (3203 bp for a 3 bp codon row),
#read 5'->3' on the gene's own strand, so the codon row starts at index 1600 of the sequence
FLANK_BP = 1600


def extractCodonWindow(chromosomeSeq, codonStart, codonStop, strand, flank = FLANK_BP):
  """
  Return the codon row plus `flank` bases on each side, oriented on `strand`.

  Input: chromosomeSeq - sequence of the whole chromosome (+ strand)
         codonStart, codonStop - Start and Stop values of the annotation row
                                 (treated as 1-based and inclusive - TODO confirm against the GTF spec)
         strand - "+" or "-"
         flank - number of bases taken on each side of the codon row
  Output: str of length flank + (codonStop - codonStart + 1) + flank

  If the window runs past either end of the chromosome the missing bases are
  filled with N, so the codon row keeps the same offset in the returned string.
  A negative slice start would otherwise wrap around to the chromosome end.
  """
  windowStart = int(codonStart) - 1 - flank  #first index of the slice (0-based)
  windowStop = int(codonStop) + flank        #index one past the end of the slice

  missingLeft = max(0, -windowStart)
  missingRight = max(0, windowStop - len(chromosomeSeq))

  window = str(chromosomeSeq[max(0, windowStart):windowStop])

  if strand == "-":
    #The slice is + strand sequence, so it is reverse-complemented for a - strand gene
    #The padding is added afterwards (N is its own complement) and swaps sides
    window = revComp(window)
    missingLeft, missingRight = missingRight, missingLeft

  return "N" * missingLeft + window + "N" * missingRight


#FASTAs for reference and transgenic are in format of a dictionary of chromosome:"sequence"
referenceSeqs = []
transgenicSeqs = []

for chromosome, codonStart, codonStop, strand in zip(TFsdf["Chromosome"], TFsdf["Start"], TFsdf["Stop"], TFsdf["Strand"]):

  #Rows without a + or - strand keep empty sequences
  if strand not in ("+", "-"):
    referenceSeqs.append("")
    transgenicSeqs.append("")
    continue

  #Pick the appropriate transgenic genome, depending on the chromosome:
  #nos-Cas9 on 2 for chromosome 3, nos-Cas9 on 3 for every other chromosome
  if chromosome.startswith("3"):
    transgenicGenome = on2SeqPerChromosome
  else:
    transgenicGenome = on3SeqPerChromosome

  referenceSeqs.append(extractCodonWindow(refSeqPerChromosome[chromosome], codonStart, codonStop, strand))
  transgenicSeqs.append(extractCodonWindow(transgenicGenome[chromosome], codonStart, codonStop, strand))

TFsdf = TFsdf.assign(Reference_Seq = referenceSeqs, Transgenic_Seq = transgenicSeqs)

display(TFsdf)

TFsdf.to_excel("TFsdf.xlsx")

Unnamed: 0,Gene_ID,Transcript_ID,Chromosome,Gene_Region,Start,Stop,Strand,Reference_Seq,Transgenic_Seq
141,FBgn0031086,FBtr0070032,X,start_codon,20092383,20092385,-,CTGTGTCTTCAAAATTCTGGCGGCTGCGAGACGCGACAAAGGAAGT...,GCGAAGGACCCAAAGGAATTTTCGAGGGCCATTTACATCTAAGAGT...
143,FBgn0031086,FBtr0070032,X,stop_codon,20091603,20091605,-,ATAACGATACCCAGAGCATGTGTATAGTACTCGTAGTATATATCCA...,CAATCCAGCCTAACTGAGTCCGTTGCATTAGCGGCAATCCCGACAG...
453,FBgn0000022,FBtr0070072,X,start_codon,370094,370096,+,ATCGGCTAGAATACCTGTACGATCAACATTGGTACTGAAATGTACG...,ACGATCAACATTGGTACTGAAATGTACGGAAGCTGTCCTGTAATCG...
455,FBgn0000022,FBtr0070072,X,stop_codon,370697,370699,+,ACAAATCTATTTCGGCACGGTTTCCTAGCGTGCAACCAGAATTCTG...,TTTCCTAGCGTGCAACCAGAATTCTGACAATGGTTTTAAGAAATAA...
461,FBgn0004170,FBtr0070073,X,start_codon,396177,396179,+,TATTCTAGGCATGTCACCGGCATTTCTCCCAACCATTTATTAAAAT...,ATTTCTCCCAACCATTTATTAAAATGCTTCAATGGAATTTTGGAAA...
...,...,...,...,...,...,...,...,...,...
546480,FBgn0031375,FBtr0330200,2L,stop_codon,1951056,1951058,-,CGCTCCGGTTTCCTTCGCCACCAACGATGACGACGAGGATGAGGAT...,TTACACCTCCAGTCGAGTGTAAGCGATTTAATGAATCTCACATAAA...
546486,FBgn0031375,FBtr0330201,2L,start_codon,1954323,1954325,-,ACAAGAGCCGCACAACAAGCCAGATTAACGGAAAATGCAGTCGAAA...,TTTGTTTTATATTTTATTTTTGCACATTTTGCCGGATTTTCAAGTT...
546489,FBgn0031375,FBtr0330201,2L,stop_codon,1950927,1950929,-,TGACTCCGCTTTCAAGAAGTATGTGCCGGGTCCGTGTTCGGGCGCC...,GGTCTTTGTTTGGGTTAAGTGGAGTGTAAGCCAGGGCCAAGCACGG...
546495,FBgn0031375,FBtr0330202,2L,start_codon,1954323,1954325,-,ACAAGAGCCGCACAACAAGCCAGATTAACGGAAAATGCAGTCGAAA...,TTTGTTTTATATTTTATTTTTGCACATTTTGCCGGATTTTCAAGTT...


In [None]:
def DesignPrimer(template,stringency,GC_content,max_end_GC,size,GC_clamp,TH_max_hairpin,max_polyx,
                 primer_region, primer_type,primer_number):
 
  '''
  Ask primer3 for primers on `template` using the settings of one stringency level.

  Every condition argument is a list with one entry per stringency level and
  `stringency` is the index of the level to use.

  template:       sequence the primers are designed on
  stringency:     index into each of the condition lists below
  GC_content:     per level, [min, opt, max] GC percentage
  max_end_GC:     per level, value passed as PRIMER_MAX_END_GC
  size:           per level, [min, opt, max] primer length
  GC_clamp:       per level, value passed as PRIMER_GC_CLAMP
  TH_max_hairpin: per level, value passed as PRIMER_MAX_HAIRPIN_TH
  max_polyx:      per level, value passed as PRIMER_MAX_POLY_X
  primer_region:  value passed as SEQUENCE_INCLUDED_REGION
  primer_type:    "F" to pick a left primer only, "R" to pick a right primer only
  primer_number:  value passed as PRIMER_NUM_RETURN

  Returns the result of p3.bindings.designPrimers unchanged.
  Raises ValueError if primer_type is neither "F" nor "R".
  '''
  if primer_type == "F":
    left = 1
    right = 0
  elif primer_type == "R":
    left = 0
    right = 1
  else:
    # Fail with a clear message instead of an unbound-variable error further down
    raise ValueError(f'primer_type must be "F" or "R", got {primer_type!r}')

  level_GC = GC_content[stringency]
  level_size = size[stringency]

  primer = p3.bindings.designPrimers(
    {
        'SEQUENCE_TEMPLATE': template,
        'SEQUENCE_INCLUDED_REGION': primer_region, 
    },
    {
        'PRIMER_NUM_RETURN':primer_number,

        'PRIMER_TASK': "generic",
        'PRIMER_PICK_LEFT_PRIMER': left,
        'PRIMER_PICK_INTERNAL_OLIGO': 0,
        'PRIMER_PICK_RIGHT_PRIMER': right,

        'PRIMER_MIN_GC': level_GC[0],
        'PRIMER_OPT_GC_PERCENT': level_GC[1],
        'PRIMER_MAX_GC': level_GC[2],
     
        'PRIMER_MIN_SIZE': level_size[0],
        'PRIMER_OPT_SIZE': level_size[1],
        'PRIMER_MAX_SIZE': level_size[2],
     
        'PRIMER_MAX_END_GC': max_end_GC[stringency],
     
        'PRIMER_GC_CLAMP': GC_clamp[stringency],    
             
        'PRIMER_MAX_HAIRPIN_TH':TH_max_hairpin[stringency],
    
        'PRIMER_MAX_POLY_X': max_polyx[stringency],
    })
  
  return primer


def DoIHaveAPrimer(extension,Gene_ID,Transcript_ID,start_stop,primer_type, primer_name,stringency,primer):
  '''
  Turn a primer3 result into an output record, or report that no primer was returned.

  extension:     1 if the primer comes from the enlarged search region (the
                 stringency label is then prefixed with "e"), otherwise no prefix
  Gene_ID, Transcript_ID, start_stop, primer_name:
                 copied into the record as 'Gene_ID', 'Transcript_ID',
                 'position' and 'primer_type'
  primer_type:   "F" reads the PRIMER_LEFT_* keys, "R" reads the PRIMER_RIGHT_* keys
  stringency:    0-based stringency index; stored 1-based in 'stringency_level'
  primer:        dictionary returned by the primer design call

  Returns (record, False) when at least one primer was returned, where record
  holds the first primer's sequence, and ((), True) when none was returned.
  Raises ValueError if primer_type is neither "F" nor "R".
  '''
  if primer_type == "F":
    side = "LEFT"
  elif primer_type == "R":
    side = "RIGHT"
  else:
    # Previously this fell through and failed with an unbound-variable error at the return
    raise ValueError(f'primer_type must be "F" or "R", got {primer_type!r}')

  if primer[f'PRIMER_{side}_NUM_RETURNED'] > 0:
    Extended_tag = "e" if extension == 1 else ""
    primers_cleanup = {'Gene_ID': Gene_ID,
                       'Transcript_ID': Transcript_ID,
                       'position':start_stop,
                       'primer_type': primer_name,
                       'primer_sequence': primer[f'PRIMER_{side}_0_SEQUENCE'],
                       'stringency_level': f'{Extended_tag}{stringency+1}'}
    return primers_cleanup, False

  # No primer at this stringency: empty record plus the warning flag
  return (), True

In [None]:
def SixPrimersCalculator(Gene_ID, Transcript_ID,start_stop,template,primer_name,initial_primer_region,enlarged_primer_region,GC_content,size,max_end_GC,GC_clamp,TH_max_hairpin,max_polyx,stringency_levels):
  '''
  Design one primer per entry of primer_name on a single template.

  Gene_ID, Transcript_ID, start_stop: copied into every output row
  template:                sequence the primers are designed on
  primer_name:             list of primer names; a name containing "-F" is designed as a
                           forward primer, one containing "-R" as a reverse primer
  initial_primer_region:   per primer, [start, length] searched first
  enlarged_primer_region:  per primer, [start, length] searched when the initial region
                           gives nothing at any stringency (positions 0, 1, 4 and 5 only)
  GC_content, size, max_end_GC, GC_clamp, TH_max_hairpin, max_polyx:
                           per-stringency conditions, passed on to DesignPrimer
  stringency_levels:       number of stringency levels to try, most stringent first

  How each position in primer_name is handled:
    * positions 0, 1, 4, 5: take the first primer returned, relaxing the stringency and
      then enlarging the region until one is found; otherwise the row says
      "primer could not be calculated".
    * "val-F" / "val-R": their region start is derived from where the previously
      designed primer (position 0 / position 4) sits on the template. If that primer
      does not exist, a row saying so is written and nothing is designed.
    * position 2 (HAL-R): must end at template index 1600; position 3 (HAR-F): must
      start at template index 1603. If no returned primer sits there, the first primer
      found is extended up to that index, or a plain 25 bp stretch is used.

  Returns a DataFrame with one row per primer and the columns Gene_ID, Transcript_ID,
  position, primer_type, primer_sequence, stringency_level.
  Raises ValueError if a primer name contains neither "-F" nor "-R".
  '''
  # Private copies: the val-F / val-R start positions are written into these lists below,
  # so the caller's lists must not carry one gene's positions over into the next call
  initial_primer_region = [list(region) for region in initial_primer_region]
  enlarged_primer_region = [list(region) for region in enlarged_primer_region]

  # Fixed template positions that HAL-R and HAR-F are anchored to
  # NOTE(review): these match a template built with 1600 bp of flank before the codon row
  hal_r_end = 1600     # HAL-R (position 2) has to end at this template index
  har_f_start = 1603   # HAR-F (position 3) has to start at this template index

  def make_record(name, sequence, level):
    # One output row; the key order fixes the column order of the returned table
    return {'Gene_ID': Gene_ID,
            'Transcript_ID': Transcript_ID,
            'position':start_stop,
            'primer_type': name,
            'primer_sequence': sequence,
            'stringency_level': level}

  def reference_sequence(previous_record):
    # Sequence of the previously designed primer, or "" when there is none
    # (a stringency_level of "NA" marks a row that only holds an explanatory message)
    if previous_record == () or previous_record['stringency_level'] == "NA":
      return ""
    return previous_record['primer_sequence']

  records = []          # one record per primer, in primer_name order
  primers_cleanup = ()  # record of the primer handled last; () while none has been chosen

  for pt in range(0,len(primer_name)): #loop to get as many primers as specified in the primer_name list (here 6)
    current_name = primer_name[pt]

    # defining what type of primer I am working with
    if "-F" in current_name:
      primer_type = "F"
    elif "-R" in current_name:
      primer_type = "R"
    else:
      raise ValueError(f"primer name {current_name!r} contains neither '-F' nor '-R'")

    # HAL-R and HAR-F need many candidates so that one at the exact position can be picked;
    # for the other primers the first one returned is enough
    if pt == 2 or pt == 3:
      primer_number = 1000
    else:
      primer_number = 1

    stringency = 0 #initial condition (most stringent conditions)

    safety_net_primer_forward = ()  # first HAR-F candidate found at any stringency, kept as a fallback
    safety_net_primer_reverse = ()  # first HAL-R candidate found at any stringency, kept as a fallback
    primer_region = initial_primer_region

    while stringency<stringency_levels:

      # val-F sits upstream of the previously designed primer (HAL-F): its region start is
      # derived from that primer's position, once, before the first design attempt
      if current_name == "val-F" and stringency == 0 and primer_region == initial_primer_region:
        reference_primer = reference_sequence(primers_cleanup)
        if reference_primer != "" and reference_primer in template:
          # in reality this might be 1bp off, might need further checks
          primer_region[1][0] = template.index(reference_primer) - 99 - primer_region[1][1]
          enlarged_primer_region[1][0] = primer_region[1][0]
        else:
          primers_cleanup = make_record(current_name, "primer could not be calculated because reference primer (HAL-F) does not exist", "NA")
          records.append(primers_cleanup)
          break # nothing to design for this primer

      # val-R sits downstream of the previously designed primer (HAR-R), which is stored as a
      # reverse primer, so its reverse complement is what appears in the template
      if current_name == "val-R" and stringency == 0 and primer_region == initial_primer_region:
        reference_primer = reference_sequence(primers_cleanup)
        tempo_reverse = revComp(reference_primer) if reference_primer != "" else ""
        if tempo_reverse != "" and tempo_reverse in template:
          # in reality this might be 1bp off, might need further checks
          primer_region[5][0] = template.index(tempo_reverse) + len(tempo_reverse) + 100
          enlarged_primer_region[5][0] = primer_region[5][0]
        else:
          primers_cleanup = make_record(current_name, "primer could not be calculated because reference primer (HAR-R) does not exist", "NA")
          records.append(primers_cleanup)
          break # nothing to design for this primer

      # designing the primer
      primer = DesignPrimer(template,stringency,GC_content,max_end_GC,size,GC_clamp,TH_max_hairpin,max_polyx,
                            primer_region[pt], primer_type, primer_number)

      ####### special cases -  HAL-R and HAR-F #############
      if pt == 2 or pt == 3:
        if primer_type == "F":
          number_of_calculated_primers = primer['PRIMER_LEFT_NUM_RETURNED']
        else:
          number_of_calculated_primers = primer['PRIMER_RIGHT_NUM_RETURNED']

        primers_cleanup = () # empty variable to save the final primer of choice

        for sc in range(0,number_of_calculated_primers): # evaluating all the primers obtained

          if pt == 2: # HAL-R
            # the first candidate ever returned is kept as a "safety net" and never overwritten
            if safety_net_primer_reverse == ():
              safety_net_primer_reverse = primer['PRIMER_RIGHT_0_SEQUENCE']

            candidate = primer[f'PRIMER_RIGHT_{sc}_SEQUENCE']
            reverse_complement = revComp(candidate) # this is how the primer appears in the template
            primer_position = template.index(reverse_complement)
            # NOTE(review): when several candidates end at the right index, the last one in
            # primer3's list is the one kept (same as before this rewrite)
            if (primer_position + len(reverse_complement)) == hal_r_end:
              primers_cleanup = make_record(current_name, candidate, f'{stringency+1}')

          if pt == 3: # HAR-F
            if safety_net_primer_forward == ():
              safety_net_primer_forward = primer['PRIMER_LEFT_0_SEQUENCE']

            candidate = primer[f'PRIMER_LEFT_{sc}_SEQUENCE']
            primer_position = template.index(candidate)
            if primer_position == har_f_start:
              primers_cleanup = make_record(current_name, candidate, f'{stringency+1}')

        # last stringency and still nothing at the exact position: fall back
        if stringency == (stringency_levels-1) and primers_cleanup == ():
          if pt == 2:
            if safety_net_primer_reverse == ():
              # no candidate at all: use the 25 bases that end at the required index
              last_resort = template[hal_r_end-25:hal_r_end]
              primers_cleanup = make_record(current_name, revComp(last_resort), "25bp no conditions")
            else:
              # stretch the safety-net primer so that it ends at the required index
              net_position = template.index(revComp(safety_net_primer_reverse))
              extended_site = template[net_position:hal_r_end]
              primers_cleanup = make_record(current_name, revComp(extended_site), f"extended from {stringency+1}")
          if pt == 3:
            if safety_net_primer_forward == ():
              # no candidate at all: use the 25 bases that start at the required index
              primers_cleanup = make_record(current_name, template[har_f_start:har_f_start+25], "25bp no conditions")
            else:
              # prepend the bases between the required index and the safety-net primer
              net_position = template.index(safety_net_primer_forward)
              leading_bases = template[har_f_start:net_position]
              primers_cleanup = make_record(current_name, leading_bases+safety_net_primer_forward, f"extended from {stringency+1}")

        if primers_cleanup == ():
          stringency +=1
        else:
          records.append(primers_cleanup)
          break # primer chosen, leaving the loop

      ########### normal cases #########
      if (pt == 0 or pt == 1 or pt == 4 or pt == 5):
        if primer_region == initial_primer_region:
          extension = 0
        else:
          extension = 1

        primers_cleanup, warning_variable = DoIHaveAPrimer(extension,Gene_ID,Transcript_ID,start_stop,primer_type, current_name,stringency,primer)

        if warning_variable == False: # a primer was returned --> we are done with this primer
          records.append(primers_cleanup)
          break

        if stringency == (stringency_levels-1):
          if primer_region == enlarged_primer_region:
            # every stringency failed on the enlarged region too: give up on this primer
            primers_cleanup = make_record(current_name, "primer could not be calculated", "NA")
            records.append(primers_cleanup)
            break
          elif primer_region == initial_primer_region:
            # every stringency failed on the initial region: start again on the enlarged one
            stringency = 0
            primer_region = enlarged_primer_region
        else:
          stringency +=1

  if not records:
    return pd.DataFrame()

  # One single-row frame per record, so every row keeps index 0 as in the existing output files
  return pd.concat([pd.DataFrame([record]) for record in records])


In [None]:


# CONDITIONS VARIABLES - [stringent[min,opt,max], relaxed, desperate]
primer_name = ["HAL-F","val-F","HAL-R","HAR-F","HAR-R","val-R"]

# One [start, length] pair per primer, in primer_name order
# "unknown" starts (val-F and val-R) are filled in by SixPrimersCalculator
initial_primer_region = [[500,200],["unknown",100],[1570,29],[1603,30],[2500,200],["unknown",100]]
extended_primer_region = [[400,400],["unknown",200],[1570,29],[1603,30],[2100,400],["unknown",200]]
GC_content = [[30,50,70],[25,50,75],[20,50,80]]
size = [[18,20,25],[18,20,26],[17,20,30]]
max_end_GC = [3,4,4]
GC_clamp = [1,1,0]
TH_max_hairpin = [47.00,48.00,72.00]
max_polyx = [5,6,8] 
stringency_levels = 3


TFsdf_i = TFsdf.reset_index()
display(TFsdf_i)

# One primers table per codon row; they are collected in a list and joined once at the
# end, which avoids re-copying the growing table on every iteration
tf_rows = zip(TFsdf_i["Gene_ID"], TFsdf_i["Transcript_ID"], TFsdf_i["Gene_Region"], TFsdf_i["Reference_Seq"])
primer_frames = []

for gene_id, transcript_id, gene_region, reference_seq in tqdm(tf_rows, total = len(TFsdf_i)):
  single = SixPrimersCalculator(gene_id,transcript_id,gene_region,reference_seq,primer_name,initial_primer_region,extended_primer_region,GC_content,size,max_end_GC,GC_clamp,TH_max_hairpin,max_polyx,stringency_levels)
  primer_frames.append(single)

# pd.concat refuses an empty list, so an empty TF table gives an empty primers table
primers_table = pd.concat(primer_frames) if primer_frames else pd.DataFrame()

display(primers_table)
primers_table.to_excel("primers_output.xlsx")  

Unnamed: 0,index,Gene_ID,Transcript_ID,Chromosome,Gene_Region,Start,Stop,Strand,Reference_Seq,Transgenic_Seq
0,141,FBgn0031086,FBtr0070032,X,start_codon,20092383,20092385,-,CTGTGTCTTCAAAATTCTGGCGGCTGCGAGACGCGACAAAGGAAGT...,GCGAAGGACCCAAAGGAATTTTCGAGGGCCATTTACATCTAAGAGT...
1,143,FBgn0031086,FBtr0070032,X,stop_codon,20091603,20091605,-,ATAACGATACCCAGAGCATGTGTATAGTACTCGTAGTATATATCCA...,CAATCCAGCCTAACTGAGTCCGTTGCATTAGCGGCAATCCCGACAG...
2,453,FBgn0000022,FBtr0070072,X,start_codon,370094,370096,+,ATCGGCTAGAATACCTGTACGATCAACATTGGTACTGAAATGTACG...,ACGATCAACATTGGTACTGAAATGTACGGAAGCTGTCCTGTAATCG...
3,455,FBgn0000022,FBtr0070072,X,stop_codon,370697,370699,+,ACAAATCTATTTCGGCACGGTTTCCTAGCGTGCAACCAGAATTCTG...,TTTCCTAGCGTGCAACCAGAATTCTGACAATGGTTTTAAGAAATAA...
4,461,FBgn0004170,FBtr0070073,X,start_codon,396177,396179,+,TATTCTAGGCATGTCACCGGCATTTCTCCCAACCATTTATTAAAAT...,ATTTCTCCCAACCATTTATTAAAATGCTTCAATGGAATTTTGGAAA...
...,...,...,...,...,...,...,...,...,...,...
4365,546480,FBgn0031375,FBtr0330200,2L,stop_codon,1951056,1951058,-,CGCTCCGGTTTCCTTCGCCACCAACGATGACGACGAGGATGAGGAT...,TTACACCTCCAGTCGAGTGTAAGCGATTTAATGAATCTCACATAAA...
4366,546486,FBgn0031375,FBtr0330201,2L,start_codon,1954323,1954325,-,ACAAGAGCCGCACAACAAGCCAGATTAACGGAAAATGCAGTCGAAA...,TTTGTTTTATATTTTATTTTTGCACATTTTGCCGGATTTTCAAGTT...
4367,546489,FBgn0031375,FBtr0330201,2L,stop_codon,1950927,1950929,-,TGACTCCGCTTTCAAGAAGTATGTGCCGGGTCCGTGTTCGGGCGCC...,GGTCTTTGTTTGGGTTAAGTGGAGTGTAAGCCAGGGCCAAGCACGG...
4368,546495,FBgn0031375,FBtr0330202,2L,start_codon,1954323,1954325,-,ACAAGAGCCGCACAACAAGCCAGATTAACGGAAAATGCAGTCGAAA...,TTTGTTTTATATTTTATTTTTGCACATTTTGCCGGATTTTCAAGTT...


100%|██████████| 4370/4370 [20:10<00:00,  3.61it/s]


Unnamed: 0,Gene_ID,Transcript_ID,position,primer_type,primer_sequence,stringency_level
0,FBgn0031086,FBtr0070032,start_codon,HAL-F,TCTTGGTTTTTGAATTTTGGCATG,1
0,FBgn0031086,FBtr0070032,start_codon,val-F,TCGGGTAACTCGACCATAGC,1
0,FBgn0031086,FBtr0070032,start_codon,HAL-R,CGTAATAATATTGCAATATGTGTCACTGTT,extended from 3
0,FBgn0031086,FBtr0070032,start_codon,HAR-F,GATACGACACCAATCTTCCAGTCC,1
0,FBgn0031086,FBtr0070032,start_codon,HAR-R,TCGCACTATTCTTCGAATGTCG,1
...,...,...,...,...,...,...
0,FBgn0031375,FBtr0330202,stop_codon,val-F,TCAACGCCCACTACAACCTC,1
0,FBgn0031375,FBtr0330202,stop_codon,HAL-R,GAAGAGACCGTTCAGTGCGCGGACAG,extended from 3
0,FBgn0031375,FBtr0330202,stop_codon,HAR-F,GCAGAAGGACCCCTGTCC,3
0,FBgn0031375,FBtr0330202,stop_codon,HAR-R,TGCTCCAGACCCATTTCCAC,1
