In [1]:
import pandas as pd

#dfs = data frames - returns a list

dataframes = pd.read_html('https://www.neb.com/tools-and-resources/selection-charts/alphabetized-list-of-recognition-specificities')
dataframes

[                        Recognition Sequence               Enzyme
 0                                    AA/CGTT                 AclI
 1                                    A/AGCTT  HindIII HindIII-HF®
 2                                    AAT/ATT        SspI SspI-HF®
 3                                      /AATT        MluCI Tsp509I
 4                                    A/CATGT                 PciI
 5                                    A/CCGGT        AgeI AgeI-HF®
 6                                ACCTGC(4/8)          BspMI BfuAI
 7                                   A/CCWGGT                SexAI
 8                                    A/CGCGT        MluI MluI-HF®
 9                               ACGGC(12/14)                BceAI
 10                                     A/CGT             HpyCH4IV
 11                                    ACN/GT            HpyCH4III
 12                  (10/15)ACNNNNGTAYC(12/7)                 BaeI
 13                   (9/12)ACNNNNNCTCC(10/7)                B

In [2]:
restriction_enzymes = dataframes[0]
print(restriction_enzymes.shape)
restriction_enzymes.head()
#.head shows first 5 rows

(240, 2)


Unnamed: 0,Recognition Sequence,Enzyme
0,AA/CGTT,AclI
1,A/AGCTT,HindIII HindIII-HF®
2,AAT/ATT,SspI SspI-HF®
3,/AATT,MluCI Tsp509I
4,A/CATGT,PciI


All restriction endonuclease recognition specificities available from New England Biolabs are listed below. For enzymes that recognize non-palindromic sequences, the complementary sequence of each strand is listed. For example, CCTC(7/6) and (6/7)GAGG both represent an MnlI (NEB #R0163) site.

All recognition sequences are written 5´ to 3´ using the single letter code nomenclature with the point of cleavage indicated by a "/".

Numbers in parentheses indicate point of cleaveage for non-palindromic enzymes.

For example, GGTCTC(1/5) indicates cleavage at:

    5´ ...GGTCTCN/...3´
    3´ ...CCAGAGNNNNN/...5´
    
![](https://www.neb.com/~/media/NebUs/Page%20Images/Tools%20and%20Resources/charts/singlelettercode.jpg?device=modal)



In [4]:
~restriction_enzymes['Recognition Sequence'].str.contains('\(')

0       True
1       True
2       True
3       True
4       True
5       True
6      False
7       True
8       True
9      False
10      True
11      True
12     False
13     False
14      True
15      True
16     False
17     False
18      True
19      True
20      True
21      True
22      True
23      True
24     False
25      True
26      True
27      True
28     False
29      True
       ...  
210     True
211     True
212     True
213     True
214    False
215     True
216    False
217     True
218     True
219    False
220     True
221     True
222     True
223     True
224     True
225     True
226     True
227     True
228    False
229     True
230     True
231     True
232     True
233     True
234     True
235     True
236     True
237     True
238     True
239     True
Name: Recognition Sequence, Length: 240, dtype: bool

In [5]:
regular = restriction_enzymes.loc[~restriction_enzymes['Recognition Sequence'].str.contains('\(')]
print(regular.shape)
regular.head()
#regular refers to sequences that don't have paranthesis or numbers but these are not all true palindromes

(175, 2)


Unnamed: 0,Recognition Sequence,Enzyme
0,AA/CGTT,AclI
1,A/AGCTT,HindIII HindIII-HF®
2,AAT/ATT,SspI SspI-HF®
3,/AATT,MluCI Tsp509I
4,A/CATGT,PciI


In [6]:
regular_acgt = regular.loc[~regular['Recognition Sequence'].str.contains('[BDHKMNRSVWY]')]
print(regular_acgt.shape)
regular_acgt.head(10)

(100, 2)


Unnamed: 0,Recognition Sequence,Enzyme
0,AA/CGTT,AclI
1,A/AGCTT,HindIII HindIII-HF®
2,AAT/ATT,SspI SspI-HF®
3,/AATT,MluCI Tsp509I
4,A/CATGT,PciI
5,A/CCGGT,AgeI AgeI-HF®
8,A/CGCGT,MluI MluI-HF®
10,A/CGT,HpyCH4IV
15,A/CTAGT,SpeI SpeI-HF®
18,A/GATCT,BglII


In [7]:
regular_acgt['sequence_no_slash'] = regular_acgt['Recognition Sequence'].str.replace('/', '')
print(regular_acgt.shape)
regular_acgt.head(10)

(100, 3)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Recognition Sequence,Enzyme,sequence_no_slash
0,AA/CGTT,AclI,AACGTT
1,A/AGCTT,HindIII HindIII-HF®,AAGCTT
2,AAT/ATT,SspI SspI-HF®,AATATT
3,/AATT,MluCI Tsp509I,AATT
4,A/CATGT,PciI,ACATGT
5,A/CCGGT,AgeI AgeI-HF®,ACCGGT
8,A/CGCGT,MluI MluI-HF®,ACGCGT
10,A/CGT,HpyCH4IV,ACGT
15,A/CTAGT,SpeI SpeI-HF®,ACTAGT
18,A/GATCT,BglII,AGATCT


In [9]:
from Bio import SeqIO

records = SeqIO.parse('/Users/renuka.kumar/code/cupcakes/Mononegavirales243SequencesFASTA.fasta', 'fasta')
records

for record in records:
    for i, (original, enzyme, sequence) in regular_acgt.iterrows():
        if sequence in record:
            pass
#                print(f'{sequence} found!')
        else:
            print(f'{sequence} not found in {record.id}!\tEnzyme: {enzyme}')

AACGTT not found in gi|9632550|ref|NC_000855.1|!	Enzyme: AclI
ACTAGT not found in gi|9632550|ref|NC_000855.1|!	Enzyme: SpeI SpeI-HF®
ATGCAT not found in gi|9632550|ref|NC_000855.1|!	Enzyme: NsiI NsiI-HF®
ATTAAT not found in gi|9632550|ref|NC_000855.1|!	Enzyme: AseI
ATTTAAAT not found in gi|9632550|ref|NC_000855.1|!	Enzyme: SwaI
CACGTG not found in gi|9632550|ref|NC_000855.1|!	Enzyme: PmlI
CCGCGG not found in gi|9632550|ref|NC_000855.1|!	Enzyme: SacII
CCTCAGC not found in gi|9632550|ref|NC_000855.1|!	Enzyme: Nb.BbvCI
CCTGCAGG not found in gi|9632550|ref|NC_000855.1|!	Enzyme: SbfI SbfI-HF®
CGATCG not found in gi|9632550|ref|NC_000855.1|!	Enzyme: PvuI PvuI-HF®
CGGCCG not found in gi|9632550|ref|NC_000855.1|!	Enzyme: EagI EagI-HF®
CGTACG not found in gi|9632550|ref|NC_000855.1|!	Enzyme: BsiWI BsiWI-HF®
CTTAAG not found in gi|9632550|ref|NC_000855.1|!	Enzyme: AflII
GCCCGGGC not found in gi|9632550|ref|NC_000855.1|!	Enzyme: SrfI
GCCGGC not found in gi|9632550|ref|NC_000855.1|!	Enzyme: NaeI
G

CCTGCAGG not found in gi|9629198|ref|NC_001781.1|!	Enzyme: SbfI SbfI-HF®
CGATCG not found in gi|9629198|ref|NC_001781.1|!	Enzyme: PvuI PvuI-HF®
CGGCCG not found in gi|9629198|ref|NC_001781.1|!	Enzyme: EagI EagI-HF®
CTCGAG not found in gi|9629198|ref|NC_001781.1|!	Enzyme: XhoI PaeR7I TliI
GACGTC not found in gi|9629198|ref|NC_001781.1|!	Enzyme: AatII
GACGTC not found in gi|9629198|ref|NC_001781.1|!	Enzyme: ZraI
GCCCGGGC not found in gi|9629198|ref|NC_001781.1|!	Enzyme: SrfI
GCCGGC not found in gi|9629198|ref|NC_001781.1|!	Enzyme: NaeI
GCCGGC not found in gi|9629198|ref|NC_001781.1|!	Enzyme: NgoMIV
GCGATCGC not found in gi|9629198|ref|NC_001781.1|!	Enzyme: AsiSI
GCGCGC not found in gi|9629198|ref|NC_001781.1|!	Enzyme: BssHII
GCGGCCGC not found in gi|9629198|ref|NC_001781.1|!	Enzyme: NotI NotI-HF®
GCTAGC not found in gi|9629198|ref|NC_001781.1|!	Enzyme: NheI NheI-HF®
GCTAGC not found in gi|9629198|ref|NC_001781.1|!	Enzyme: BmtI BmtI-HF®
GGCCGGCC not found in gi|9629198|ref|NC_001781.1|!	E

ATTTAAAT not found in gi|13559808|ref|NC_002728.1|!	Enzyme: SwaI
CCGCGG not found in gi|13559808|ref|NC_002728.1|!	Enzyme: SacII
CCTGCAGG not found in gi|13559808|ref|NC_002728.1|!	Enzyme: SbfI SbfI-HF®
GCATGC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: SphI SphI-HF®
GCCCGGGC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: SrfI
GCGATCGC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: AsiSI
GCGCGC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: BssHII
GCGGCCGC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: NotI NotI-HF®
GGCCGGCC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: FseI
GGCGCC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: NarI
GGCGCC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: KasI
GGCGCC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: SfoI
GGCGCC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: PluTI
GGCGCGCC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: AscI
GTTTAAAC not found in gi|13559808|ref|NC_002728.1|!	Enzyme: PmeI
TACGTA 

TACGTA not found in gi|55770807|ref|NC_006432.1|!	Enzyme: SnaBI
TCGCGA not found in gi|55770807|ref|NC_006432.1|!	Enzyme: NruI NruI-HF®
ACCGGT not found in gi|56900714|ref|NC_006579.1|!	Enzyme: AgeI AgeI-HF®
ACGCGT not found in gi|56900714|ref|NC_006579.1|!	Enzyme: MluI MluI-HF®
AGCGCT not found in gi|56900714|ref|NC_006579.1|!	Enzyme: AfeI
ATCGAT not found in gi|56900714|ref|NC_006579.1|!	Enzyme: ClaI BspDI
ATTTAAAT not found in gi|56900714|ref|NC_006579.1|!	Enzyme: SwaI
CCCGGG not found in gi|56900714|ref|NC_006579.1|!	Enzyme: SmaI
CCCGGG not found in gi|56900714|ref|NC_006579.1|!	Enzyme: XmaI TspMI
CCGCGG not found in gi|56900714|ref|NC_006579.1|!	Enzyme: SacII
CCTGCAGG not found in gi|56900714|ref|NC_006579.1|!	Enzyme: SbfI SbfI-HF®
CGATCG not found in gi|56900714|ref|NC_006579.1|!	Enzyme: PvuI PvuI-HF®
CGGCCG not found in gi|56900714|ref|NC_006579.1|!	Enzyme: EagI EagI-HF®
CGTACG not found in gi|56900714|ref|NC_006579.1|!	Enzyme: BsiWI BsiWI-HF®
GACGTC not found in gi|56900714|ref

TCCGGA not found in gi|655454911|ref|NC_024296.1|!	Enzyme: BspEI
TCGCGA not found in gi|655454911|ref|NC_024296.1|!	Enzyme: NruI NruI-HF®
TGCGCA not found in gi|655454911|ref|NC_024296.1|!	Enzyme: FspI
TTAATTAA not found in gi|655454911|ref|NC_024296.1|!	Enzyme: PacI
TTCGAA not found in gi|655454911|ref|NC_024296.1|!	Enzyme: BstBI
AATATT not found in gi|658645608|ref|NC_024376.1|!	Enzyme: SspI SspI-HF®
ACATGT not found in gi|658645608|ref|NC_024376.1|!	Enzyme: PciI
ACGCGT not found in gi|658645608|ref|NC_024376.1|!	Enzyme: MluI MluI-HF®
ACTAGT not found in gi|658645608|ref|NC_024376.1|!	Enzyme: SpeI SpeI-HF®
ATTTAAAT not found in gi|658645608|ref|NC_024376.1|!	Enzyme: SwaI
CACGAG not found in gi|658645608|ref|NC_024376.1|!	Enzyme: Nb.BssSI
CGATCG not found in gi|658645608|ref|NC_024376.1|!	Enzyme: PvuI PvuI-HF®
GCCCGGGC not found in gi|658645608|ref|NC_024376.1|!	Enzyme: SrfI
GCGATCGC not found in gi|658645608|ref|NC_024376.1|!	Enzyme: AsiSI
GCGGCCGC not found in gi|658645608|ref|NC_02

GGCGCC not found in gi|700074610|ref|NC_025345.1|!	Enzyme: NarI
GGCGCC not found in gi|700074610|ref|NC_025345.1|!	Enzyme: KasI
GGCGCC not found in gi|700074610|ref|NC_025345.1|!	Enzyme: SfoI
GGCGCC not found in gi|700074610|ref|NC_025345.1|!	Enzyme: PluTI
GGCGCGCC not found in gi|700074610|ref|NC_025345.1|!	Enzyme: AscI
GGGCCC not found in gi|700074610|ref|NC_025345.1|!	Enzyme: ApaI
GGGCCC not found in gi|700074610|ref|NC_025345.1|!	Enzyme: PspOMI
GTTTAAAC not found in gi|700074610|ref|NC_025345.1|!	Enzyme: PmeI
TGCGCA not found in gi|700074610|ref|NC_025345.1|!	Enzyme: FspI
ACGCGT not found in gi|700074642|ref|NC_025347.1|!	Enzyme: MluI MluI-HF®
ACTAGT not found in gi|700074642|ref|NC_025347.1|!	Enzyme: SpeI SpeI-HF®
CCCGGG not found in gi|700074642|ref|NC_025347.1|!	Enzyme: SmaI
CCCGGG not found in gi|700074642|ref|NC_025347.1|!	Enzyme: XmaI TspMI
CCGCGG not found in gi|700074642|ref|NC_025347.1|!	Enzyme: SacII
CCTCAGC not found in gi|700074642|ref|NC_025347.1|!	Enzyme: Nb.BbvCI
CGT

TGCGCA not found in gi|700074782|ref|NC_025358.1|!	Enzyme: FspI
ACGCGT not found in gi|700085525|ref|NC_025359.1|!	Enzyme: MluI MluI-HF®
AGCGCT not found in gi|700085525|ref|NC_025359.1|!	Enzyme: AfeI
ATTTAAAT not found in gi|700085525|ref|NC_025359.1|!	Enzyme: SwaI
CCGCGG not found in gi|700085525|ref|NC_025359.1|!	Enzyme: SacII
CCTCAGC not found in gi|700085525|ref|NC_025359.1|!	Enzyme: Nb.BbvCI
CCTGCAGG not found in gi|700085525|ref|NC_025359.1|!	Enzyme: SbfI SbfI-HF®
CGATCG not found in gi|700085525|ref|NC_025359.1|!	Enzyme: PvuI PvuI-HF®
CGGCCG not found in gi|700085525|ref|NC_025359.1|!	Enzyme: EagI EagI-HF®
CGTACG not found in gi|700085525|ref|NC_025359.1|!	Enzyme: BsiWI BsiWI-HF®
CTGCAG not found in gi|700085525|ref|NC_025359.1|!	Enzyme: PstI PstI-HF®
GCATGC not found in gi|700085525|ref|NC_025359.1|!	Enzyme: SphI SphI-HF®
GCCCGGGC not found in gi|700085525|ref|NC_025359.1|!	Enzyme: SrfI
GCCGGC not found in gi|700085525|ref|NC_025359.1|!	Enzyme: NaeI
GCCGGC not found in gi|7000

GCGATCGC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: AsiSI
GCGCGC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: BssHII
GCGGCCGC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: NotI NotI-HF®
GCTAGC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: NheI NheI-HF®
GCTAGC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: BmtI BmtI-HF®
GGATCC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: BamHI BamHI-HF®
GGCCGGCC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: FseI
GGCGCC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: NarI
GGCGCC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: KasI
GGCGCC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: SfoI
GGCGCC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: PluTI
GGCGCGCC not found in gi|700895628|ref|NC_025383.1|!	Enzyme: AscI
TTAATTAA not found in gi|700895628|ref|NC_025383.1|!	Enzyme: PacI
ACGCGT not found in gi|700895635|ref|NC_025384.1|!	Enzyme: MluI MluI-HF®
ACTAGT not found in gi|700895635|ref|NC_025

GCCCGGGC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: SrfI
GCCGGC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: NaeI
GCCGGC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: NgoMIV
GCGATCGC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: AsiSI
GCGCGC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: BssHII
GCGGCCGC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: NotI NotI-HF®
GGCCGGCC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: FseI
GGCGCC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: NarI
GGCGCC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: KasI
GGCGCC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: SfoI
GGCGCC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: PluTI
GGCGCGCC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: AscI
GGGCCC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: ApaI
GGGCCC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: PspOMI
GTTTAAAC not found in gi|701219332|ref|NC_025396.1|!	Enzyme: PmeI
TACGTA not 

TACGTA not found in gi|701219433|ref|NC_025408.1|!	Enzyme: SnaBI
TGGCCA not found in gi|701219433|ref|NC_025408.1|!	Enzyme: MscI
TTAATTAA not found in gi|701219433|ref|NC_025408.1|!	Enzyme: PacI
ACGCGT not found in gi|701219439|ref|NC_025410.1|!	Enzyme: MluI MluI-HF®
ATCGAT not found in gi|701219439|ref|NC_025410.1|!	Enzyme: ClaI BspDI
ATTTAAAT not found in gi|701219439|ref|NC_025410.1|!	Enzyme: SwaI
CCCGGG not found in gi|701219439|ref|NC_025410.1|!	Enzyme: SmaI
CCCGGG not found in gi|701219439|ref|NC_025410.1|!	Enzyme: XmaI TspMI
CCTCAGC not found in gi|701219439|ref|NC_025410.1|!	Enzyme: Nb.BbvCI
CCTGCAGG not found in gi|701219439|ref|NC_025410.1|!	Enzyme: SbfI SbfI-HF®
CGATCG not found in gi|701219439|ref|NC_025410.1|!	Enzyme: PvuI PvuI-HF®
CGGCCG not found in gi|701219439|ref|NC_025410.1|!	Enzyme: EagI EagI-HF®
CGTACG not found in gi|701219439|ref|NC_025410.1|!	Enzyme: BsiWI BsiWI-HF®
GCCCGGGC not found in gi|701219439|ref|NC_025410.1|!	Enzyme: SrfI
GCGATCGC not found in gi|701219

AATATT not found in gi|946699743|ref|NC_028243.1|!	Enzyme: SspI SspI-HF®
ACGCGT not found in gi|946699743|ref|NC_028243.1|!	Enzyme: MluI MluI-HF®
ATTAAT not found in gi|946699743|ref|NC_028243.1|!	Enzyme: AseI
ATTTAAAT not found in gi|946699743|ref|NC_028243.1|!	Enzyme: SwaI
CCCGGG not found in gi|946699743|ref|NC_028243.1|!	Enzyme: SmaI
CCCGGG not found in gi|946699743|ref|NC_028243.1|!	Enzyme: XmaI TspMI
CCGCGG not found in gi|946699743|ref|NC_028243.1|!	Enzyme: SacII
CGGCCG not found in gi|946699743|ref|NC_028243.1|!	Enzyme: EagI EagI-HF®
GCAATG not found in gi|946699743|ref|NC_028243.1|!	Enzyme: Nb.BsrDI
GCCCGGGC not found in gi|946699743|ref|NC_028243.1|!	Enzyme: SrfI
GCGATCGC not found in gi|946699743|ref|NC_028243.1|!	Enzyme: AsiSI
GCGCGC not found in gi|946699743|ref|NC_028243.1|!	Enzyme: BssHII
GCGGCCGC not found in gi|946699743|ref|NC_028243.1|!	Enzyme: NotI NotI-HF®
GGCCGGCC not found in gi|946699743|ref|NC_028243.1|!	Enzyme: FseI
GGCGCC not found in gi|946699743|ref|NC_0282

GGCCGGCC not found in gi|1049010299|ref|NC_030691.1|!	Enzyme: FseI
GGCGCGCC not found in gi|1049010299|ref|NC_030691.1|!	Enzyme: AscI
GTATAC not found in gi|1049010299|ref|NC_030691.1|!	Enzyme: BstZ17I
GTATAC not found in gi|1049010299|ref|NC_030691.1|!	Enzyme: BstZ17I-HF®
GTTAAC not found in gi|1049010299|ref|NC_030691.1|!	Enzyme: HpaI
GTTTAAAC not found in gi|1049010299|ref|NC_030691.1|!	Enzyme: PmeI
TCCGGA not found in gi|1049010299|ref|NC_030691.1|!	Enzyme: BspEI
TCTAGA not found in gi|1049010299|ref|NC_030691.1|!	Enzyme: XbaI
TGCGCA not found in gi|1049010299|ref|NC_030691.1|!	Enzyme: FspI
AACGTT not found in gi|1049010306|ref|NC_030692.1|!	Enzyme: AclI
ACGCGT not found in gi|1049010306|ref|NC_030692.1|!	Enzyme: MluI MluI-HF®
ACTAGT not found in gi|1049010306|ref|NC_030692.1|!	Enzyme: SpeI SpeI-HF®
ATTTAAAT not found in gi|1049010306|ref|NC_030692.1|!	Enzyme: SwaI
CCTCAGC not found in gi|1049010306|ref|NC_030692.1|!	Enzyme: Nb.BbvCI
CCTGCAGG not found in gi|1049010306|ref|NC_03069

GGCCGGCC not found in gi|1070619066|ref|NC_031227.1|!	Enzyme: FseI
GGCGCGCC not found in gi|1070619066|ref|NC_031227.1|!	Enzyme: AscI
GGTACC not found in gi|1070619066|ref|NC_031227.1|!	Enzyme: KpnI KpnI-HF®
GGTACC not found in gi|1070619066|ref|NC_031227.1|!	Enzyme: Acc65I
TTAATTAA not found in gi|1070619066|ref|NC_031227.1|!	Enzyme: PacI
ACGCGT not found in gi|1070619073|ref|NC_031232.1|!	Enzyme: MluI MluI-HF®
AGCGCT not found in gi|1070619073|ref|NC_031232.1|!	Enzyme: AfeI
AGGCCT not found in gi|1070619073|ref|NC_031232.1|!	Enzyme: StuI
CCCGGG not found in gi|1070619073|ref|NC_031232.1|!	Enzyme: SmaI
CCCGGG not found in gi|1070619073|ref|NC_031232.1|!	Enzyme: XmaI TspMI
CCGCGG not found in gi|1070619073|ref|NC_031232.1|!	Enzyme: SacII
CCTAGG not found in gi|1070619073|ref|NC_031232.1|!	Enzyme: AvrII
CCTCAGC not found in gi|1070619073|ref|NC_031232.1|!	Enzyme: Nb.BbvCI
CCTGCAGG not found in gi|1070619073|ref|NC_031232.1|!	Enzyme: SbfI SbfI-HF®
CGTACG not found in gi|1070619073|ref|NC

GTGCAC not found in gi|1070640318|ref|NC_031276.1|!	Enzyme: ApaLI
GTTTAAAC not found in gi|1070640318|ref|NC_031276.1|!	Enzyme: PmeI
TACGTA not found in gi|1070640318|ref|NC_031276.1|!	Enzyme: SnaBI
TCGCGA not found in gi|1070640318|ref|NC_031276.1|!	Enzyme: NruI NruI-HF®
TGCGCA not found in gi|1070640318|ref|NC_031276.1|!	Enzyme: FspI
ACGCGT not found in gi|1070640418|ref|NC_031278.1|!	Enzyme: MluI MluI-HF®
ATTTAAAT not found in gi|1070640418|ref|NC_031278.1|!	Enzyme: SwaI
CACGAG not found in gi|1070640418|ref|NC_031278.1|!	Enzyme: Nb.BssSI
CCATGG not found in gi|1070640418|ref|NC_031278.1|!	Enzyme: NcoI NcoI-HF®
CCCGGG not found in gi|1070640418|ref|NC_031278.1|!	Enzyme: SmaI
CCCGGG not found in gi|1070640418|ref|NC_031278.1|!	Enzyme: XmaI TspMI
CCGCGG not found in gi|1070640418|ref|NC_031278.1|!	Enzyme: SacII
CCTAGG not found in gi|1070640418|ref|NC_031278.1|!	Enzyme: AvrII
CCTGCAGG not found in gi|1070640418|ref|NC_031278.1|!	Enzyme: SbfI SbfI-HF®
CGGCCG not found in gi|1070640418|

GGTACC not found in gi|1192699215|ref|NC_034531.1|!	Enzyme: KpnI KpnI-HF®
GGTACC not found in gi|1192699215|ref|NC_034531.1|!	Enzyme: Acc65I
GTCGAC not found in gi|1192699215|ref|NC_034531.1|!	Enzyme: SalI SalI-HF®
GTTTAAAC not found in gi|1192699215|ref|NC_034531.1|!	Enzyme: PmeI
TGCGCA not found in gi|1192699215|ref|NC_034531.1|!	Enzyme: FspI
TTAATTAA not found in gi|1192699215|ref|NC_034531.1|!	Enzyme: PacI
TTCGAA not found in gi|1192699215|ref|NC_034531.1|!	Enzyme: BstBI
ACCGGT not found in gi|1192699232|ref|NC_034533.1|!	Enzyme: AgeI AgeI-HF®
ACGCGT not found in gi|1192699232|ref|NC_034533.1|!	Enzyme: MluI MluI-HF®
AGGCCT not found in gi|1192699232|ref|NC_034533.1|!	Enzyme: StuI
CACGAG not found in gi|1192699232|ref|NC_034533.1|!	Enzyme: Nb.BssSI
CCCGGG not found in gi|1192699232|ref|NC_034533.1|!	Enzyme: SmaI
CCCGGG not found in gi|1192699232|ref|NC_034533.1|!	Enzyme: XmaI TspMI
CCGCGG not found in gi|1192699232|ref|NC_034533.1|!	Enzyme: SacII
CCTCAGC not found in gi|1192699232|r

In [10]:
from Bio import SeqIO

records = SeqIO.parse('/Users/renuka.kumar/code/cupcakes/Filovirus7SequencesFASTA.fasta', 'fasta')
records

for record in records:
    for i, (original, enzyme, sequence) in regular_acgt.iterrows():
        if sequence in record:
            pass
#                print(f'{sequence} found!')
        else:
            print(f'{sequence} not found in {record.id}!\tEnzyme: {enzyme}')

AGCGCT not found in gi|158539108|ref|NC_001608.3|!	Enzyme: AfeI
ATTTAAAT not found in gi|158539108|ref|NC_001608.3|!	Enzyme: SwaI
CCGCGG not found in gi|158539108|ref|NC_001608.3|!	Enzyme: SacII
CCTAGG not found in gi|158539108|ref|NC_001608.3|!	Enzyme: AvrII
CCTGCAGG not found in gi|158539108|ref|NC_001608.3|!	Enzyme: SbfI SbfI-HF®
CGATCG not found in gi|158539108|ref|NC_001608.3|!	Enzyme: PvuI PvuI-HF®
CGGCCG not found in gi|158539108|ref|NC_001608.3|!	Enzyme: EagI EagI-HF®
CGTACG not found in gi|158539108|ref|NC_001608.3|!	Enzyme: BsiWI BsiWI-HF®
GCATGC not found in gi|158539108|ref|NC_001608.3|!	Enzyme: SphI SphI-HF®
GCCCGGGC not found in gi|158539108|ref|NC_001608.3|!	Enzyme: SrfI
GCCGGC not found in gi|158539108|ref|NC_001608.3|!	Enzyme: NaeI
GCCGGC not found in gi|158539108|ref|NC_001608.3|!	Enzyme: NgoMIV
GCGATCGC not found in gi|158539108|ref|NC_001608.3|!	Enzyme: AsiSI
GCGCGC not found in gi|158539108|ref|NC_001608.3|!	Enzyme: BssHII
GCGGCCGC not found in gi|158539108|ref|NC_