# This notebook shows the kinds of operations we can perform on FASTA files

In [None]:
import fasta
import utilities

In [None]:
# Read the example FASTA file into `seqs`, the dictionary every later cell in this notebook works with
seqs = utilities.load_sequences("files/example.fasta")

### The function `load_sequences` reads a FASTA file into a dictionary, with the sequence IDs as the keys and the full SeqRecord objects as the values

In [None]:
# Walk the dictionary and show both halves of each entry: the key (sequence ID)
# and the value (the full record).
for seq_id, seq_record in seqs.items():
    # Format with % instead of passing two arguments to print: the two-argument
    # form adds print's separator after the trailing space (double space on
    # Python 3) and prints a tuple repr on Python 2.
    print("The sequence ID is %s" % (seq_id,))
    # The explicit 1-tuple keeps % from unpacking the record if it is ever a tuple.
    print("And the full record is %s \n" % (seq_record,))

## There are some basic functions we can use to get some quick information on the FASTA file

In [None]:
# Hand the whole dictionary to the fasta module's overview helper
# NOTE(review): what the overview reports is defined in the fasta module, not in this notebook — check there
fasta.print_record_overview(seqs)

In [None]:
# Report the length of every sequence, read from the `seq` attribute of each record.
for seq_id, seq_record in seqs.items():
    # Format with % instead of passing two arguments to print: the two-argument
    # form adds print's separator after the trailing space (double space on
    # Python 3) and prints a tuple repr on Python 2.
    print("The sequence ID is %s" % (seq_id,))
    print("And the full record is %s amino acids long \n" % (len(seq_record.seq),))

## We can also save out our new records at any time

In [None]:
# Remember that seqs is a dictionary of SeqRecords, so we first have to convert it into a plain collection of SeqRecords

records = fasta.map_dict_to_records(seqs)

# Write the converted records out under a new filename
# NOTE(review): presumably an existing files/new_file.fasta gets overwritten — confirm in fasta.write_fasta
fasta.write_fasta(records=records, filename="files/new_file.fasta")

## And comparing two FASTA files is simple, too

In [None]:
# Load a second file, one that is missing the ARO89866.1 sequence
seqs_missing = utilities.load_sequences("files/example_missing.fasta")

# Compare the two sets first, then report the result in a separate step
only_in_first = fasta.compare_fasta(seqs, seqs_missing)
print("This sequence is in the first set but not the second -  %s " % (only_in_first))