# Chapter 24: Application programming interfaces, modules, packages, syntactic sugar

## Getting help

In [10]:
# ls = list()
# help(ls)

## Parsing a FASTA file

In [11]:
from Bio import SeqIO
import io

In [13]:
with io.open("cluster_AB.fasta.txt") as fh:
    id = list()
    seq = list()
    for rec in SeqIO.parse(fh,"fasta"):
        id.append(rec.id)
        seq.append(rec.seq)

In [14]:
id[:5]

['FQVI7FG01E013T',
 'FQVI7FG01BCKNG',
 'FQVI7FG01C2RP4',
 'FQVI7FG01EHHQ0',
 'FQVI7FG01BREXN']

In [15]:
seq[:5]

[Seq('AAGTATTCGTGCGATTCAACGTTTACTTATCAAGTAGGAAGATTCTTGTGGGAT...CAT', SingleLetterAlphabet()),
 Seq('TCTTTTTGCAAGTGATATAGGTATTTCCTTACAGATGAGGAAGCGGAAGATGCC...AAT', SingleLetterAlphabet()),
 Seq('TGAGAAGTGGTTACCGGTGACTAAGGAATCGTATAATTATCTAGACATCAACAT...AAT', SingleLetterAlphabet()),
 Seq('AAAACAATAAGAAAACTTTAAGGGAAATGTGGCTCAACTTTGTTGAAACTGGCA...TAC', SingleLetterAlphabet()),
 Seq('CTAAGTATGGATTATCAGGAGCGTCCCACTTAGACGATCTTATGTACTTGTTCC...TTG', SingleLetterAlphabet())]

## My own method to parse a FASTA

In [16]:
import re
import io

no = sum(1 for i in io.open("cluster_AB.fasta.txt"))
with io.open("cluster_AB.fasta.txt") as fh:
    id = list()
    seq = list()
    for i in range(no):
        ln = fh.readline().strip()
        if re.search(r"^>",ln):
            id.append(ln[1:])
        else:
            seq.append(ln)

In [17]:
id[:5]

['FQVI7FG01E013T',
 'FQVI7FG01BCKNG',
 'FQVI7FG01C2RP4',
 'FQVI7FG01EHHQ0',
 'FQVI7FG01BREXN']

In [18]:
seq[:5]

['AAGTATTCGTGCGATTCAACGTTTACTTATCAAGTAGGAAGATTCTTGTGGGATTTGGCCGAGAATAGCA',
 'AAAATAAAGTATTTTCGTACTATTTTTCGTGCGTGTCGGAGCGAAGTATCTTCGGCCTTCAAGGAGCTAA',
 'GTATGGATTATCAGGAGCGTCCCACTTAGACGATCTTATGTACTTGTTCCATGGCAAGAAATTTGGAATA',
 'CCCGTCGACAAGAACCTACCCTCGTACAAAATGATACAACAGACTTGTGCCCTCTTTACAAATTTTGCTA',
 'AATATGGAAATCCCACACCTGAACCTGTGAACGGAGTAACCTGGCCTCCGTACAACAACATAACCCAAGA']

## Q1: Using the personalized module "bugpop"

In [1]:
import bugpop # note that the bugpop.py needs to be in the same directory

In [2]:
bugpop.bug().genome[:10]

['G', 'C', 'T', 'A', 'G', 'T', 'A', 'C', 'G', 'T']

In [3]:
popa = bugpop.population()
print(len(popa.bug_list))
print(popa.mean_fitness())

50
53.38


In [4]:
popa.create_offspring()
print(len(popa.bug_list))
print(popa.mean_fitness())

100
53.28


In [5]:
popa.cull()
print(len(popa.bug_list))
print(popa.mean_fitness())

50
56.84


#### getting help

In [7]:
help(bugpop)

Help on module bugpop:

NAME
    bugpop - Personal Module for the generation of bug populations

CLASSES
    builtins.object
        bug
        population
    
    class bug(builtins.object)
     |  class to generate individual bugs
     |  
     |  Methods defined here:
     |  
     |  __init__(self)
     |      initializing genome for a bug
     |  
     |  get_fitness(self)
     |      get a arbituary fitness score
     |  
     |  mutate_base(self)
     |      mutate a genome randomly
     |  
     |  set_base(self, index, base)
     |      artificially mutate a genome
     |  
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  
     |  __weakref__
     |      list of weak references to the object (if defined)
    
    class population(builtins.object)
     |  Class to manage a population of bugs
     |  
     |  Methods d

## Q2: Getting help for the strings

In [9]:
# stg = ""
# help(stg)

## Q3: PyVCF

#### installation of the module please see Question 3.ipynb

## Q4: count and print how many lines in trio.sample.vcf have a reference allele of "A"

> my module (vcfparse.py) was used instead

In [3]:
import vcfparse

In [6]:
dc_chr = vcfparse.get_vcf("trio.sample.vcf.txt").chr

In [7]:
dc_chr

{'1': <vcfparse.chromosome at 0x7f88848416d0>,
 '2': <vcfparse.chromosome at 0x7f887e740e50>,
 '3': <vcfparse.chromosome at 0x7f887e2f3850>,
 '4': <vcfparse.chromosome at 0x7f887df7a550>,
 '5': <vcfparse.chromosome at 0x7f887db9bca0>,
 '6': <vcfparse.chromosome at 0x7f887d8b9ac0>,
 '7': <vcfparse.chromosome at 0x7f887d563340>,
 '8': <vcfparse.chromosome at 0x7f887d29db20>,
 '9': <vcfparse.chromosome at 0x7f887cfd25e0>,
 '10': <vcfparse.chromosome at 0x7f887cdef0a0>,
 '11': <vcfparse.chromosome at 0x7f887cafd220>,
 '12': <vcfparse.chromosome at 0x7f887c7f2e80>,
 '13': <vcfparse.chromosome at 0x7f887c4d3220>,
 '14': <vcfparse.chromosome at 0x7f887c242d00>,
 '15': <vcfparse.chromosome at 0x7f887c0bc7c0>,
 '16': <vcfparse.chromosome at 0x7f887beef4c0>,
 '17': <vcfparse.chromosome at 0x7f887bd37760>,
 '18': <vcfparse.chromosome at 0x7f887bbacca0>,
 '19': <vcfparse.chromosome at 0x7f887b985760>,
 '20': <vcfparse.chromosome at 0x7f887b891b80>,
 '21': <vcfparse.chromosome at 0x7f887b75c400>,
 

In [13]:
dc_refA = dict()
for i in dc_chr.keys():
    dc_loc = dc_chr[i].snploc
    refA = 0
    for loc in dc_loc.keys():
        if dc_loc[loc].ref == "A":
            refA += 1
    dc_refA[i] = refA

In [14]:
dc_refA

{'1': 3271,
 '2': 3644,
 '3': 3192,
 '4': 3303,
 '5': 2745,
 '6': 2794,
 '7': 2409,
 '8': 2338,
 '9': 1905,
 '10': 2097,
 '11': 2162,
 '12': 2018,
 '13': 1791,
 '14': 1394,
 '15': 1176,
 '16': 1204,
 '17': 1027,
 '18': 1303,
 '19': 717,
 '20': 944,
 '21': 636,
 '22': 466,
 'X': 1147}