-
Notifications
You must be signed in to change notification settings - Fork 1
/
alignments_q2.py
71 lines (60 loc) · 3.18 KB
/
alignments_q2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""
Code developed for application #4 question 2 of the course
Algorithmic Thinking II
"""
# imports needed for this code
import urllib2
import alignments_read_functions as rfs
import sequence_alignment as sa
# URLs for the data files used in the application
PAM50_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_PAM50.txt"
HUMAN_EYELESS_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_HumanEyelessProtein.txt"
FRUITFLY_EYELESS_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_FruitflyEyelessProtein.txt"
CONSENSUS_PAX_URL = "http://storage.googleapis.com/codeskulptor-alg/alg_ConsensusPAXDomain.txt"
WORD_LIST_URL = "http://storage.googleapis.com/codeskulptor-assets/assets_scrabble_words3.txt"
# Resulting local alignments from question 1
HUMAN_LOCAL = "HSGVNQLGGVFVNGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATPEVVSKIAQYKRECPSIFAWEIRDRLLSEGVCTNDNIPSVSSINRVLRNLASEK-QQ"
FRUITFLY_LOCAL = "HSGVNQLGGVFVGGRPLPDSTRQKIVELAHSGARPCDISRILQVSNGCVSKILGRYYETGSIRPRAIGGSKPRVATAEVVSKISQYKRECPSIFAWEIRDRLLQENVCTNDNIPSVSSINRVLRNLAAQKEQQ"
# Set the timeout for CodeSkulptor (only needed when this code is run in CodeSkulptor)
#import codeskulptor
#codeskulptor.set_timeout(20)
#####################################
# Code for answering question 2 of the application
# Load the consensus PAX domain sequence, then report the sequence itself
# and its length, followed by one empty line.
# Single-argument print(...) calls produce the same output under the
# Python 2 print statement and the Python 3 print function.
consensus_pax = rfs.read_protein(CONSENSUS_PAX_URL)
print("Consensus PAX domain protein:")
print(consensus_pax)
print(" ".join(["Length consensus PAX domain protein =", str(len(consensus_pax))]))
print("")
# Read the PAM50 scoring matrix from PAM50_URL (it is only loaded here, not printed)
scoring_matrix = rfs.read_scoring_matrix(PAM50_URL)
# Computations for the human and the fruitfly protein sequences


def _percent_agreement(sequence, consensus, scoring):
    """
    Return the percentage of agreeing positions in the global alignment
    of sequence against consensus.

    sequence  -- protein sequence with all '-' gap characters removed
    consensus -- sequence to align against (here the consensus PAX domain)
    scoring   -- scoring matrix, passed unchanged to the sequence_alignment
                 functions

    The alignment is computed with sa.compute_alignment_matrix followed by
    sa.compute_global_alignment. Entries 1 and 2 of that result are compared
    position by position (presumably the two aligned sequences, of equal
    length -- confirm against sequence_alignment). The percentage is taken
    relative to the length of entry 1. An empty alignment gives 0.0 instead
    of raising ZeroDivisionError.
    """
    alignment_matrix = sa.compute_alignment_matrix(sequence, consensus, scoring)
    global_alignment = sa.compute_global_alignment(sequence, consensus, scoring,
                                                   alignment_matrix)
    aligned_sequence = global_alignment[1]
    aligned_consensus = global_alignment[2]
    if not aligned_sequence:
        # Nothing was aligned, so there is nothing to agree on.
        return 0.0
    agreements = sum(1 for left, right in zip(aligned_sequence, aligned_consensus)
                     if left == right)
    # float() keeps this a true division under Python 2 as well.
    return (float(agreements) / len(aligned_sequence)) * 100


# Human: remove all '-' from the local alignment, align it globally against
# the ConsensusPAXDomain sequence and report the percentage of agreement.
# "%s" formats the value with str(), exactly as the print statement did.
human_local = HUMAN_LOCAL.replace('-', '')
print("%s %s %%" % (">>>>>>>>>>>>>>> Percentage of agreement human local =",
                    _percent_agreement(human_local, consensus_pax, scoring_matrix)))

# Fruitfly: same computation for the fruitfly local alignment.
fruitfly_local = FRUITFLY_LOCAL.replace('-', '')
print("%s %s %%" % (">>>>>>>>>>>>>>> Percentage of agreement fruitfly local =",
                    _percent_agreement(fruitfly_local, consensus_pax, scoring_matrix)))