In [1]:
import pandas as pd
import plotly.graph_objects as go
from collections import deque

In [2]:
# Read the FASTA file and concatenate every non-header line into a single
# amino-acid string. Header lines are the ones starting with '>'; each
# remaining line is stripped of surrounding whitespace before joining.
with open('../data/G-protein_coupled_receptor.fasta', 'r') as fasta_file:
    amino_acid_seq = ''.join(
        line.strip() for line in fasta_file if not line.startswith('>')
    )
print(amino_acid_seq)

MDIQMANNFTPPSATPQGNDCDLYAHHSTARIVMPLHYSLVFIIGLVGNLLALVVIVQNRKKINSTTLYSTNLVISDILFTTALPTRIAYYAMGFDWRIGDALCRITALVFYINTYAGVNFMTCLSIDRFIAVVHPLRYNKIKRIEHAKGVCIFVWILVFAQTLPLLINPMSKQEAERITCMEYPNFEETKSLPWILLGACFIGYVLPLIIILICYSQICCKLFRTAKQNPLTEKSGVNKKALNTIILIIVVFVLCFTPYHVAIIQHMIKKLRFSNFLECSQRHSFQISLHFTVCLMNFNCCMDPFIYFFACKGYKRKVMRMLKRQVSVSISSAVKSAPEENSREMTETQMMIHSKSSNGK


In [3]:
# Load the amino-acid property table, indexed by the '1-letter code' column
# so that rows can be looked up directly by a sequence character.
aap_df = pd.read_csv(
    '../data/amino_acid_properties.csv',
    index_col='1-letter code',
)

In [4]:
# Display the loaded property table to inspect its columns and index.
aap_df

Unnamed: 0_level_0,Name,3-letter code,Molecular Weight,Molecular Formula,Residue Formula,Residue Weight,pka1,pka2,pkaX,pI,hydropathy index (Kyte-Doolittle method),Accessible surface
1-letter code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
A,Alanine,Ala,89.1,C3H7NO2,C3H5NO,71.08,2.34,9.69,,6.0,1.8,44.1
R,Arginine,Arg,174.2,C6H14N4O2,C6H12N4O,156.19,2.17,9.04,12.48,10.76,-4.5,159.2
N,Asparagine,Asn,132.12,C4H8N2O3,C4H6N2O2,114.11,2.02,8.8,,5.41,-3.5,80.8
D,Aspartic acid,Asp,133.11,C4H7NO4,C4H5NO3,115.09,1.88,9.6,3.65,2.77,-3.5,76.3
C,Cysteine,Cys,121.16,C3H7NO2S,C3H5NOS,103.15,1.96,10.28,8.18,5.07,2.5,56.4
E,Glutamic acid,Glu,147.13,C5H9NO4,C5H7NO3,129.12,2.19,9.67,4.25,3.22,-3.5,99.2
Q,Glutamine,Gln,146.15,C5H10N2O3,C5H8N2O2,128.13,2.17,9.13,,5.65,-3.5,100.6
G,Glycine,Gly,75.07,C2H5NO2,C2H3NO,57.05,2.34,9.6,,5.97,-0.4,0.0
H,Histidine,His,155.16,C6H9N3O2,C6H7N3O,137.14,1.82,9.17,6.0,7.59,-3.2,98.2
I,Isoleucine,Ile,131.18,C6H13NO2,C6H11NO,113.16,2.36,9.6,,6.02,4.5,90.9


In [5]:
# Pull out the Kyte-Doolittle hydropathy column and turn it into a plain
# mapping from 1-letter code (the table index) to hydropathy index.
aap_series = aap_df.loc[:, 'hydropathy index (Kyte-Doolittle method)']
aap = dict(aap_series)

# Spot-check the lookup with the 'M' entry.
print(aap['M'])

1.9


In [6]:
# Translate the sequence into a list of hydropathy values, one per residue
# and in sequence order, by looking each character up in the `aap` mapping.
hydropathy = [aap[residue] for residue in amino_acid_seq]

In [13]:
# Plot the raw (unsmoothed) hydropathy value of every residue against its
# 0-based index in the sequence.
data = [
    go.Scatter(
        x=list(range(len(hydropathy))),
        y=hydropathy,
    )
]

layout = {
    'title': {
        'text': 'Human G-protein coupled receptor 183'
    },
    'yaxis': {
        'title': 'Hydropathy'
    },
    'xaxis': {
        'title': 'Position'
    }
}

fig = go.Figure(data=data, layout=layout)
fig.show()

In [8]:
def rolling_mean(lst, window_size):
    """Return the simple moving average of ``lst``.

    Each output value is the arithmetic mean of ``window_size`` consecutive
    input values. The first output covers the first ``window_size`` items and
    the window then advances one item at a time. Partial windows are never
    emitted, so an input with fewer than ``window_size`` items yields ``[]``.

    Parameters
    ----------
    lst : iterable of numbers
        Values to smooth. Any iterable is accepted (list, tuple, generator,
        ...); it is consumed once, in order.
    window_size : int
        Number of consecutive values averaged per output; must be >= 1.

    Returns
    -------
    list
        One mean per full window, in input order.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f'window_size must be >= 1, got {window_size!r}')

    # maxlen makes the deque drop its oldest item automatically on append,
    # so it always holds at most the last `window_size` values.
    window = deque(maxlen=window_size)
    smoothed_lst = []
    for value in lst:
        window.append(value)
        # Skip the warm-up phase: only emit once the window is full.
        if len(window) == window_size:
            # The window is re-summed each step (instead of keeping a running
            # total) so floating-point error does not accumulate.
            smoothed_lst.append(sum(window) / window_size)
    return smoothed_lst

In [9]:
# Smooth the per-residue hydropathy values with three different window
# sizes so the effect of the window width can be compared.
hydropathy_5, hydropathy_10, hydropathy_15 = (
    rolling_mean(hydropathy, window_size) for window_size in (5, 10, 15)
)

In [12]:
# Overlay the smoothed hydropathy profiles, one trace per window size.
# NOTE(review): each trace is plotted against the index into its own
# smoothed list, and rolling_mean's i-th value averages items i..i+window-1,
# so traces with different window sizes are not aligned to the same residues
# along x. Consider offsetting x if the curves are to be compared by position.
data = [
    go.Scatter(
        x=list(range(len(smoothed))),
        y=smoothed,
        name=f'window size {window_size}',
    )
    for window_size, smoothed in (
        (5, hydropathy_5),
        (10, hydropathy_10),
        (15, hydropathy_15),
    )
]

layout = {
    'title': {
        'text': 'Human G-protein coupled receptor 183'
    },
    'yaxis': {
        'title': 'Hydropathy'
    },
    'xaxis': {
        'title': 'Position'
    }
}

fig = go.Figure(data=data, layout=layout)
fig.show()