In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

In [2]:
# Directory that holds the input files and receives the output figure.
# The trailing slash is required: other cells build paths with plain
# string concatenation (sOutdir + 'filename').
sOutdir = '/casa/jimkwon/PROJECT/ERH/results/200425_psipred/'

In [12]:
# Paths of the conservation-score, disorder and PSIPRED input files,
# each formed by appending the file name to sOutdir.
sScoreFile, sDisorderFile, sPsipredFile = [
    sOutdir + sFileName
    for sFileName in ('DGCR8_scorecons.txt', 'DGCR8_isunstruct.txt', 'psipred4.ss2')
]

## Psipred

In [8]:
# Parse the PSIPRED file into per-residue secondary-structure values.
#
# Results:
#   lPsipred      -- list of [residue number (str), amino acid, plotted value]
#   lPsipredValue -- list of the plotted values only, in file order

# Numeric value plotted for each predicted state code.
# 'H' = helix, 'E' = strand; 'C' is the remaining state (presumably coil).
dPsipredValue = {'C': 0, 'H': 0.5, 'E': 1}

lPsipred = []
lPsipredValue = []
# The context manager closes the file handle once parsing is done.
with open(sPsipredFile) as fPsipred:
    for line in fPsipred:
        line = line.strip()
        # Skip comment/header lines and anything too short to be a data row.
        if line.startswith('#'):
            continue
        field = line.split()
        if len(field) < 3:
            continue
        nResidue = field[0]
        sSeq = field[1]
        sType = field[2]
        # Fail loudly on an unknown state code instead of silently reusing
        # the value of the previous residue (or hitting a NameError on the
        # first row), which would corrupt the plotted track.
        if sType not in dPsipredValue:
            raise ValueError(
                'Unexpected secondary structure code {!r} at residue {}'.format(sType, nResidue))
        fValue = dPsipredValue[sType]
        lPsipred.append([nResidue, sSeq, fValue])
        lPsipredValue.append(fValue)
# Show only the tail as a quick sanity check of the parse.
print(lPsipred[-5:])
print(lPsipredValue[-5:])

[['769', 'C', 0], ['770', 'T', 0], ['771', 'V', 0], ['772', 'D', 0], ['773', 'V', 0]]
[0, 0, 0, 0, 0]


## Conservation score

In [9]:
# Parse the conservation-score file.
#
# Results:
#   lScore      -- list of [amino acid, score] pairs, in file order
#   lScoreValue -- list of the scores only
# Scores are kept as the strings read from the file; the plotting cell
# converts them with float().
lScore = []
lScoreValue = []
# The context manager closes the file handle once parsing is done.
with open(sScoreFile) as fScoreFile:
    for line in fScoreFile:
        line = line.strip()
        # Skip comment lines and rows with fewer than three columns.
        if line.startswith('#'):
            continue
        field = line.split()
        if len(field) < 3:
            continue
        fScore = field[0]  # first column: score
        sSeq = field[1]    # second column: residue letter
        lScore.append([sSeq, fScore])
        lScoreValue.append(fScore)
print(lScore)
print('---')
print(lScoreValue)

[['M', '0.825'], ['E', '0.920'], ['T', '0.404'], ['D', '0.536'], ['E', '0.659'], ['S', '0.435'], ['P', '0.534'], ['S', '0.881'], ['P', '1.000'], ['L', '1.000'], ['P', '1.000'], ['C', '0.280'], ['G', '0.617'], ['P', '0.798'], ['A', '0.564'], ['G', '0.337'], ['E', '0.076'], ['A', '0.047'], ['V', '0.048'], ['M', '0.069'], ['E', '0.095'], ['S', '0.060'], ['R', '0.016'], ['A', '0.091'], ['R', '0.067'], ['P', '0.295'], ['F', '0.403'], ['Q', '0.366'], ['A', '0.347'], ['L', '0.305'], ['P', '0.420'], ['R', '0.504'], ['E', '0.422'], ['Q', '0.593'], ['S', '0.881'], ['P', '1.000'], ['P', '1.000'], ['P', '1.000'], ['P', '1.000'], ['L', '1.000'], ['Q', '1.000'], ['T', '1.000'], ['S', '1.000'], ['S', '1.000'], ['G', '0.881'], ['A', '1.000'], ['E', '1.000'], ['V', '0.634'], ['M', '1.000'], ['D', '1.000'], ['V', '1.000'], ['G', '0.724'], ['S', '1.000'], ['G', '1.000'], ['G', '1.000'], ['D', '1.000'], ['G', '1.000'], ['Q', '0.584'], ['S', '0.510'], ['E', '0.413'], ['L', '0.766'], ['P', '1.000'], ['A', '

## Disordered

In [13]:
# Parse the disorder-prediction file.
#
# Results:
#   lDisorder      -- list of [residue number, amino acid, disorder value]
#   lDisorderValue -- list of the disorder values only, in file order
# All entries are kept as the strings read from the file; the plotting cell
# converts the values with float().
lDisorder = []
lDisorderValue = []
# The context manager closes the file handle once parsing is done.
with open(sDisorderFile) as fDisorderFile:
    for line in fDisorderFile:
        line = line.strip()
        if line.startswith('#'):
            continue
        field = line.split()
        # The value is read from the fourth column (field[3]), so a data row
        # needs at least four columns; shorter rows are skipped rather than
        # raising IndexError.
        if len(field) < 4:
            continue
        nResidue = field[0]
        fDisorder = field[3]
        sSeq = field[1]
        lDisorder.append([nResidue, sSeq, fDisorder])
        lDisorderValue.append(fDisorder)
print(lDisorder)
print('---')
print(lDisorderValue)

[['1', 'M', '0.998'], ['2', 'E', '0.996'], ['3', 'T', '0.994'], ['4', 'D', '0.993'], ['5', 'E', '0.992'], ['6', 'S', '0.991'], ['7', 'P', '0.988'], ['8', 'S', '0.981'], ['9', 'P', '0.969'], ['10', 'L', '0.947'], ['11', 'P', '0.940'], ['12', 'C', '0.924'], ['13', 'G', '0.929'], ['14', 'P', '0.934'], ['15', 'A', '0.932'], ['16', 'G', '0.931'], ['17', 'E', '0.930'], ['18', 'A', '0.923'], ['19', 'V', '0.919'], ['20', 'M', '0.928'], ['21', 'E', '0.941'], ['22', 'S', '0.947'], ['23', 'R', '0.946'], ['24', 'A', '0.943'], ['25', 'R', '0.942'], ['26', 'P', '0.939'], ['27', 'F', '0.928'], ['28', 'Q', '0.936'], ['29', 'A', '0.938'], ['30', 'L', '0.943'], ['31', 'P', '0.962'], ['32', 'R', '0.973'], ['33', 'E', '0.981'], ['34', 'Q', '0.986'], ['35', 'S', '0.989'], ['36', 'P', '0.991'], ['37', 'P', '0.990'], ['38', 'P', '0.988'], ['39', 'P', '0.983'], ['40', 'L', '0.974'], ['41', 'Q', '0.974'], ['42', 'T', '0.972'], ['43', 'S', '0.971'], ['44', 'S', '0.965'], ['45', 'G', '0.953'], ['46', 'A', '0.940

## Draw figures


In [63]:
# Draw the three per-residue tracks (disorder, conservation, secondary
# structure) as stacked panels sharing the same x range, highlight the ERH
# region, and save the figure as a PDF in sOutdir.
fig = plt.figure(figsize=(12,6))

lGroup = [lDisorderValue, lScoreValue, lPsipredValue]
lName = ['Disordered', 'Conservation', '2nd_Structure (0.5=alpha, 1=strand)']

n = 275  # number of leading residues to plot; also sets the x-axis extent
# The x axis is the 0-based position in each list, so residue k sits at x = k-1.
nErhStart = 95 # 96th residue
nErhEnd = 138 #139th residue

for nOrder, (sName, lValue) in enumerate(zip(lName, lGroup)):

    ax = fig.add_subplot(len(lGroup), 1, nOrder + 1)

    # Values may be strings (as read from the files), so convert explicitly.
    j_count = [float(value) for value in lValue[:n]]
    # Size x from the data actually available so that a track with fewer
    # than n values still plots instead of raising a shape mismatch.
    i_count = np.arange(len(j_count))

    ax.plot(i_count, j_count, 'k-', linewidth=1.2)
    ax.axvspan(nErhStart, nErhEnd, alpha=0.2, color='crimson')

    # Dashed reference line at 0.5 for the two score tracks only.
    if sName in ('Disordered', 'Conservation'):
        ax.axhline(y=0.5, color='gray', linestyle='--')

    # Axis extent and ticks follow n, so changing n updates everything.
    ax.set_xlim([0, n])
    ax.set_ylim([-0.1, 1.1])
    ax.set_title(sName)
    ax.set_xticks(np.arange(0, n + 1, 25))

fig.tight_layout()

# Save and close this specific figure rather than relying on pyplot's
# notion of the "current" figure.
fig.savefig(sOutdir + 'psipred.pdf', transparent=True)
plt.close(fig)
