# Analysis

### Setup

In [1]:
import os
import pandas as pd
from src import analyse_data
from src import analyse_results
import mdtraj as md

## Analysis of data

Analysing input data using ``src/analyse_data``, including:
- CIDER parameters from Pappu Lab

### Calculating CIDER parameters

In [12]:
# Read the variant table; with orient='index' the top-level JSON keys become the row index.
# NOTE(review): `vars` shadows the Python builtin of the same name — kept here because
# other cells may refer to it; consider renaming notebook-wide.
json_vars_path = 'data/seqs/vars.json'
vars = pd.read_json(json_vars_path, orient='index')


def _sequence_for_variant(variant_row):
    """Return element 0 of what ``load_fasta_seq`` yields for the row's ``var_data`` entry."""
    return analyse_data.load_fasta_seq(variant_row['var_data'])[0]


# Attach the sequence of every variant as a new column
vars['sequence'] = vars.apply(_sequence_for_variant, axis=1)

# One single-row parameter frame per variant (labelled by the variant name), stacked together
params = pd.concat([
    analyse_data.cider_parameters(variant_seq, variant_name)
    for variant_name, variant_seq in vars['sequence'].items()
])

# Show the helper's name and docstring, followed by the table ordered by kappa
cider_fn = analyse_data.cider_parameters
print(f'{cider_fn.__name__.upper()}():')
print(cider_fn.__doc__)
params.sort_values('kappa')

CIDER_PARAMETERS():

    Takes a sequence, returns a DataFrame of its CIDER parameters.

    More on CIDER from PappuLab:
    - [CIDER](http://pappulab.wustl.edu/CIDER/about/)
    - [localCIDER](http://pappulab.github.io/localCIDER/)

    :param seq: Sequence to calculate parameters for
    :param name: Index of the single row in the DataFrame
    :return: A single-row DataFrame with select CIDER parameters
    


Unnamed: 0,kappa,FCR,NCPR,Hydrophobicity,Frac. dis. prom.
H3-4_WT,0.111152,0.325581,0.325581,3.232558,0.883721
H1-1_WT,0.130636,0.360656,0.344262,3.388525,0.885246
H3-4_RAND,0.141248,0.325581,0.325581,3.232558,0.883721
H1-3_WT,0.147564,0.363636,0.348485,3.375,0.916667
H1-4_WT,0.14824,0.375,0.359375,3.391406,0.953125
H1-0_WT,0.154432,0.432432,0.378378,3.24955,0.891892
H1-2_WT,0.155614,0.368852,0.352459,3.515574,0.893443
H1-0_RAND,0.173253,0.432432,0.378378,3.24955,0.891892
H1-3_RAND,0.196721,0.363636,0.348485,3.375,0.916667
H1-2_RAND,0.206212,0.368852,0.352459,3.515574,0.893443


## Analysis of results

Analysing simulation results using ``src/analyse_results``, including:

- Initial test runs of histone H1-0 and H2B variants

### Initial test runs

In [13]:
# Loading experiment
exp_dir = 'results/calvados/initial/'

# os.listdir returns entries in arbitrary order and also lists stray files
# (e.g. '.DS_Store'); keep only sub-directories and sort them so the table
# is reproducible between runs and machines.
experiments = sorted(
    entry for entry in os.listdir(exp_dir)
    if os.path.isdir(os.path.join(exp_dir, entry)))

# Each experiment directory is expected to hold '<name>/<name>.fasta'.
# Parse every FASTA file once and reuse the record for both columns:
# element 0 is stored as the sequence, element 2 as the description.
fasta_records = [
    analyse_data.load_fasta_seq(os.path.join(exp_dir, exp, exp + '.fasta'))
    for exp in experiments]
sims = pd.DataFrame({
    'sequence': [record[0] for record in fasta_records],
    'description': [record[2] for record in fasta_records]},
    index=experiments)

# Calculating simulation specs
sims['length'] = sims['sequence'].map(len)
# Value reported by analyse_results.simulation_time for each run's 'traj.log'
# (presumably hours, going by the column name — confirm against the helper).
sims['time_h'] = [
    analyse_results.simulation_time(os.path.join(exp_dir, exp, 'traj.log'))
    for exp in sims.index]

sims

Unnamed: 0,sequence,description,length,time_h
H1-0_WT,TKGVGASGSFRLAKSDEPKKSVAFKKTKKEIKKVATPKKASKPKKA...,Wild-type IDR,111,6.283084
H2B_WT,MPEPAKSAPAPKKGSKKAVTKAQKKDGKKRKRSRK,Wild-type IDR,35,4.929693
H1-0_RAND,PGTKKPKSTKTKKALAKAKKAKVKGSKAEKATSAAKKSVKPKKSKA...,Random shuffle IDR,111,6.409824
H2B_CLUST,RKRKKKKKKKRKKKKASAVTAAQGPPGSAPPMSED,Clustered charges IDR,35,5.971135
H1-0_CLUST,KKKKKKKKKKRKKKKKKKKKKKKKKKKKKKKKRKKKKKKKKKKKKA...,Clustered charges IDR,111,6.674773
H2B_RAND,EKKKKSDMKPKRKPVASQAAKPKSRKKKTPGRGAA,Random shuffle IDR,35,4.8357


In [None]:
# Loading trajectory
# The topology has to be handed to md.load through the `top` keyword: the
# second positional parameter of md.load is `discard_overlapping_frames`,
# so passing the PDB path positionally never reaches the DCD reader.
sims['trajectory'] = sims.apply(
    lambda row: md.load(
        os.path.join(exp_dir, row.name, 'traj.dcd'),
        top=os.path.join(exp_dir, row.name, 'top.pdb')),
    axis=1)

# Calculating Rg
# md.compute_rg is applied to each loaded trajectory; per the MDTraj docs it
# yields one radius-of-gyration value per frame, so each cell holds an array.
sims['R_g'] = sims.apply(lambda row: md.compute_rg(row['trajectory']), axis=1)

sims['R_g']

TODO
- run ``xcode-select --install``, reboot, and then ``pip install mdtraj``
- finish mdtraj code