# Calculate Chemical Shifts (CS)

In [1]:
import MDAnalysis as mda
import sys
import shutil
sys.path.append("..")
import data_process
from fetch_data import gsheet_to_csv
from src.features.build_features import get_chemical_shifts
from spc_imports import *
set_up_plt()

  plt.rcParams["text.latex.preamble"] = [


In [2]:
# Relative paths to the project's data folders (the trailing slash is part of
# each value, so file names can be appended directly).
(raw_data_dir,
 interim_data_dir,
 processed_data_dir,
 external_data_dir) = ('../data/raw/',
                       '../data/interim/',
                       '../data/processed/',
                       '../data/external/')

In [3]:
# Switch for the trajectory preprocessing cells below: their bodies only run
# when this is True.
process_trajectories = False

In [4]:
# Show the current working directory (IPython magic).
%pwd

'/data/sperez/Projects/nmr_assign_state/notebooks'

## Get the Data

### Get experimental data from Google Sheets

In [5]:
# Fetch the open-state assignments ('open_clean' in 'KcsA_assignments') and
# store them as open_CS.csv in the external data folder.
# NOTE(review): this name uses an underscore while the closed-state one
# ('closed clean') uses a space — confirm both match the spreadsheet.
open_csv_path = external_data_dir + 'open_CS.csv'
gsheet_to_csv('KcsA_assignments', 'open_clean', open_csv_path)

In [6]:
# Fetch the closed-state assignments ('closed clean' in 'KcsA_assignments')
# and store them as closed_CS.csv in the external data folder.
closed_csv_path = external_data_dir + 'closed_CS.csv'
gsheet_to_csv('KcsA_assignments', 'closed clean', closed_csv_path)

### Reshape data 

In [7]:
# Load the closed-state CSV as downloaded, before any reshaping.
closed_data = pd.read_csv(f'{external_data_dir}closed_CS.csv')

In [8]:
# Load the open-state CSV as downloaded, before any reshaping.
open_data = pd.read_csv(f'{external_data_dir}open_CS.csv')

In [9]:
# Reshape each experimental table into a single-row frame whose columns are a
# (resid, nuclei) MultiIndex, then pickle it into the processed data folder.
nuclei = ['N', 'C', 'CA', 'CB']
# Target columns: residues 26..120 crossed with every nucleus. (resid, nuclei)
# pairs absent from a table become NaN after the reindex.
index = pd.MultiIndex.from_product([list(range(26, 121)), nuclei],
                                   names=['resid', 'nuclei'])
for name in ['open_CS', 'closed_CS']:
    table = pd.read_csv(f'{external_data_dir}{name}.csv')
    # Drop the first character of each residue label and parse the rest as
    # the integer residue number.
    table['Residue'] = [int(label[1:]) for label in table['Residue']]
    measured = pd.MultiIndex.from_product([table['Residue'], nuclei],
                                          names=['resid', 'nuclei'])
    # Row-major flattening yields N, C, CA, CB for the first residue, then the
    # next one, ... — the same order as the `measured` product index.
    flat = table[nuclei].to_numpy().reshape(1, len(table) * len(nuclei))
    data = pd.DataFrame(flat, columns=measured).reindex(index, axis=1)
    data.to_pickle(f'{processed_data_dir}{name}.pkl')

## Compute the difference data (open − closed)

In [10]:
# Reload the reshaped closed-state table (one row, (resid, nuclei) columns).
closed_data = pd.read_pickle(f'{processed_data_dir}closed_CS.pkl')

In [11]:
# Reload the reshaped open-state table (one row, (resid, nuclei) columns).
open_data = pd.read_pickle(f'{processed_data_dir}open_CS.pkl')

In [12]:
# Element-wise open minus closed; a column is NaN wherever either state has
# no value for that (resid, nuclei) pair.
diff_data = open_data.sub(closed_data)

In [13]:
# Persist the difference table next to the per-state pickles.
diff_data.to_pickle(f'{processed_data_dir}diff_CS.pkl')

Trick for MultiIndex data: select only the (resid, nuclei) columns that have a value in both the open and the closed state.

In [14]:
# Columns of the difference table without NaN, i.e. present in both states.
shared_columns = diff_data.dropna(axis=1).columns
open_data.loc[:, shared_columns]

resid,33,33,33,33,34,34,34,34,38,38,...,109,109,111,111,111,111,112,112,112,112
nuclei,N,C,CA,CB,N,C,CA,CB,N,C,...,CA,CB,N,C,CA,CB,N,C,CA,CB
0,119.827,175.293,67.129,67.896,125.861,177.421,67.131,31.11,122.486,177.369,...,55.1039,17.2193,122.616,177.46,55.19,16.3,118.2,176.572,67.274,68.108


### Process trajectories

In [15]:
# Simulations to process, keyed by their folder name under the raw/interim
# data directories. 'begin' and 'end' are forwarded to
# data_process.process_trajectory for the production trajectory.
# NOTE(review): presumably times in ps — confirm against process_trajectory.
states = {
    '3FB5_lb': dict(begin=400000., end=1000000.),
    # Other states, currently disabled:
    # '5VK6_lb': dict(begin=0., end=350000.),
    # '5VKH_lb': dict(begin=0, end=1.e+20),
    # '5VKE_lb': dict(begin=0, end=1000000.),
}

In [16]:
# Preprocess the production run of every state and collect the results in the
# state's interim folder. process_trajectory leaves its output in the raw
# folder, so each product is moved afterwards.
if process_trajectories:
    for key, window in states.items():
        print(key)
        raw_dir = f'../data/raw/{key}'
        move_xtc_to = f'../data/interim/{key}'
        os.makedirs(move_xtc_to, exist_ok=True)

        # (input file, process_trajectory keyword arguments, produced file)
        # NOTE(review): only the 'protein' trajectory is restricted to the
        # begin/end window; the 'all' trajectory is not — confirm intended.
        jobs = [
            ('traj_comp.xtc',
             dict(begin=window['begin'], end=window['end'],
                  output_group='protein'),
             'protein_sk1_pbc.xtc'),
            ('traj_comp.xtc',
             dict(output_group='all'),
             'all_sk1_pbc.xtc'),
            ('charmm_final.pdb',
             dict(output_group='all', name='all_pbc.gro'),
             'all_pbc.gro'),
            ('charmm_final.pdb',
             dict(output_group='protein'),
             'protein_pbc.pdb'),
        ]
        for source, options, produced in jobs:
            data_process.process_trajectory(source, raw_dir, **options)
            # Remove any copy left by an earlier run before moving the new
            # file into the folder.
            stale = f'{move_xtc_to}/{produced}'
            if os.path.isfile(stale):
                os.remove(stale)
            shutil.move(f'{raw_dir}/{produced}', move_xtc_to)

In [17]:
# Preprocess the protein-only eq0.xtc and eq.xtc trajectories of every state.
if process_trajectories:
    for key in states:
        print(key)
        raw_dir = f'../data/raw/{key}'
        move_xtc_to = f'../data/interim/{key}'
        os.makedirs(move_xtc_to, exist_ok=True)

        # Both inputs yield protein_sk1_pbc.xtc in the raw folder, so each
        # result is moved away under a prefixed name before the next input is
        # processed.
        for stage in ('eq0', 'eq'):
            data_process.process_trajectory(f'{stage}.xtc', raw_dir,
                                            output_group='protein')
            target = f'{move_xtc_to}/{stage}_protein_sk1_pbc.xtc'
            if os.path.isfile(target):
                os.remove(target)
            shutil.move(f'{raw_dir}/protein_sk1_pbc.xtc', target)

In [18]:
# Preprocess the full-system ('all' group) eq0.xtc and eq.xtc trajectories of
# every state.
if process_trajectories:
    for key in states:
        print(key)
        raw_dir = f'../data/raw/{key}'
        move_xtc_to = f'../data/interim/{key}'
        os.makedirs(move_xtc_to, exist_ok=True)

        # Both inputs yield all_sk1_pbc.xtc in the raw folder, so each result
        # is moved away under a prefixed name before the next input is
        # processed.
        for stage in ('eq0', 'eq'):
            data_process.process_trajectory(f'{stage}.xtc', raw_dir,
                                            output_group='all')
            target = f'{move_xtc_to}/{stage}_all_sk1_pbc.xtc'
            if os.path.isfile(target):
                os.remove(target)
            shutil.move(f'{raw_dir}/all_sk1_pbc.xtc', target)

### Get Chemical Shifts

In [None]:
import os

# Compute chemical shifts from the processed protein trajectory of every state
# with each prediction method, and store one pickle per (state, method).
for method in ['ppm', 'sparta_plus']:
    for state in states.keys():
        # Create the per-state output folder up front: DataFrame.to_pickle
        # does not create missing directories, and failing only after the
        # slow computation would throw the result away.
        output_dir = f'{processed_data_dir}{state}'
        os.makedirs(output_dir, exist_ok=True)

        univ = mda.Universe(interim_data_dir + f'{state}/protein_pbc.pdb',
                            interim_data_dir + f'{state}/protein_sk1_pbc.xtc')
        # Pass the shared processed_data_dir setting instead of a second
        # hard-coded copy of the same path.
        df = get_chemical_shifts(univ, processed_data_dir, method=method,
                                 skip=1)
        df.to_pickle(f'{output_dir}/CS_{method}_{state}.pkl')

100%|██████████| 2/2 [00:17<00:00,  8.51s/it]
 50%|█████     | 1/2 [03:27<03:27, 207.54s/it]

In [None]:
# Display the chemical-shift table from the last (method, state) pair
# processed by the loop above.
df