# Calculate chemical shifts (CS)

In [1]:
import MDAnalysis as mda
import sys
import shutil
# Put the parent directory on the import path *before* the project-local
# imports below, so they can be resolved when the notebook is run from here.
sys.path.append("..")
import data_process
from fetch_data import gsheet_to_csv
from src.features.build_features import get_chemical_shifts
# NOTE(review): `pd` and `os` are used in later cells without an explicit
# import, so they are presumably provided by this star import - confirm.
from spc_imports import *
set_up_plt()

  plt.rcParams["text.latex.preamble"] = [


In [2]:
# Data folders, relative to this notebook. Every path keeps its trailing
# slash because later cells build file names by plain string concatenation.
raw_data_dir = "../data/raw/"
interim_data_dir = "../data/interim/"
processed_data_dir = "../data/processed/"
external_data_dir = "../data/external/"

In [3]:
# Switch for the "Process trajectories" cell: that cell only does its work
# when this flag is true.
process_trajectories = True

## Get the Data

### Get experimental data from GSheets.

In [4]:
# Export the open-state assignments to a CSV file in the external data folder.
# NOTE(review): the worksheet name is 'open_clean' (underscore) here, while the
# closed-state cell uses 'closed clean' (space) - confirm both match the real
# tab names.
gsheet_to_csv(
    'KcsA_assignments',
    'open_clean',
    f'{external_data_dir}open_CS.csv',
)

In [5]:
# Export the closed-state assignments to a CSV file in the external data folder.
gsheet_to_csv(
    'KcsA_assignments',
    'closed clean',
    f'{external_data_dir}closed_CS.csv',
)

### Reshape data 

In [6]:
# Raw closed-state table, straight from the exported CSV.
# NOTE(review): `closed_data` is reassigned from the pickle in the
# "Make diff data" section - this load looks like a quick inspection only.
closed_data = pd.read_csv(f'{external_data_dir}closed_CS.csv')

In [7]:
# Raw open-state table, straight from the exported CSV.
# NOTE(review): `open_data` is reassigned from the pickle in the
# "Make diff data" section - this load looks like a quick inspection only.
open_data = pd.read_csv(f'{external_data_dir}open_CS.csv')

In [8]:
# Reshape each experimental table (one row per residue, one column per
# nucleus) into a single-row frame whose columns are a (resid, nuclei)
# MultiIndex, then pickle it into the processed data folder.
nuclei = ['N', 'C', 'CA', 'CB']
level_names = ['resid', 'nuclei']
for name in ('open_CS', 'closed_CS'):
    data = pd.read_csv(f'{external_data_dir}{name}.csv')
    # Drop the first character of every residue label and keep the number.
    data['Residue'] = [int(label[1:]) for label in data['Residue']]

    # Flatten the residue x nucleus table row by row; this matches the
    # ordering of the (resid, nuclei) product built from the same residues.
    flat_values = data[nuclei].to_numpy().reshape(1, len(data) * len(nuclei))
    observed = pd.MultiIndex.from_product([data['Residue'], nuclei],
                                          names=level_names)
    data = pd.DataFrame(flat_values, columns=observed)

    # Align on residues 26..120: residues without data become NaN columns,
    # residues outside that range are dropped.
    index = pd.MultiIndex.from_product([list(range(26, 121)), nuclei],
                                       names=level_names)
    data = data.reindex(index, axis=1)
    data.to_pickle(f'{processed_data_dir}{name}.pkl')

## Compute the open − closed difference data

In [9]:
# Reshaped closed-state shifts written by the "Reshape data" cell.
closed_data = pd.read_pickle(f'{processed_data_dir}closed_CS.pkl')

In [10]:
# Reshaped open-state shifts written by the "Reshape data" cell.
open_data = pd.read_pickle(f'{processed_data_dir}open_CS.pkl')

In [11]:
# Element-wise open minus closed; a column is NaN whenever either state
# has no value for that (resid, nuclei) pair.
diff_data = open_data.sub(closed_data)

In [12]:
# Persist the difference frame next to the per-state pickles.
diff_data.to_pickle(f'{processed_data_dir}diff_CS.pkl')

Trick for indexing MultiIndex columns: select only the columns that have no NaN in `diff_data`.

In [13]:
# Restrict the open-state frame to the columns that survive dropping every
# NaN-containing column of diff_data. Left as a bare expression so the
# notebook displays the result.
open_data.loc[:,diff_data.dropna(axis=1).columns]

resid,33,33,33,33,34,34,34,34,38,38,...,109,109,111,111,111,111,112,112,112,112
nuclei,N,C,CA,CB,N,C,CA,CB,N,C,...,CA,CB,N,C,CA,CB,N,C,CA,CB
0,119.827,175.293,67.129,67.896,125.861,177.421,67.131,31.11,122.486,177.369,...,55.1039,17.2193,122.616,177.46,55.19,16.3,118.2,176.572,67.274,68.108


### Process trajectories

In [16]:
# Simulation systems to process, keyed by their folder name, with the
# 'begin'/'end' window handed to data_process.process_trajectory.
# NOTE(review): units are not visible here (presumably trajectory time) and
# 1.e+20 looks like an "until the end" sentinel - confirm.
states = {
    '3FB5_lb': {'begin': 400000., 'end': 1000000.},
    '5VK6_lb': {'begin': 0., 'end': 350000.},
    '5VKH_lb': {'begin': 0, 'end': 1.e+20},
    '5VKE_lb': {'begin': 0, 'end': 1000000.},
}

In [17]:
def _move_replacing(src_path, dest_dir):
    """Move ``src_path`` into ``dest_dir``, replacing a same-named file there.

    Any file left in ``dest_dir`` by an earlier run is removed first, so the
    move into the directory cannot collide with it.
    """
    stale_copy = os.path.join(dest_dir, os.path.basename(src_path))
    if os.path.isfile(stale_copy):
        os.remove(stale_copy)
    shutil.move(src_path, dest_dir)


if process_trajectories:
    for key, window in states.items():
        print(key)
        raw_dir = f'{raw_data_dir}{key}'
        move_xtc_to = f'{interim_data_dir}{key}'
        os.makedirs(move_xtc_to, exist_ok=True)

        # (input file, extra process_trajectory options, file it produces).
        # The jobs run in this order, each output being moved to the interim
        # folder before the next job starts.
        # NOTE(review): only the protein trajectory is trimmed with
        # begin/end; the 'all' trajectory is processed in full - confirm
        # that this is intended.
        jobs = [
            ('traj_comp.xtc',
             {'begin': window['begin'], 'end': window['end'],
              'output_group': 'protein'},
             'protein_sk1_pbc.xtc'),
            ('traj_comp.xtc',
             {'output_group': 'all'},
             'all_sk1_pbc.xtc'),
            ('charmm_final.pdb',
             {'output_group': 'all', 'name': 'all_pbc.gro'},
             'all_pbc.gro'),
            ('charmm_final.pdb',
             {'output_group': 'protein'},
             'protein_pbc.pdb'),
        ]
        for input_name, options, produced in jobs:
            data_process.process_trajectory(input_name, raw_dir, **options)
            _move_replacing(f'{raw_dir}/{produced}', move_xtc_to)

5VKE_lb


### Get Chemical Shifts

In [16]:
# Predict chemical shifts for every state with each method and pickle the
# resulting frame inside that state's interim folder.
# NOTE(review): the trajectory loaded here is 'protein_pbc.xtc', whereas the
# "Process trajectories" cell moves 'protein_sk1_pbc.xtc' into this folder -
# confirm which file name is the intended input.
for method in ('ppm', 'sparta_plus'):
    for state in states:
        state_dir = f'{interim_data_dir}{state}'
        univ = mda.Universe(f'{state_dir}/protein_pbc.pdb',
                            f'{state_dir}/protein_pbc.xtc')
        df = get_chemical_shifts(univ, '../data/interim/', method=method)
        df.to_pickle(f'{state_dir}/CS_{method}_{state}.pkl')

t(A):   0%|          | 0/176 [00:00<?, ?it/s]


CalledProcessError: Command '['/home/sperez/data_partition/bin2/shiftx2/shiftx2.py', '-b', './trj0.pdb', '-p', '5.0', '-t', '298.00']' returned non-zero exit status 1.

### Reformat interim data

In [None]:
# Turn each interim prediction frame (indexed by a two-level MultiIndex) into
# a frame with that index as columns, then store it in the processed folder.
for method in ('ppm', 'sparta_plus'):
    for key in states:
        df = pd.read_pickle(f'{interim_data_dir}{key}/CS_{method}_{key}.pkl')

        # Transpose the values so the original row index becomes the columns.
        data = pd.DataFrame(df.to_numpy().T, columns=df.index)

        # Expand to the full product of both index levels; combinations that
        # are absent from df become NaN columns.
        index = pd.MultiIndex.from_product(
            [df.index.levels[0], df.index.levels[1]],
            names=['resid', 'nuclei'],
        )
        data = data.reindex(index, axis=1)

        # NOTE(review): the regex is matched against the column labels, which
        # are (resid, nuclei) tuples here - confirm it selects only the
        # intended C/N nuclei.
        data = data.filter(regex="(C|N)")

        out_dir = f'{processed_data_dir}{key}'
        os.makedirs(out_dir, exist_ok=True)
        data.to_pickle(f'{out_dir}/CS_{method}_{key}.pkl')