In [1]:
# BPTI Gen Vel analysis
import MDAnalysis as mda
import numpy as np
from MDAnalysis.analysis import align, rms,pca
import copy

# import pca from scikit-learn

from sklearn.decomposition import PCA
# import tsne
from sklearn.manifold import TSNE

# xMD testing
import pandas as pd
import os
from xMD.xMD import xMD
from xMD.MD_Settings import GROMACS_Settings

settings = GROMACS_Settings()


amber14sb_ff_path = os.path.join(os.getcwd())

# Set the GMXLIB environment variable
os.environ["GMXLIB"] = amber14sb_ff_path



settings = GROMACS_Settings()
settings.suffix = "APO_amber99"
# settings.search = "APO"

print(settings.config)

settings.topology = os.path.join(settings.topology,"BPTI")
print(settings.topology)
# make sure to turn on MPI for HPC 
settings.gmx_mpi_on = False



  from .autonotebook import tqdm as notebook_tqdm

Due to the on going maintenance burden of keeping command line application
wrappers up to date, we have decided to deprecate and eventually remove these
modules.

We instead now recommend building your command line and invoking it directly
with the subprocess module.


/home/alexi/Documents/xMD
config
topology/BPTI


In [2]:
def load_cattraj_to_mda(cattraj, top_path:str):
    
    if top_path is None:
        top_path = cattraj.replace(".xtc", ".pdb")

    u = mda.Universe(top_path, cattraj)


    return u


def mda_to_df(u: mda.Universe, pdbcode:str, name:str):

    df = pd.DataFrame(columns=["pdbcode", "name", "frame", "rmsd"])

    # Reference for RMSD (first frame)
    ref = u.copy()
    ref.trajectory[0]

    # Calculate RMSD
    rmsd_analysis = rms.RMSD(u, ref, select="name CA")
    rmsd_analysis.run()


    # Populate DataFrame
    for i, frame in enumerate(u.trajectory):
        frame_rmsd = rmsd_analysis.rmsd[i, 2]  # RMSD value
        df_to_add = pd.DataFrame([[pdbcode, name, i, frame_rmsd, 0, 0]], columns=df.columns)
        df = pd.concat([df, df_to_add], ignore_index=True)
    
    return df

In [3]:
def run_multi_analysis(pdbcode:str, names:list, reps:int):

    df = pd.DataFrame(columns=["pdbcode", "name", "frame", "rmsd", "rep"])

    paths = []

    for name in names:
        for rep in range(1,reps+1):
            print(f"Running {name} rep {rep}")
            
            md = xMD(settings, name, pdbcode, rep)
            
            data_dir = md.generate_path_structure()
            viz_dir = data_dir.replace("data", "visualisation")

            rep_dir = "R_" + str(rep)


            cat_traj_name = "_".join([md.settings.suffix,
                                        md.settings.pdbcode,
                                        str(rep)]) + "-nojump" + ".xtc"

            cat_traj_path = os.path.join(data_dir, rep_dir, cat_traj_name)
            paths.append(cat_traj_path)
            top_name = cat_traj_name.replace(".xtc", ".pdb")
            top_path = os.path.join(data_dir, rep_dir, top_name)

            u = load_cattraj_to_mda(cat_traj_path, top_path)
            
            df_to_add = mda_to_df(u, pdbcode, name)
            
    print(df.head())
    print(df.tail())

    dim_u = mda.Universe(top_path, *paths)
    
    coordinates = np.zeros((dim_u.trajectory.n_frames, dim_u.atoms.n_atoms, 3))
    for i, frame in enumerate(dim_u.trajectory):
        coordinates[i] = frame.positions
    n_frames = coordinates.shape[0]
    n_atoms = coordinates.shape[1]
    coordinates = coordinates.reshape(n_frames, n_atoms * 3)


    print(coordinates.shape)


    pca = PCA(n_components=2)
    pca_results = pca.fit_transform(coordinates)

    # Add the PCA results to the DataFrame
    # Ensure df has the same number of rows as there are frames in the MD trajectory
    df["PCA1"] = pca_results[:, 0]
    df["PCA2"] = pca_results[:, 1]

    tsne = TSNE(n_components=2)

    tsne_results = tsne.fit_transform(coordinates)

    df["tSNE1"] = tsne_results[:, 0]
    df["tSNE2"] = tsne_results[:, 1]    


    return df


In [12]:
test_df = run_multi_analysis("5PTI", ["BPTI_genvel2","BPTI_genvel3","BPTI_genvel4"], 1)

Running BPTI_genvel2 rep 1
Replicate number:  1
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel2
Trial directory logs:  logs/MD/5PTI/BPTI_genvel2
Trial directory data:  data/MD/5PTI/BPTI_genvel2
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel2
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel2
Environment variables set:  GMXLIB /home/alexi/Documents/xMD
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel2
Trial directory logs:  logs/MD/5PTI/BPTI_genvel2
Trial directory data:  data/MD/5PTI/BPTI_genvel2
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel2
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel2
Running BPTI_genvel3 rep 1
Replicate number:  1
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel3
Trial directory logs:  logs/MD/5PTI/BPTI_genvel3
Trial directory data:  data/MD/5PTI/BPTI_genvel3
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel3
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel3



The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determi

(123, 2676)


In [13]:
test_df.head()

Unnamed: 0,pdbcode,name,frame,rmsd,PCA1,PCA2,tSNE1,tSNE2
0,5PTI,BPTI_genvel2,0,0.0,-17.520348,1.819369,1.206478,-6.307302
1,5PTI,BPTI_genvel2,1,0.248398,-13.331205,6.05772,2.54999,-6.758071
2,5PTI,BPTI_genvel2,2,0.251201,-7.972039,4.018529,3.439582,-4.383727
3,5PTI,BPTI_genvel2,3,0.260589,-11.164202,2.399386,-3.213284,-3.41837
4,5PTI,BPTI_genvel2,4,0.268593,-7.535991,-3.42668,-6.653988,-0.653764


In [14]:
# plot rmsd using plotly
import plotly.express as px


In [None]:
#plot pca



In [7]:
md = xMD(settings, 'BPTI_genvel1', "5PTI", 1)


Replicate number:  1
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel1
Trial directory logs:  logs/MD/5PTI/BPTI_genvel1
Trial directory data:  data/MD/5PTI/BPTI_genvel1
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel1
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel1
Environment variables set:  GMXLIB /home/alexi/Documents/xMD


In [8]:
md.generate_path_structure()

Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel1
Trial directory logs:  logs/MD/5PTI/BPTI_genvel1
Trial directory data:  data/MD/5PTI/BPTI_genvel1
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel1
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel1


'data/MD/5PTI/BPTI_genvel1'