In [137]:
# BPTI Gen Vel analysis
import MDAnalysis as mda
import numpy as np
from MDAnalysis.analysis import align, rms,pca
import copy
import phate 
# import pca from scikit-learn

from sklearn.decomposition import PCA
# import tsne
from sklearn.manifold import TSNE

# xMD testing
import pandas as pd
import os
from xMD.xMD import xMD
from xMD.MD_Settings import GROMACS_Settings

settings = GROMACS_Settings()


amber14sb_ff_path = os.path.join(os.getcwd())

# Set the GMXLIB environment variable
os.environ["GMXLIB"] = amber14sb_ff_path



settings = GROMACS_Settings()
settings.suffix = "APO_amber99"
# settings.search = "APO"

print(settings.config)

settings.topology = os.path.join(settings.topology,"BPTI")
print(settings.topology)
# make sure to turn on MPI for HPC 
settings.gmx_mpi_on = False



config
topology/BPTI


In [138]:
def load_cattraj_to_mda(cattraj, top_path:str):
    
    if top_path is None:
        top_path = cattraj.replace(".xtc", ".pdb")

    u = mda.Universe(top_path, cattraj)


    return u


def rmsd_to_df(u: mda.Universe, pdbcode:str, name:str, rep:int):

    df = pd.DataFrame(columns=["pdbcode", "name", "frame", "rmsd", "rep"])

    # Reference for RMSD (first frame)
    ref = u.copy()
    ref.trajectory[0]

    # Calculate RMSD
    rmsd_analysis = rms.RMSD(u, ref, select="name CA")
    rmsd_analysis.run()


    # Populate DataFrame
    for i, frame in enumerate(u.trajectory):
        frame_rmsd = rmsd_analysis.rmsd[i, 2]  # RMSD value
        df_to_add = pd.DataFrame([[pdbcode, name, i, frame_rmsd, rep]], columns=df.columns)
        df = pd.concat([df, df_to_add], ignore_index=True)
    
    return df

In [139]:
def run_multi_analysis(pdbcode: str, names: list, num_reps: int):

    df = pd.DataFrame(columns=["pdbcode", "name", "frame", "rmsd", "rep"])

    paths = []
    lengths = []
    reps = []

    for name in names:
        for rep in range(1,num_reps+1):
            print(f"Running {name} rep {rep}")
            
            md = xMD(settings, name, pdbcode, rep)
            
            data_dir = md.generate_path_structure()
            viz_dir = data_dir.replace("data", "visualisation")

            rep_dir = "R_" + str(rep)


            top_name = "_".join([md.settings.suffix,
                                        md.settings.pdbcode,
                                        str(1)]) + "-nojump" + ".pdb"
            

            cat_traj_name = "_".join([md.settings.suffix,
                                        md.settings.pdbcode]) + "_cat_"+str(rep) + ".xtc"

            cat_traj_path = os.path.join(data_dir, rep_dir, cat_traj_name)
            paths.append(cat_traj_path)
            print(cat_traj_path)
            top_path = os.path.join(data_dir, rep_dir, top_name)

            u = load_cattraj_to_mda(cat_traj_path, top_path)
            
            lengths.append(u.trajectory.n_frames)
            reps.append(rep)

            df_to_add = rmsd_to_df(u, pdbcode, name, rep)

            print(df_to_add)


            df = pd.concat([df, df_to_add], ignore_index=True)




    print(df)
    print(df.tail())

    dim_u = mda.Universe(top_path, *paths)
    ref_frame = dim_u.trajectory[0]
    ref_atoms = dim_u.select_atoms("name CA") 
    #align to first frame
    align.alignto(dim_u, ref_atoms, select="name CA")

    # save to temp dir 
    temp_dir = os.path.join(md.settings.temporary_directory, "MD", pdbcode)

    save_path = os.path.join(temp_dir, "dim_u.pdb")

    dim_u.trajectory[0]
    with mda.Writer(save_path, dim_u.atoms.n_atoms) as W:
        for ts in dim_u.trajectory:
            W.write(dim_u.atoms)


    # Get coordinates
    coordinates = np.zeros((dim_u.trajectory.n_frames, dim_u.atoms.n_atoms, 3))
    for i, frame in enumerate(dim_u.trajectory):
        coordinates[i] = frame.positions
    n_frames = coordinates.shape[0]
    n_atoms = coordinates.shape[1]


    # find average coordinates
    average_coordinates = np.mean(coordinates, axis=0)

    # find distance from average
    distance_from_average = np.zeros((n_frames, n_atoms))
    for i in range(n_frames):
        distance_from_average[i] = np.sqrt(np.sum((coordinates[i] - average_coordinates)**2, axis=1))

    

    print(coordinates.shape)
    print(distance_from_average.shape)

    pca = PCA(n_components=2)
    pca_results = pca.fit_transform(distance_from_average)

    # Add the PCA results to the DataFrame
    # Ensure df has the same number of rows as there are frames in the MD trajectory
    df["PCA1"] = pca_results[:, 0]
    df["PCA2"] = pca_results[:, 1]

    tsne = TSNE(n_components=2)

    tsne_results = tsne.fit_transform(distance_from_average)

    df["tSNE1"] = tsne_results[:, 0]
    df["tSNE2"] = tsne_results[:, 1]  

    # PHATE
    phate_op = phate.PHATE()

    phate_results = phate_op.fit_transform(distance_from_average)

    df["PHATE1"] = phate_results[:, 0]
    df["PHATE2"] = phate_results[:, 1]

    frames_per_rep = [frame for length in lengths for frame in range(1, length + 1)]
    reps_per_frame = [rep for rep, length in zip(reps, lengths) for _ in range(length)]

    df["frame"] = frames_per_rep
    df["rep"] = reps_per_frame

    df["traj"] = df["name"] + "_" + df["rep"].astype(str)

    return df


In [140]:
test_df = run_multi_analysis("5PTI", ["BPTI_genvel1","BPTI_genvel2","BPTI_genvel3","BPTI_genvel4","BPTI_genvel6"],5)

Running BPTI_genvel1 rep 1
Replicate number:  1
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel1
Trial directory logs:  logs/MD/5PTI/BPTI_genvel1
Trial directory data:  data/MD/5PTI/BPTI_genvel1
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel1
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel1
Environment variables set:  GMXLIB /home/alexi/Documents/xMD
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel1
Trial directory logs:  logs/MD/5PTI/BPTI_genvel1
Trial directory data:  data/MD/5PTI/BPTI_genvel1
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel1
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel1
data/MD/5PTI/BPTI_genvel1/R_1/APO_amber99_5PTI_cat_1.xtc
    pdbcode          name frame          rmsd rep
0      5PTI  BPTI_genvel1     0  1.770929e-07   1
1      5PTI  BPTI_genvel1     1  3.133262e-01   1
2      5PTI  BPTI_genvel1     2  3.275709e-01   1
3      5PTI  BPTI_genvel1     3  4.289775e-01   1
4      5PTI  BPTI_genv


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determi

    pdbcode          name frame          rmsd rep
0      5PTI  BPTI_genvel1     0  2.504471e-07   3
1      5PTI  BPTI_genvel1     1  2.865876e-01   3
2      5PTI  BPTI_genvel1     2  3.009970e-01   3
3      5PTI  BPTI_genvel1     3  2.657138e-01   3
4      5PTI  BPTI_genvel1     4  3.016803e-01   3
..      ...           ...   ...           ...  ..
221    5PTI  BPTI_genvel1   221  1.206830e+00   3
222    5PTI  BPTI_genvel1   222  1.198508e+00   3
223    5PTI  BPTI_genvel1   223  1.221125e+00   3
224    5PTI  BPTI_genvel1   224  1.440220e+00   3
225    5PTI  BPTI_genvel1   225  1.231908e+00   3

[226 rows x 5 columns]
Running BPTI_genvel1 rep 4
Replicate number:  4
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel1
Trial directory logs:  logs/MD/5PTI/BPTI_genvel1
Trial directory data:  data/MD/5PTI/BPTI_genvel1
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel1
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel1
Environment variables set:  GMXLIB /home/alexi/


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatena

    pdbcode          name frame          rmsd rep
0      5PTI  BPTI_genvel2     0  1.770929e-07   1
1      5PTI  BPTI_genvel2     1  2.561002e-01   1
2      5PTI  BPTI_genvel2     2  2.614082e-01   1
3      5PTI  BPTI_genvel2     3  2.341592e-01   1
4      5PTI  BPTI_genvel2     4  2.591327e-01   1
..      ...           ...   ...           ...  ..
221    5PTI  BPTI_genvel2   221  1.663970e+00   1
222    5PTI  BPTI_genvel2   222  1.830041e+00   1
223    5PTI  BPTI_genvel2   223  1.553991e+00   1
224    5PTI  BPTI_genvel2   224  1.427969e+00   1
225    5PTI  BPTI_genvel2   225  1.536234e+00   1

[226 rows x 5 columns]
Running BPTI_genvel2 rep 2
Replicate number:  2
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel2
Trial directory logs:  logs/MD/5PTI/BPTI_genvel2
Trial directory data:  data/MD/5PTI/BPTI_genvel2
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel2
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel2
Environment variables set:  GMXLIB /home/alexi/


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatena

    pdbcode          name frame          rmsd rep
0      5PTI  BPTI_genvel2     0  1.770929e-07   4
1      5PTI  BPTI_genvel2     1  2.430898e-01   4
2      5PTI  BPTI_genvel2     2  2.608044e-01   4
3      5PTI  BPTI_genvel2     3  3.013468e-01   4
4      5PTI  BPTI_genvel2     4  2.803474e-01   4
..      ...           ...   ...           ...  ..
221    5PTI  BPTI_genvel2   221  1.381946e+00   4
222    5PTI  BPTI_genvel2   222  1.843744e+00   4
223    5PTI  BPTI_genvel2   223  1.558792e+00   4
224    5PTI  BPTI_genvel2   224  1.733475e+00   4
225    5PTI  BPTI_genvel2   225  1.724735e+00   4

[226 rows x 5 columns]
Running BPTI_genvel2 rep 5
Replicate number:  5
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel2
Trial directory logs:  logs/MD/5PTI/BPTI_genvel2
Trial directory data:  data/MD/5PTI/BPTI_genvel2
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel2
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel2
Environment variables set:  GMXLIB /home/alexi/


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatena

    pdbcode          name frame          rmsd rep
0      5PTI  BPTI_genvel3     0  2.504471e-07   2
1      5PTI  BPTI_genvel3     1  6.128478e-01   2
2      5PTI  BPTI_genvel3     2  6.199188e-01   2
3      5PTI  BPTI_genvel3     3  6.413950e-01   2
4      5PTI  BPTI_genvel3     4  7.125599e-01   2
..      ...           ...   ...           ...  ..
221    5PTI  BPTI_genvel3   221  1.453397e+00   2
222    5PTI  BPTI_genvel3   222  1.599076e+00   2
223    5PTI  BPTI_genvel3   223  1.453441e+00   2
224    5PTI  BPTI_genvel3   224  1.315387e+00   2
225    5PTI  BPTI_genvel3   225  1.386295e+00   2

[226 rows x 5 columns]
Running BPTI_genvel3 rep 3
Replicate number:  3
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel3
Trial directory logs:  logs/MD/5PTI/BPTI_genvel3
Trial directory data:  data/MD/5PTI/BPTI_genvel3
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel3
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel3
Environment variables set:  GMXLIB /home/alexi/


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatena

    pdbcode          name frame      rmsd rep
0      5PTI  BPTI_genvel3     0  0.000000   5
1      5PTI  BPTI_genvel3     1  0.572827   5
2      5PTI  BPTI_genvel3     2  0.738719   5
3      5PTI  BPTI_genvel3     3  0.659528   5
4      5PTI  BPTI_genvel3     4  0.758698   5
..      ...           ...   ...       ...  ..
221    5PTI  BPTI_genvel3   221  1.036762   5
222    5PTI  BPTI_genvel3   222  1.160871   5
223    5PTI  BPTI_genvel3   223  1.083283   5
224    5PTI  BPTI_genvel3   224  1.129250   5
225    5PTI  BPTI_genvel3   225  1.320131   5

[226 rows x 5 columns]
Running BPTI_genvel4 rep 1
Replicate number:  1
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel4
Trial directory logs:  logs/MD/5PTI/BPTI_genvel4
Trial directory data:  data/MD/5PTI/BPTI_genvel4
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel4
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel4
Environment variables set:  GMXLIB /home/alexi/Documents/xMD
Trial directory temporary:  tempor


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatena

    pdbcode          name frame          rmsd rep
0      5PTI  BPTI_genvel4     0  1.770929e-07   3
1      5PTI  BPTI_genvel4     1  2.596986e-01   3
2      5PTI  BPTI_genvel4     2  2.504437e-01   3
3      5PTI  BPTI_genvel4     3  2.644285e-01   3
4      5PTI  BPTI_genvel4     4  2.652352e-01   3
..      ...           ...   ...           ...  ..
221    5PTI  BPTI_genvel4   221  1.305788e+00   3
222    5PTI  BPTI_genvel4   222  1.192482e+00   3
223    5PTI  BPTI_genvel4   223  1.055024e+00   3
224    5PTI  BPTI_genvel4   224  1.016009e+00   3
225    5PTI  BPTI_genvel4   225  1.247892e+00   3

[226 rows x 5 columns]
Running BPTI_genvel4 rep 4
Replicate number:  4
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel4
Trial directory logs:  logs/MD/5PTI/BPTI_genvel4
Trial directory data:  data/MD/5PTI/BPTI_genvel4
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel4
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel4
Environment variables set:  GMXLIB /home/alexi/


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatena

    pdbcode          name frame      rmsd rep
0      5PTI  BPTI_genvel6     0  0.000000   1
1      5PTI  BPTI_genvel6     1  0.518350   1
2      5PTI  BPTI_genvel6     2  0.745430   1
3      5PTI  BPTI_genvel6     3  0.811971   1
4      5PTI  BPTI_genvel6     4  0.761922   1
..      ...           ...   ...       ...  ..
221    5PTI  BPTI_genvel6   221  1.342104   1
222    5PTI  BPTI_genvel6   222  1.256182   1
223    5PTI  BPTI_genvel6   223  1.271084   1
224    5PTI  BPTI_genvel6   224  1.528505   1
225    5PTI  BPTI_genvel6   225  1.276090   1

[226 rows x 5 columns]
Running BPTI_genvel6 rep 2
Replicate number:  2
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel6
Trial directory logs:  logs/MD/5PTI/BPTI_genvel6
Trial directory data:  data/MD/5PTI/BPTI_genvel6
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel6
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel6
Environment variables set:  GMXLIB /home/alexi/Documents/xMD
Trial directory temporary:  tempor


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatena

    pdbcode          name frame          rmsd rep
0      5PTI  BPTI_genvel6     0  2.504471e-07   4
1      5PTI  BPTI_genvel6     1  6.250421e-01   4
2      5PTI  BPTI_genvel6     2  6.894706e-01   4
3      5PTI  BPTI_genvel6     3  6.504290e-01   4
4      5PTI  BPTI_genvel6     4  7.524961e-01   4
..      ...           ...   ...           ...  ..
221    5PTI  BPTI_genvel6   221  1.051259e+00   4
222    5PTI  BPTI_genvel6   222  1.200828e+00   4
223    5PTI  BPTI_genvel6   223  1.028238e+00   4
224    5PTI  BPTI_genvel6   224  1.192385e+00   4
225    5PTI  BPTI_genvel6   225  9.699499e-01   4

[226 rows x 5 columns]
Running BPTI_genvel6 rep 5
Replicate number:  5
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel6
Trial directory logs:  logs/MD/5PTI/BPTI_genvel6
Trial directory data:  data/MD/5PTI/BPTI_genvel6
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel6
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel6
Environment variables set:  GMXLIB /home/alexi/


The `rmsd` attribute was deprecated in MDAnalysis 2.0.0 and will be removed in MDAnalysis 3.0.0. Please use `results.rmsd` instead.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


Found no information for attr: 'formalcharges' Using default value of '0'


Found missing chainIDs. Corresponding atoms will use value of 'X'



(5650, 892, 3)
(5650, 892)
Calculating PHATE...
  Running PHATE on 5650 observations and 892 variables.
  Calculating graph and diffusion operator...
    Calculating PCA...
    Calculated PCA in 0.11 seconds.
    Calculating KNN search...
    Calculated KNN search in 0.56 seconds.
    Calculating affinities...
    Calculated affinities in 0.02 seconds.
  Calculated graph and diffusion operator in 0.70 seconds.
  Calculating landmark operator...
    Calculating SVD...
    Calculated SVD in 0.16 seconds.
    Calculating KMeans...
    Calculated KMeans in 1.51 seconds.
  Calculated landmark operator in 2.03 seconds.
  Calculating optimal t...
    Automatically selected t = 49
  Calculated optimal t in 0.70 seconds.
  Calculating diffusion potential...
  Calculated diffusion potential in 0.35 seconds.
  Calculating metric MDS...
  Calculated metric MDS in 2.17 seconds.
Calculated PHATE in 5.95 seconds.


In [141]:
print(test_df.to_string())

     pdbcode          name  frame          rmsd  rep        PCA1        PCA2      tSNE1      tSNE2    PHATE1    PHATE2            traj
0       5PTI  BPTI_genvel1      1  1.770929e-07    1  -72.041900    4.597205 -55.020016 -12.835320 -0.007155 -0.016252  BPTI_genvel1_1
1       5PTI  BPTI_genvel1      2  3.133262e-01    1  -64.295779    5.928279 -49.197315 -17.204510 -0.010309 -0.010320  BPTI_genvel1_1
2       5PTI  BPTI_genvel1      3  3.275709e-01    1  -62.026089    2.956961 -50.207447 -17.995174 -0.010561 -0.009873  BPTI_genvel1_1
3       5PTI  BPTI_genvel1      4  4.289775e-01    1  -75.242333   -0.033764 -58.434853 -17.898327 -0.008103 -0.014467  BPTI_genvel1_1
4       5PTI  BPTI_genvel1      5  2.695421e-01    1  -73.763941    1.017365 -56.940033  -8.326806 -0.007191 -0.016343  BPTI_genvel1_1
5       5PTI  BPTI_genvel1      6  2.856357e-01    1  -76.236126    2.837714 -58.071888  -4.742267 -0.006798 -0.017160  BPTI_genvel1_1
6       5PTI  BPTI_genvel1      7  2.905204e-01    1  -

In [142]:
# combine rep and name cols

test_df["traj"] = test_df["name"] + "_" + test_df["rep"].astype(str)

In [143]:
# plot rmsd using plotly
import plotly.express as px


In [144]:
#plot split by name  

fig = px.line(test_df, x="frame", y="rmsd", color="rep", facet_col="name", facet_col_wrap=2)
fig.show()

In [186]:
#plot split by name  

fig = px.line(test_df, x="frame", y="rmsd", color="traj")

fig.show()

In [202]:
# plot PCA colour by name and rep plot all on one graph

fig = px.scatter(test_df, x="PCA1", y="PCA2", color="frame", color_continuous_scale="Tropic", opacity=0.8)

fig.show()

In [194]:
# plot PCA colour by density plot all on one graph

fig = px.density_contour(test_df.loc[test_df["frame"] > 150], x="PCA1", y="PCA2", facet_col="name", facet_col_wrap=2)
fig.update_traces(contours_coloring="fill", contours_showlabels = True)
fig.show()

In [195]:
# plot PCA colour by density plot all on one graph

fig = px.density_contour(test_df.loc[test_df["frame"] > 150], x="tSNE1", y="tSNE2", facet_col="name", facet_col_wrap=2)
fig.update_traces(contours_coloring="fill", contours_showlabels = True)
fig.show()

In [196]:
# plot PCA colour by density plot all on one graph

fig = px.density_contour(test_df.loc[test_df["frame"] > 150], x="PHATE1", y="PHATE2", facet_col="name", facet_col_wrap=2)
fig.update_traces(contours_coloring="fill", contours_showlabels = True)
fig.show()

In [205]:
# plot PCA colour by traj plot all on one graph

fig = px.scatter(test_df, x="PHATE1", y="PHATE2", color="frame", color_continuous_scale="Tropic", opacity=0.80, hover_data=["traj"])

fig.show()

In [173]:
# plot PCA colour by name and rep plot all on one graph

fig = px.scatter(test_df, x="tSNE1", y="tSNE2", color="frame", color_continuous_scale="Tropic", opacity=0.8)

fig.show()

In [149]:
# plot PCA colour by name and rep plot all on one graph

fig = px.scatter(test_df, x="PCA1", y="PCA2", color="rep", facet_col="name", facet_col_wrap=2, color_continuous_scale=px.colors.sequential.Viridis)

fig.show()

In [150]:
# plot PCA colour by name and rep plot all on one graph

fig = px.scatter(test_df, x="tSNE1", y="tSNE2", color="rep", facet_col="name", facet_col_wrap=2, color_continuous_scale=px.colors.sequential.Viridis)

fig.show()

In [151]:
# plot PCA colour by name and rep plot all on one graph

fig = px.scatter(test_df, x="PHATE1", y="PHATE2", color="rep", facet_col="name", facet_col_wrap=2, color_continuous_scale=px.colors.sequential.Viridis)

fig.show()

In [152]:
#plot PCA colour by frame and name

fig = px.scatter(test_df, x="PCA1", y="PCA2", color="frame", facet_col="name", facet_col_wrap=2, color_continuous_scale="Tropic", opacity=0.8)

fig.show()



In [153]:
#plot PCA colour by frame and name

fig = px.scatter(test_df, x="tSNE1", y="tSNE2", color="frame", facet_col="name", facet_col_wrap=2, color_continuous_scale="Tropic", opacity=0.8)

fig.show()


In [154]:
#plot PCA colour by frame and name

fig = px.scatter(test_df, x="PHATE1", y="PHATE2", color="frame", facet_col="name", facet_col_wrap=2, color_continuous_scale="Tropic", opacity=0.8)

fig.show()


In [155]:
#plot PCA colour by frame and name filter by frame below 5 and above 220

fig = px.scatter(test_df.loc[(test_df["frame"] < 5) | (test_df["frame"] > 220)], x="PCA1", y="PCA2", color="frame", facet_col="name", facet_col_wrap=2, color_continuous_scale="Tropic", opacity=0.8)

fig.show()

In [156]:
#plot PCA colour by frame and name filter by frame below 5 and above 220

fig = px.scatter(test_df.loc[(test_df["frame"] < 5) | (test_df["frame"] > 220)], x="tSNE1", y="tSNE2", color="frame", facet_col="name", facet_col_wrap=2, color_continuous_scale="Tropic", opacity=0.8)

fig.show()

In [157]:
#plot PCA colour by frame and name filter by frame below 5 and above 220

fig = px.scatter(test_df.loc[(test_df["frame"] < 5) | (test_df["frame"] > 220)], x="PHATE1", y="PHATE2", color="frame", facet_col="name", facet_col_wrap=2, color_continuous_scale="Tropic", opacity=0.8)

fig.show()

In [158]:
# calculate the centroid of the PCA for the first 5 frames

first_5_frames = test_df.loc[test_df["frame"] < 5]

# print(first_5_frames)

centroid = np.mean(first_5_frames[["PCA1", "PCA2"]], axis=0)

# print(centroid)

# calculate the distance from the centroid for each frame

test_df["distance_from_centroid"] = np.sqrt((test_df["PCA1"] - centroid[0])**2 + (test_df["PCA2"] - centroid[1])**2)

# plot the distance from the centroid as histogram for the last 25 frames

grouped_df = test_df.loc[test_df["frame"] > 20]

fig = px.histogram(grouped_df,
                    x="distance_from_centroid", color="name", facet_col="name", facet_col_wrap=3)
fig.show()
# plot the mean and standard deviation of the distance from the centroid for each name
# Perform the groupby and aggregation
grouped_df = test_df.groupby(["name", "rep"]).agg({"distance_from_centroid": ["mean", "std"]}).reset_index()

# Flatten the multi-level columns
grouped_df.columns = ['_'.join(col).strip() if col[1] else col[0] for col in grouped_df.columns.values]

# Plot the mean and standard deviation of the distance from the centroid for each name
fig = px.bar(grouped_df,
             x="name", y="distance_from_centroid_mean", 
             error_y="distance_from_centroid_std")

fig.show()


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



In [159]:
# calculate the centroid of the PCA for the first 5 frames

first_5_frames = test_df.loc[test_df["frame"] < 5]

# print(first_5_frames)

centroid = np.mean(first_5_frames[["PCA1", "PCA2"]], axis=0)

# print(centroid)

# calculate the distance from the centroid for each frame

test_df["distance_from_centroid"] = np.sqrt((test_df["PCA1"] - centroid[0])**2 + (test_df["PCA2"] - centroid[1])**2)

# plot the distance from the centroid as histogram for the last 25 frames

grouped_df = test_df.loc[test_df["frame"] > 220]

fig = px.histogram(grouped_df,
                    x="distance_from_centroid", color="name", facet_col="name", facet_col_wrap=3)
fig.show()
# plot the mean and standard deviation of the distance from the centroid for each name
# Perform the groupby and aggregation
grouped_df = test_df.groupby(["name", "rep"]).agg({"distance_from_centroid": ["mean", "std"]}).reset_index()

# Flatten the multi-level columns
grouped_df.columns = ['_'.join(col).strip() if col[1] else col[0] for col in grouped_df.columns.values]

# Plot the mean and standard deviation of the distance from the centroid for each name
fig = px.bar(grouped_df,
             x="name", y="distance_from_centroid_mean", 
             error_y="distance_from_centroid_std")

fig.show()


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



In [160]:
#plot PCA colour by frame and name and rep

fig = px.scatter(test_df, x="tSNE1", y="tSNE2", color="frame", facet_col="name", facet_col_wrap=2)

fig.show()



In [161]:
md = xMD(settings, 'BPTI_genvel1', "5PTI", 1)


Replicate number:  1
Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel1
Trial directory logs:  logs/MD/5PTI/BPTI_genvel1
Trial directory data:  data/MD/5PTI/BPTI_genvel1
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel1
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel1
Environment variables set:  GMXLIB /home/alexi/Documents/xMD


In [162]:
md.generate_path_structure()

Trial directory temporary:  temporary/MD/5PTI/BPTI_genvel1
Trial directory logs:  logs/MD/5PTI/BPTI_genvel1
Trial directory data:  data/MD/5PTI/BPTI_genvel1
Trial directory visualisation:  visualisation/MD/5PTI/BPTI_genvel1
Trial directory analysis:  analysis/MD/5PTI/BPTI_genvel1


'data/MD/5PTI/BPTI_genvel1'