In [1]:
import numpy as np
import pandas as pd
import json
import os

In [2]:
# Replicate run directories, resolved relative to the current working directory.
subdirs = ['500_r1', '500_r2', '500_r3', '500_r4', '500_r5', '500_r6']
dataframes = {}  # maps 'df_<subdir>' -> DataFrame read from that run's CSV

# Read openmm_statistics.csv from every replicate directory that contains one.
for subdir in subdirs:
    csv_file_path = os.path.join(os.getcwd(), subdir, 'openmm_statistics.csv')

    # Report and skip replicates whose statistics file is missing.
    if not os.path.exists(csv_file_path):
        print(f"CSV file not found in {subdir}")
        continue

    dataframes[f'df_{subdir}'] = pd.read_csv(csv_file_path)
    print(f"Data from {subdir} loaded successfully into dataframe: {subdir}")

# Preview the first rows of each table that was loaded.
for name, df in dataframes.items():
    print(f"Dataframe '{name}':")
    print(df.head())

Data from 500_r1 loaded successfully into dataframe: 500_r1
Data from 500_r2 loaded successfully into dataframe: 500_r2
Data from 500_r3 loaded successfully into dataframe: 500_r3
Data from 500_r4 loaded successfully into dataframe: 500_r4
Data from 500_r5 loaded successfully into dataframe: 500_r5
Data from 500_r6 loaded successfully into dataframe: 500_r6
Dataframe 'df_500_r1':
   #"Step"  Potential Energy (kJ/mole)  Kinetic Energy (kJ/mole)  \
0     2000                33560.084365              19377.997873   
1     4000                33315.971654              19745.349717   
2     6000                33494.813067              20222.252227   
3     8000                33495.881493              19990.496594   
4    10000                33222.176093              19843.434133   

   Total Energy (kJ/mole)  Temperature (K)  Box Volume (nm^3)  Density (g/mL)  \
0            52938.082238       287.201163          63.771038        1.071383   
1            53061.321371       292.645682    

In [3]:
# Column-wise mean of each replicate's statistics table; the six names are
# bound in replicate order r1..r6.
(
    means_500_r1,
    means_500_r2,
    means_500_r3,
    means_500_r4,
    means_500_r5,
    means_500_r6,
) = (dataframes[f'df_500_r{rep}'].mean() for rep in range(1, 7))

In [4]:
# Gather the per-replicate column means under the labels 'rep1'..'rep6'.
means_500_dict = {
    'rep1': means_500_r1,
    'rep2': means_500_r2,
    'rep3': means_500_r3,
    'rep4': means_500_r4,
    'rep5': means_500_r5,
    'rep6': means_500_r6,
}

In [5]:
# Display the mapping of replicate label ('rep1'..'rep6') to its column means.
means_500_dict

{'rep1': #"Step"                       501000.000000
 Potential Energy (kJ/mole)     31504.409855
 Kinetic Energy (kJ/mole)       19665.431810
 Total Energy (kJ/mole)         51169.841666
 Temperature (K)                  291.461220
 Box Volume (nm^3)                 60.902604
 Density (g/mL)                     1.121968
 Speed (ns/day)                   444.548000
 dtype: float64,
 'rep2': #"Step"                       501000.000000
 Potential Energy (kJ/mole)     31514.147336
 Kinetic Energy (kJ/mole)       19658.256034
 Total Energy (kJ/mole)         51172.403370
 Temperature (K)                  291.354867
 Box Volume (nm^3)                 60.865638
 Density (g/mL)                     1.122618
 Speed (ns/day)                   446.578000
 dtype: float64,
 'rep3': #"Step"                       501000.000000
 Potential Energy (kJ/mole)     31499.593027
 Kinetic Energy (kJ/mole)       19640.740966
 Total Energy (kJ/mole)         51140.333993
 Temperature (K)                  291.0952

In [6]:
# Per-replicate means of each observable, appended in the iteration order of
# means_500_dict (one entry per replicate).
PE_500 = []
KE_500 = []
TE_500 = []
bVol_500 = []
dens_500 = []

for key, value_list in means_500_dict.items():
    print(f'Key: {key}')
    # Select each quantity by its column label rather than by position, so a
    # reordered or extended CSV cannot silently feed the wrong column into
    # the statistics computed from these lists.
    PE_500.append(value_list['Potential Energy (kJ/mole)'])
    KE_500.append(value_list['Kinetic Energy (kJ/mole)'])
    TE_500.append(value_list['Total Energy (kJ/mole)'])
    bVol_500.append(value_list['Box Volume (nm^3)'])
    dens_500.append(value_list['Density (g/mL)'])


Key: rep1
Key: rep2
Key: rep3
Key: rep4
Key: rep5
Key: rep6


In [7]:
# Mean, sample standard deviation (ddof=1) and standard error of the mean of
# the potential energy across replicates.
pe_samples = np.asarray(PE_500)
PE_500_mean = pe_samples.mean()
PE_500_std = pe_samples.std(ddof=1)
PE_500_err = PE_500_std / np.sqrt(pe_samples.size)
print(f'Hmix 500 replicates, Potential Energy (kJ/mol): {PE_500_mean:.3f} +/- {PE_500_err:.3f}')

Hmix 500 replicates, Potential Energy (kJ/mol): 31512.856 +/- 16.167


In [8]:
# Mean, sample standard deviation (ddof=1) and standard error of the mean of
# the kinetic energy across replicates.
ke_samples = np.asarray(KE_500)
KE_500_mean = ke_samples.mean()
KE_500_std = ke_samples.std(ddof=1)
KE_500_err = KE_500_std / np.sqrt(ke_samples.size)
print(f'Hmix 500 replicates, Kinetic Energy (kJ/mol): {KE_500_mean:.3f} +/- {KE_500_err:.3f}')

Hmix 500 replicates, Kinetic Energy (kJ/mol): 19654.916 +/- 4.394


In [9]:
# Mean, sample standard deviation (ddof=1) and standard error of the mean of
# the total energy across replicates.
te_samples = np.asarray(TE_500)
TE_500_mean = te_samples.mean()
TE_500_std = te_samples.std(ddof=1)
TE_500_err = TE_500_std / np.sqrt(te_samples.size)
print(f'Hmix 500 replicates, Total Energy (kJ/mol): {TE_500_mean:.3f} +/- {TE_500_err:.3f}')

Hmix 500 replicates, Total Energy (kJ/mol): 51167.772 +/- 18.114


In [10]:
# Mean, sample standard deviation (ddof=1) and standard error of the mean of
# the box volume across replicates.
bvol_samples = np.asarray(bVol_500)
bVol_500_mean = bvol_samples.mean()
bVol_500_std = bvol_samples.std(ddof=1)
bVol_500_err = bVol_500_std / np.sqrt(bvol_samples.size)
print(f'Hmix 500 replicates, Box Volume (nm^3): {bVol_500_mean:.3f} +/- {bVol_500_err:.3f}')

Hmix 500 replicates, Box Volume (nm^3): 60.861 +/- 0.019


In [11]:
# Mean, sample standard deviation (ddof=1) and standard error of the mean of
# the density across replicates.
dens_500_mean = np.array(dens_500).mean()
dens_500_std = np.array(dens_500).std(ddof=1)
dens_500_err = dens_500_std / np.sqrt(len(dens_500))
# Five decimals are used here: with three, the standard error of the density
# rounds to "0.000" and the reported uncertainty carries no information.
print(f'Hmix 500 replicates, Density (g/mL): {dens_500_mean:.5f} +/- {dens_500_err:.5f}')

Hmix 500 replicates, Density (g/mL): 1.123 +/- 0.000


In [12]:
# Write the per-replicate means followed by one summary line per observable.
# Every record ends with a newline so the summaries do not run together on a
# single line in the output file.
with open('output_stats_500_2ns.txt', 'w') as file:
    file.write(str(means_500_dict))
    file.write('\n')
    file.write(f'Hmix 500 replicates, Potential Energy (kJ/mol): {PE_500_mean:.3f} +/- {PE_500_err:.3f}\n')
    file.write(f'Hmix 500 replicates, Kinetic Energy (kJ/mol): {KE_500_mean:.3f} +/- {KE_500_err:.3f}\n')
    file.write(f'Hmix 500 replicates, Total Energy (kJ/mol): {TE_500_mean:.3f} +/- {TE_500_err:.3f}\n')
    file.write(f'Hmix 500 replicates, Box Volume (nm^3): {bVol_500_mean:.3f} +/- {bVol_500_err:.3f}\n')
    # Density uses five decimals; at three its standard error rounds to 0.000.
    file.write(f'Hmix 500 replicates, Density (g/mL): {dens_500_mean:.5f} +/- {dens_500_err:.5f}\n')
    

In [13]:
# Collect the excess-observable result of every replicate directory.
# Keys are 'value_<dir>' and 'error_<dir>'; each entry is whatever the JSON
# file stores under ['.result']['value']['value'] / ['error'].
obs_df = {}

for xdir in subdirs:
    json_file_path = os.path.join(os.getcwd(), xdir, '6421_calculate_excess_observable_output.json')

    # Report and skip replicates whose output file is missing.
    if not os.path.exists(json_file_path):
        print(f"JSON file not found in {xdir}")
        continue

    # The file is a nested JSON document rather than a table, so parse it with
    # the json module instead of coercing it into a DataFrame.
    with open(json_file_path) as handle:
        result = json.load(handle)['.result']['value']

    obs_df[f'value_{xdir}'] = result['value']
    obs_df[f'error_{xdir}'] = result['error']
    print(f"Data from {xdir} loaded successfully into dataframe: {xdir}")

# List the entries that were collected.
for name in obs_df:
    print(f"Dataframe '{name}':")

Data from 500_r1 loaded successfully into dataframe: 500_r1
Data from 500_r2 loaded successfully into dataframe: 500_r2
Data from 500_r3 loaded successfully into dataframe: 500_r3
Data from 500_r4 loaded successfully into dataframe: 500_r4
Data from 500_r5 loaded successfully into dataframe: 500_r5
Data from 500_r6 loaded successfully into dataframe: 500_r6
Dataframe 'value_500_r1':
Dataframe 'error_500_r1':
Dataframe 'value_500_r2':
Dataframe 'error_500_r2':
Dataframe 'value_500_r3':
Dataframe 'error_500_r3':
Dataframe 'value_500_r4':
Dataframe 'error_500_r4':
Dataframe 'value_500_r5':
Dataframe 'error_500_r5':
Dataframe 'value_500_r6':
Dataframe 'error_500_r6':


In [14]:
# Re-key the loaded results from '<field>_500_r<N>' to '<field>_r<N>',
# keeping value/error pairs together in replicate order.
observables = {}
for rep in range(1, 7):
    for field in ('value', 'error'):
        observables[f'{field}_r{rep}'] = obs_df[f'{field}_500_r{rep}']

In [15]:
# Display the re-keyed value/error entries for all replicates.
observables

{'value_r1': {'value': -1.5354113035687642,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'error_r1': {'value': 0.09132507014914701,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'value_r2': {'value': -1.4651932172143631,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'error_r2': {'value': 0.101108802339387,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'value_r3': {'value': -1.500722359357212,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'error_r3': {'value': 0.110143628359312,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'value_r4': {'value': -1.102634648787642,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'error_r4': {'value': 0.116825975105314,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'value_r5': {'value': -1.503496497456069,
  'un

In [16]:
# Split the entries into two parallel lists of bare numbers: one for the
# observable values and one for their errors, in dictionary order.
obs_values, obs_errors = [], []
for label, quantity in observables.items():
    if 'value' in label:
        bucket = obs_values
    elif 'error' in label:
        bucket = obs_errors
    else:
        continue  # neither a value nor an error entry
    magnitude = quantity['value']
    print(f"{label}:{magnitude}")
    bucket.append(magnitude)

value_r1:-1.5354113035687642
error_r1:0.09132507014914701
value_r2:-1.4651932172143631
error_r2:0.101108802339387
value_r3:-1.500722359357212
error_r3:0.110143628359312
value_r4:-1.102634648787642
error_r4:0.116825975105314
value_r5:-1.503496497456069
error_r5:0.07744090362391601
value_r6:-1.7146722272061221
error_r6:0.09215569297355901


In [17]:
# Show the collected observable values, one per replicate.
print(obs_values)

[-1.5354113035687642, -1.4651932172143631, -1.500722359357212, -1.102634648787642, -1.503496497456069, -1.7146722272061221]


In [18]:
# Persist the raw entries plus the extracted value and error lists.
with open('output_observables_500_2ns.txt', 'w') as file:
    file.writelines([
        str(observables),
        '\n',
        f'Values list: {obs_values} \n',
        f'Errors list: {obs_errors} \n',
    ])

In [19]:
# Collect the mixture observable and its time-series statistics for every
# replicate directory. Keys are '<field>_<dir>'.
exobs_df = {}

# Fields copied verbatim from the '.time_series_statistics' section.
statistic_fields = (
    'n_total_points',
    'n_uncorrelated_points',
    'statistical_inefficiency',
    'equilibration_index',
)

for ydir in subdirs:
    json_file_path = os.path.join(os.getcwd(), ydir, '6421_extract_observable_mixture_output.json')

    # Report and skip replicates whose output file is missing.
    if not os.path.exists(json_file_path):
        print(f"JSON file not found in {ydir}")
        continue

    # The file is a nested JSON document rather than a table, so parse it with
    # the json module instead of coercing it into a DataFrame.
    with open(json_file_path) as handle:
        payload = json.load(handle)

    observable = payload['.value']['value']
    series_stats = payload['.time_series_statistics']

    exobs_df[f'value_{ydir}'] = observable['value']
    exobs_df[f'error_{ydir}'] = observable['error']
    for stat in statistic_fields:
        exobs_df[f'{stat}_{ydir}'] = series_stats[stat]

    print(f"Data from {ydir} loaded successfully into dataframe: {ydir}")

# List the entries that were collected.
for name in exobs_df:
    print(f"Dataframe '{name}':")

Data from 500_r1 loaded successfully into dataframe: 500_r1
Data from 500_r2 loaded successfully into dataframe: 500_r2
Data from 500_r3 loaded successfully into dataframe: 500_r3
Data from 500_r4 loaded successfully into dataframe: 500_r4
Data from 500_r5 loaded successfully into dataframe: 500_r5
Data from 500_r6 loaded successfully into dataframe: 500_r6
Dataframe 'value_500_r1':
Dataframe 'error_500_r1':
Dataframe 'n_total_points_500_r1':
Dataframe 'n_uncorrelated_points_500_r1':
Dataframe 'statistical_inefficiency_500_r1':
Dataframe 'equilibration_index_500_r1':
Dataframe 'value_500_r2':
Dataframe 'error_500_r2':
Dataframe 'n_total_points_500_r2':
Dataframe 'n_uncorrelated_points_500_r2':
Dataframe 'statistical_inefficiency_500_r2':
Dataframe 'equilibration_index_500_r2':
Dataframe 'value_500_r3':
Dataframe 'error_500_r3':
Dataframe 'n_total_points_500_r3':
Dataframe 'n_uncorrelated_points_500_r3':
Dataframe 'statistical_inefficiency_500_r3':
Dataframe 'equilibration_index_500_r3'

In [20]:
# Pairs of (short key used in `mixout`, key prefix used in `exobs_df`), listed
# in the order the entries are stored for each replicate.
field_names = (
    ('value', 'value'),
    ('error', 'error'),
    ('total_points', 'n_total_points'),
    ('uncorr_points', 'n_uncorrelated_points'),
    ('stat_ineff', 'statistical_inefficiency'),
    ('equil_index', 'equilibration_index'),
)

# Re-key every loaded entry from '<source>_500_r<N>' to '<short>_r<N>',
# replicate by replicate.
mixout = {
    f'{short}_r{rep}': exobs_df[f'{source}_500_r{rep}']
    for rep in range(1, 7)
    for short, source in field_names
}

In [21]:
# Display the re-keyed mixture entries for all replicates.
mixout

{'value_r1': {'value': 101.40731510718878,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'error_r1': {'value': 0.07420582843498301,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'total_points_r1': 500,
 'uncorr_points_r1': 152,
 'stat_ineff_r1': 1.089122639390278,
 'equil_index_r1': 196,
 'value_r2': {'value': 101.23559543102985,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'error_r2': {'value': 0.078661533116317,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'total_points_r2': 500,
 'uncorr_points_r2': 93,
 'stat_ineff_r2': 1.7868991321666212,
 'equil_index_r2': 315,
 'value_r3': {'value': 101.081156385655,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'error_r3': {'value': 0.09768388895117901,
  'unit': 'kilojoule / mole',
  '@type': 'openff.evaluator.unit.Quantity'},
 'total_points_r3': 500,
 'uncorr_points_r3': 50,
 'stat_ineff_

In [22]:
# One list per field, each filled in dictionary (replicate) order.
mixout_values = []
mixout_errors = []
mixout_totpts = []
mixout_uncorrpts = []
mixout_statineff = []
mixout_equilindex = []

# (substring looked for in the key, destination list, whether the entry is a
# dict whose 'value' item holds the number). The first matching row wins.
routing = (
    ('value', mixout_values, True),
    ('error', mixout_errors, True),
    ('total_points', mixout_totpts, False),
    ('uncorr_points', mixout_uncorrpts, False),
    ('stat_ineff', mixout_statineff, False),
    ('equil_index', mixout_equilindex, False),
)

for label, entry in mixout.items():
    for token, bucket, is_quantity in routing:
        if token in label:
            item = entry['value'] if is_quantity else entry
            print(f"{label}:{item}")
            bucket.append(item)
            break

value_r1:101.40731510718878
error_r1:0.07420582843498301
total_points_r1:500
uncorr_points_r1:152
stat_ineff_r1:1.089122639390278
equil_index_r1:196
value_r2:101.23559543102985
error_r2:0.078661533116317
total_points_r2:500
uncorr_points_r2:93
stat_ineff_r2:1.7868991321666212
equil_index_r2:315
value_r3:101.081156385655
error_r3:0.09768388895117901
total_points_r3:500
uncorr_points_r3:50
stat_ineff_r3:2.308010359643837
equil_index_r3:351
value_r4:101.44905423613649
error_r4:0.07781132294665101
total_points_r4:500
uncorr_points_r4:83
stat_ineff_r4:2.1426619155211712
equil_index_r4:253
value_r5:101.17491941642545
error_r5:0.046293786127654
total_points_r5:500
uncorr_points_r5:248
stat_ineff_r5:1.0
equil_index_r5:252
value_r6:101.30540860170333
error_r6:0.06480285886127901
total_points_r6:500
uncorr_points_r6:96
stat_ineff_r6:1.9913116269182631
equil_index_r6:309


In [23]:
# Persist the raw mixture entries followed by one line per extracted list.
with open('mixture_output_500_2ns.txt', 'w') as file:
    file.writelines([
        str(mixout),
        '\n',
        f'Values list: {mixout_values} \n',
        f'Errors list: {mixout_errors} \n',
        f'Total points list: {mixout_totpts} \n',
        f'Uncorrelated points list: {mixout_uncorrpts} \n',
        f'Statistical inefficiency list: {mixout_statineff} \n',
        f'Equilibration index list: {mixout_equilindex} \n',
    ])