In [5]:
import os
import pandas as pd
import re

def parse_ddg_file(file_path):
    """
    Parses a .ddg file and returns a dataframe.
    """
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            # Use regex to find WT or MUT values in each line
            match = re.search(r'WT_: (-?\d+\.\d+)', line)
            if match:
                data.append(('WT', float(match.group(1))))
            match = re.search(r'MUT_[\w_]+: (-?\d+\.\d+)', line)
            if match:
                data.append(('MUT', float(match.group(1))))
    return pd.DataFrame(data, columns=['Type', 'Value'])

def process_ddg_files(directory):
    """
    Processes all .ddg files in a given directory and prints the mean values for each file.
    Also saves the results into a .csv file.
    """
    results = []

    for filename in os.listdir(directory):
        if filename.endswith(".ddg"):
            file_path = os.path.join(directory, filename)
            df = parse_ddg_file(file_path)

            wt_values = df[df['Type'] == 'WT']['Value'].tolist()
            mut_values = df[df['Type'] == 'MUT']['Value'].tolist()

            wt_mean = sum(wt_values) / len(wt_values) if wt_values else 0
            mut_mean = sum(mut_values) / len(mut_values) if mut_values else 0
            diff_mean = mut_mean - wt_mean

            results.append({
                "Filename": filename,
                "Mean_WT": wt_mean,
                "Mean_MUT": mut_mean,
                "ddG": diff_mean
            })

            print(f"File: {filename}")
            print(f"  Mean_WT: {wt_mean}")
            print(f"  Mean_MUT: {mut_mean}")
            print(f"  ddG: {diff_mean}\n")

    # Save results to a CSV file
    results_df = pd.DataFrame(results)
    results_df.to_csv("/home/iwe25/Franz/CEPI/H5N1/DryLab/Vivi/results/ddg_results.csv", index=False)

def main():
    directory = '/home/iwe25/Franz/CEPI/H5N1/DryLab/Vivi/results'  # Replace with the directory containing .ddg files
    process_ddg_files(directory)

if __name__ == "__main__":
    main()


File: M74I.ddg
  Mean_WT: -1124.5436666666667
  Mean_MUT: -1127.894
  ddG: -3.3503333333333103

File: F211L.ddg
  Mean_WT: -1129.0873333333334
  Mean_MUT: -1126.962
  ddG: 2.1253333333334012

File: V168I.ddg
  Mean_WT: -1125.805
  Mean_MUT: -1119.819
  ddG: 5.986000000000104

File: R48N.ddg
  Mean_WT: -1124.9983333333332
  Mean_MUT: -1126.3500000000001
  ddG: -1.3516666666669153

File: L75T.ddg
  Mean_WT: -1123.1156666666668
  Mean_MUT: -1122.233
  ddG: 0.8826666666668643

File: S29T.ddg
  Mean_WT: -1120.388
  Mean_MUT: -1121.3256666666666
  ddG: -0.9376666666667006

File: S34T.ddg
  Mean_WT: -1121.708
  Mean_MUT: -1122.656
  ddG: -0.9479999999998654

File: F31Y.ddg
  Mean_WT: -1123.508
  Mean_MUT: -1122.464
  ddG: 1.0440000000000964

File: P147T.ddg
  Mean_WT: -1122.347
  Mean_MUT: -1117.378
  ddG: 4.969000000000051

File: T21S.ddg
  Mean_WT: -1118.8180000000002
  Mean_MUT: -1115.6826666666666
  ddG: 3.1353333333336195

File: V72I.ddg
  Mean_WT: -1121.119
  Mean_MUT: -1122.705
  ddG: 

In [6]:
results_df


NameError: name 'results_df' is not defined