In [2]:
import pandas as pd
import numpy as np
import os

# 분석 결과를 저장할 리스트 초기화
results = []

# 분석 함수 정의
def analyze_file(file_path):
    # 파일 읽기
    data = pd.read_csv(file_path)
    
    # E_pl^int 계산
    data['E_pl^int'] = (
        data['r_psp_MMGBSA_dG_Bind_Coulomb'] +
        data['r_psp_MMGBSA_dG_Bind_Covalent'] +
        data['r_psp_MMGBSA_dG_Bind_Hbond'] +
        data['r_psp_MMGBSA_dG_Bind_Lipo'] +
        data['r_psp_MMGBSA_dG_Bind_Packing']+
        data['r_psp_MMGBSA_dG_Bind_vdW']
    )
    
    # E_pl^int 평균값 계산
    E_pl_mean = data['E_pl^int'].mean()
    
    # ΔE_pl^int 계산
    data['ΔE_pl^int'] = data['E_pl^int'] - E_pl_mean
    
    # β 값 설정
    beta = 0.92
    
    # e^(βΔE_pl^int)의 평균값 계산
    avg_exp_term = np.mean(np.exp(beta * data['ΔE_pl^int']))
    
    # -TΔS 계산
    T = 300
    K = 0.0019872041  # Boltzmann constant in kcal/mol*K
    negative_T_delta_S = K * T * np.log(avg_exp_term)
    
    # Sol 에너지 평균 계산
    sol_avg = data['r_psp_MMGBSA_dG_Bind_Solv_GB'].mean()
    
    # r_psp_MMGBSA_dG_Bind 평균 계산
    r_psp_avg = data['r_psp_MMGBSA_dG_Bind'].mean()
    
    # 3개의 값 합산
    total = negative_T_delta_S + sol_avg + E_pl_mean
    
    # 결과 반환
    return {
        "file_name": os.path.basename(file_path),
        "-TΔS (kcal/mol)": negative_T_delta_S,
        "Sol avg (kcal/mol)": sol_avg,
        "E_pl^int avg (kcal/mol)": E_pl_mean,
        "Total (kcal/mol)": total,
        "r_psp_MMGBSA_dG_Bind avg (kcal/mol)": r_psp_avg
    }

# 특정 폴더의 모든 CSV 파일 분석
input_folder = "./"  # 분석할 CSV 파일이 저장된 폴더 경로
output_file = "analysis_results.csv"  # 결과를 저장할 파일 이름

# 폴더 내 모든 CSV 파일 처리
for file_name in os.listdir(input_folder):
    if file_name.endswith(".csv"):
        file_path = os.path.join(input_folder, file_name)
        try:
            result = analyze_file(file_path)
            results.append(result)
        except Exception as e:
            print(f"Error processing file {file_name}: {e}")

# 결과를 데이터프레임으로 정리
results_df = pd.DataFrame(results)

# 결과 저장
results_df.to_csv(output_file, index=False)

# 결과 출력
print("Analysis complete. Results:")
print(results_df)

Error processing file analysis_results.csv: 'r_psp_MMGBSA_dG_Bind_Coulomb'
Analysis complete. Results:
                        file_name  -TΔS (kcal/mol)  Sol avg (kcal/mol)  \
0    I-R21-C4_conf1_POC_group.csv        67.478668         -172.709970   
1    I-R21-C5_conf1_POC_group.csv        80.663682         -128.305574   
2    I-R23-C4_conf1_POC_group.csv       109.388041         -178.289846   
3    I-R23-C5_conf1_POC_group.csv        50.991404          -99.278758   
4     I-R4-C4_conf1_POC_group.csv        57.321987         -159.259858   
5    I-R4-CF5_conf1_POC_group.csv        38.192246         -168.390896   
6     I-R7-C4_conf1_POC_group.csv        50.303220         -198.223789   
7    I-R7-CF5_conf1_POC_group.csv        48.336523         -168.437321   
8    I-R9-CF5_conf1_POC_group.csv        51.821769         -118.110159   
9     I-T1-C4_conf1_POC_group.csv        45.348022         -156.118225   
10    I-T1-C5_conf1_POC_group.csv        79.494724         -127.908669   
11  IB3-T