In [23]:
import os
import pandas as pd

# === Step 1: lookup table  molecule formula -> kinetic diameter (unit: Å) ===
# Insertion order matters: the prompt further down lists the keys in this order.
kinetic_diameter_dict = {
    "H2": 2.89, "He": 2.6, "CH4": 3.8, "NH3": 2.6,
    "H2O": 2.65, "Ne": 2.75, "C2H2": 3.3, "N2": 3.64,
    "CO": 3.76, "C2H4": 3.9, "NO": 3.17, "O2": 3.46,
    "H2S": 3.6, "HCl": 3.2, "Ar": 3.4, "C3H6": 4.5,
    "CO2": 3.3, "N2O": 3.3, "C3H8": 4.3, "SO2": 3.6,
    "Cl2": 3.2, "C6H6": 5.85, "HBr": 3.5, "Kr": 3.6,
    "Xe": 3.96, "SF6": 5.5, "CCl4": 5.9, "Br2": 3.5,
}

# === Step 2: load the input files ===
# Descriptor table; later cells read its "filename" and "PLD" columns.
META = pd.read_csv("../[99]Global_data/2019-04-11-ASR-internal_14845.csv")

# Plain-text list with one entry per line.
with open("mofsimplify_list.txt", "r") as list_handle:
    data = list_handle.read()

# Drop the ".cif" text from every entry so the names can be compared
# against META["filename"].
mof_list = []
for entry in data.strip().split("\n"):
    mof_list.append(entry.replace(".cif", ""))

# === Step 3: let the user pick the probe molecule ===
available_molecules = ", ".join(kinetic_diameter_dict)
print("사용 가능한 분자 목록:", available_molecules)

# Surrounding whitespace is removed; the lookup itself is case-sensitive.
molecule = input("스크리닝할 분자명을 입력하세요 (예: CO2): ")
molecule = molecule.strip()

# Abort early when the formula is not one of the dictionary keys.
is_known_molecule = molecule in kinetic_diameter_dict
if not is_known_molecule:
    raise ValueError(f"입력한 분자 {molecule}는 kinetic diameter 목록에 없습니다.")

# The molecule's kinetic diameter is the PLD threshold used by the filter below.
cutoff_diameter = kinetic_diameter_dict[molecule]
print(f"{molecule}의 kinetic diameter: {cutoff_diameter} Å → 이보다 큰 PLD를 가진 MOF만 선택합니다.")

# === Step 4: PLD-based MOF filtering ===
# Stage 1: restrict META to the structures named in mof_list.
listed_mask = META["filename"].isin(mof_list)
listed_mofs = META.loc[listed_mask].reset_index(drop=True)

# Stage 2: keep only rows whose PLD is strictly greater than the cutoff
# (rows with a missing PLD compare False and are dropped as well).
wide_enough_mask = listed_mofs["PLD"] > cutoff_diameter
filtered_df = listed_mofs.loc[wide_enough_mask].reset_index(drop=True)

# === Step 5: rebuild the .cif names and write them to a text file ===
filtered_filenames = ["{}.cif".format(name) for name in filtered_df["filename"].tolist()]

# The output name carries the selected molecule, e.g. mofsimplify_list_Xe.txt.
output_file = f"mofsimplify_list_{molecule}.txt"
joined_names = "\n".join(filtered_filenames)  # one name per line, no trailing newline
with open(output_file, "w") as out_handle:
    out_handle.write(joined_names)

print(f"스크리닝 완료! {len(filtered_filenames)}개의 MOF가 선택되었으며, 결과는 '{output_file}'에 저장되었습니다.")


사용 가능한 분자 목록: H2, He, CH4, NH3, H2O, Ne, C2H2, N2, CO, C2H4, NO, O2, H2S, HCl, Ar, C3H6, CO2, N2O, C3H8, SO2, Cl2, C6H6, HBr, Kr, Xe, SF6, CCl4, Br2
Xe의 kinetic diameter: 3.96 Å → 이보다 큰 PLD를 가진 MOF만 선택합니다.
스크리닝 완료! 6417개의 MOF가 선택되었으며, 결과는 'mofsimplify_list_Xe.txt'에 저장되었습니다.


In [16]:
# Display the screened table as the cell output.
# NOTE(review): this cell's execution count (16) is lower than that of the cell
# that builds filtered_df (23), so the saved output may be stale — re-run to confirm.
filtered_df

Unnamed: 0.1,Unnamed: 0,filename,LCD,PLD,LFPD,cm3_g,ASA_m2_cm3,ASA_m2_g,NASA_m2_cm3,NASA_m2_g,...,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48
0,0,00958972.2016.1250260_1436516_clean,4.71855,3.37000,4.71855,1.102640,1023.350,928.091,0.0000,0.0000,...,,,,,,,,,,
1,1,00958972.2016.1250260_1436519_clean,5.14924,3.51868,5.14924,1.254080,747.346,595.932,0.0000,0.0000,...,,,,,,,,,,
2,2,00958972.2016.1253069_1472494_clean,7.53123,5.27250,7.53123,3.484490,1077.330,309.178,93.8221,26.9256,...,,,,,,,,,,
3,15161,1499489-acs.cgd.6b01265_1499490_clean,10.85017,3.77263,10.26663,0.884188,1701.530,1924.400,0.0000,0.0000,...,,,,,,,,,,
4,3,ABAVIJ_clean,4.45543,2.49720,4.40652,1.524930,0.000,0.000,312.0520,204.6330,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12018,14347,ZUVTEP_clean,6.34335,3.26541,6.34335,1.639520,0.000,0.000,911.4640,555.9340,...,,,,,,,,,,
12019,14348,ZUWWAP_clean,5.07411,2.67704,5.07411,2.086200,0.000,0.000,528.5920,253.3760,...,,,,,,,,,,
12020,14350,ZUWXUM_clean,6.25511,5.54572,6.22585,0.917254,2526.110,2753.990,0.0000,0.0000,...,,,,,,,,,,
12021,14351,ZUXPOZ_clean,4.94280,4.31959,4.93372,2.001620,1314.330,656.635,0.0000,0.0000,...,,,,,,,,,,


Unnamed: 0,filename,LCD,PLD,LFPD,cm3_g,ASA_m2_cm3,ASA_m2_g,NASA_m2_cm3,NASA_m2_g,AV_VF,...,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48
0,00958972.2016.1250260_1436516_clean,4.71855,3.37000,4.71855,1.102640,1023.350,928.091,0.0000,0.0000,0.5514,...,,,,,,,,,,
1,00958972.2016.1250260_1436519_clean,5.14924,3.51868,5.14924,1.254080,747.346,595.932,0.0000,0.0000,0.4898,...,,,,,,,,,,
2,00958972.2016.1253069_1472494_clean,7.53123,5.27250,7.53123,3.484490,1077.330,309.178,93.8221,26.9256,0.4754,...,,,,,,,,,,
3,1499489-acs.cgd.6b01265_1499490_clean,10.85017,3.77263,10.26663,0.884188,1701.530,1924.400,0.0000,0.0000,0.6458,...,,,,,,,,,,
4,ABAVIJ_clean,4.45543,2.49720,4.40652,1.524930,0.000,0.000,312.0520,204.6330,0.3792,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12018,ZUVTEP_clean,6.34335,3.26541,6.34335,1.639520,0.000,0.000,911.4640,555.9340,0.5300,...,,,,,,,,,,
12019,ZUWWAP_clean,5.07411,2.67704,5.07411,2.086200,0.000,0.000,528.5920,253.3760,0.4640,...,,,,,,,,,,
12020,ZUWXUM_clean,6.25511,5.54572,6.22585,0.917254,2526.110,2753.990,0.0000,0.0000,0.7012,...,,,,,,,,,,
12021,ZUXPOZ_clean,4.94280,4.31959,4.93372,2.001620,1314.330,656.635,0.0000,0.0000,0.5790,...,,,,,,,,,,


In [None]:
# Inner-join df_new (key "name") with META (key "filename").
# NOTE(review): df_new and HE_KINETIC_DIAMETER are not defined in the visible
# cells — confirm they are created earlier in the notebook.
MERGED = df_new.merge(META, left_on="name", right_on="filename", how="inner")

# Discard every column whose label contains "Unnamed".
kept_columns = [column for column in MERGED.columns if "Unnamed" not in column]
MERGED = MERGED[kept_columns]

# Row counts on each side of the He kinetic diameter.
# mt_kd counts rows that are NOT (PLD <= cutoff), so a missing PLD would land here;
# le_kd counts rows with PLD <= cutoff.
at_or_below_he = MERGED["PLD"] <= HE_KINETIC_DIAMETER
mt_kd = int((~at_or_below_he).sum())
le_kd = int(at_or_below_he.sum())

# The saved subset uses a direct `>` comparison (not the negated mask above),
# which leaves out rows with a missing PLD.
above_he = MERGED["PLD"] > HE_KINETIC_DIAMETER
he_screened = MERGED.loc[above_he].reset_index(drop=True)
he_screened.to_csv("[DATA]He_298K_PLDscreened_20250407ver.csv")

# Disabled scratch plot, kept for reference:
# import matplotlib.pyplot as plt
# plt.scatter(MERGED["PLD"], MERGED["15.0 bar"])
# plt.axvline(x = 2.6)
# plt.ylim(0, 1)
# plt.xlim(2.4, 2.9)

In [None]:
# Display df_new as the cell output.
# NOTE(review): df_new is not defined in any visible cell — confirm where it is built.
df_new

Unnamed: 0,name,0.01 bar,0.05 bar,0.1 bar,0.2 bar,0.35 bar,0.5 bar,15.0 bar,1.0 bar,5.0 bar
0,00958972.2016.1250260_1436516_clean,0.000114,0.000537,0.001203,0.002279,0.004000,0.005735,0.169909,0.011418,0.057601
1,00958972.2016.1250260_1436519_clean,0.000075,0.000342,0.000795,0.001492,0.002607,0.003775,0.109674,0.007674,0.036541
2,00958972.2016.1253069_1472494_clean,0.000046,0.000216,0.000453,0.000889,0.001612,0.002277,0.066353,0.004491,0.022329
3,ABAYOU_clean,0.000252,0.001303,0.002569,0.005182,0.009006,0.012909,0.366374,0.025487,0.128878
4,ABETAE_clean,0.000062,0.000333,0.000684,0.001263,0.002314,0.003332,0.096832,0.006518,0.032626
...,...,...,...,...,...,...,...,...,...,...
12017,ZURLAB_clean,0.000044,0.000217,0.000416,0.000808,0.001423,0.002064,0.059901,0.004043,0.020810
12018,ZURQOS_clean,0.000110,0.000599,0.001124,0.002302,0.004106,0.005692,0.172593,0.011555,0.058273
12019,ZUTBAR05_clean,0.000194,0.001011,0.002050,0.004081,0.007050,0.010082,0.283911,0.020190,0.099075
12020,ZUWXUM_clean,0.000353,0.001784,0.003593,0.007328,0.012697,0.017683,0.524139,0.036384,0.180695


In [None]:
# Show the META rows whose filename does not occur in df["name"].
# NOTE(review): `df` is not defined in the visible cells (nearby cells use
# `df_new`) — confirm which frame is intended.
absent_from_df = ~META["filename"].isin(df["name"])
META.loc[absent_from_df]

Unnamed: 0.1,Unnamed: 0,filename,LCD,PLD,LFPD,cm3_g,ASA_m2_cm3,ASA_m2_g,NASA_m2_cm3,NASA_m2_g,...,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48
5,4,ABAVOP_clean,3.53642,2.44162,3.53007,1.565940,0.000,0.000,122.782,78.4079,...,,,,,,,,,,
9,8,ABEMIF_clean,11.25251,6.80253,11.25251,1.151990,1273.120,1105.150,476.471,413.6070,...,,,,,,,,,,
25,24,ABUXUT_clean,3.18589,2.99837,3.18589,1.657380,0.000,0.000,0.000,0.0000,...,,,,,,,,,,
32,30,ACATAA_clean,7.11145,5.24842,7.11145,1.762610,1249.090,708.660,0.000,0.0000,...,,,,,,,,,,
34,32,ACENIF_clean,5.47565,4.20450,5.47328,1.313310,1262.560,961.352,0.000,0.0000,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14807,14313,ZUBKEO_clean,6.01999,5.27577,6.00041,1.384680,1271.510,918.270,0.000,0.0000,...,,,,,,,,,,
14817,14324,ZUJKAS_clean,6.99233,4.91611,6.97794,0.880805,1770.860,2010.510,0.000,0.0000,...,,,,,,,,,,
14821,14329,ZUMMIF_clean,5.73668,4.51756,5.73668,1.883100,722.622,383.741,0.000,0.0000,...,,,,,,,,,,
14842,14352,ZUYHIM_clean,8.11138,6.59647,8.11138,0.733886,2298.050,3131.340,0.000,0.0000,...,,,,,,,,,,
