This notebook runs Uni-MOF-Infer on downloaded MOFX .cif files stored under MOF_screening/MOFX_CIFs and outputs CO2 adsorption predictions at 60 °C and 1 atm. These are typical conditions for cooled flue gas exiting a cement factory (https://www.sciencedirect.com/science/article/pii/S2213343722018036#:~:text=Typically%2C%20depending%20on%20the%20source,24%5D%2C%20%5B25%5D.)

## Import packages

In [None]:
"""Import packages and set initial values"""
import os
import requests
import pandas as pd
import subprocess
from tqdm import tqdm

## Clone the GitHub repository locally (only do once at start of session)

In [None]:
token = "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"
username = "emd-aquila"
repo = "Xc51-MOFs"

if not os.path.exists(repo):
    !git clone https://{username}:{token}@github.com/{username}/{repo}.git
%cd {repo}

!git config --global user.name "emd-aquila"
!git config --global user.email "emduggan@mit.edu"
!git pull

## Configure Initial Values

In [None]:
# --- Prediction conditions ---------------------------------------------------
GAS = "CO2"
TEMP_K = 333.15         # 60 °C, in kelvin
PRESSURE_KPA = 101.325  # 1 atm, in kPa

# --- Inference command -------------------------------------------------------
UNIMOF_INFER_CMD = "unimof-infer"  # Adjust if needed

# --- Input / output locations (relative to the current working directory) ----
# NOTE(review): the notebook introduction says the .cif files are stored under
# MOF_screening/MOFX_CIFs, but this points at MOFX_CIFs -- confirm which
# location is correct.
MOF_DIR = "MOFX_CIFs"
OUTPUT_CSV = "MOF_screening/unimof_predictions.csv"
TOP5000_CSV = "MOF_screening/5000_top_co2_adsorbing_mofs.csv"

## Gather .cif files

In [None]:
# Create the structure directory if it is missing so that the listing below
# cannot fail on a non-existent path.
os.makedirs(MOF_DIR, exist_ok=True)

# Keep only the .cif entries and put them in alphabetical order.
cif_files = [entry for entry in os.listdir(MOF_DIR) if entry.endswith(".cif")]
cif_files.sort()

# Nothing to predict on: stop here instead of running an empty screening.
if not cif_files:
    raise RuntimeError(f"No .cif files found in {MOF_DIR}")

## Run Uni-MOF-Infer, filter the top 5000 CO2-adsorbing MOFs, and save the CSVs to GitHub

In [None]:
# Run the inference command once per .cif file, collect the predicted CO2
# uptake for every structure, write the full table and the top-5000 table to
# CSV, then commit and push both files.

# Create the output directories before the (potentially long) inference loop:
# DataFrame.to_csv does not create missing parent directories, and failing
# after the loop would throw away every prediction.
for out_path in (OUTPUT_CSV, TOP5000_CSV):
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

results = []
for cif_file in tqdm(cif_files, desc="Predicting CO2 adsorption"):
    mof_name = os.path.splitext(cif_file)[0]
    cif_path = os.path.join(MOF_DIR, cif_file)

    cmd = [
        UNIMOF_INFER_CMD,
        "--structure", cif_path,
        "--gas", GAS,
        "--temperature", str(TEMP_K),
        "--pressure", str(PRESSURE_KPA),
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        # The command's stdout is parsed as a single number.
        predicted_uptake = float(result.stdout.strip())
    except subprocess.CalledProcessError as e:
        # str(e) only reports the exit status; the captured stderr carries the
        # actual reason for the failure, so include it in the log line.
        stderr_text = (e.stderr or "").strip()
        if stderr_text:
            print(f"[ERROR] {mof_name}: {e} | stderr: {stderr_text}")
        else:
            print(f"[ERROR] {mof_name}: {e}")
        predicted_uptake = None
    except Exception as e:
        # Deliberately broad: one bad structure (unparseable output, command
        # that cannot be launched, ...) must not abort the whole screening run.
        print(f"[ERROR] {mof_name}: {e}")
        predicted_uptake = None

    # Failed predictions are kept as None so they still appear in the full CSV.
    results.append({
        "MOF": mof_name,
        "CO2_Uptake_mmol/g": predicted_uptake,
    })

# save inferred adsorption values to CSV
df = pd.DataFrame(results)
df.to_csv(OUTPUT_CSV, index=False)
print(f"Predictions saved to {OUTPUT_CSV}")

# save top 5000 MOFs by adsorption value (structures without a prediction are
# excluded before ranking)
filtered_df = (
    df.dropna(subset=["MOF", "CO2_Uptake_mmol/g"])
    .sort_values("CO2_Uptake_mmol/g", ascending=False)
    .head(5000)
)
filtered_df.to_csv(TOP5000_CSV, index=False)
print(f"Top 5000 CO2 adsorbing MOFs saved to {TOP5000_CSV}")

# add/commit/push to GitHub
# Any git step that exits non-zero (including `git commit` when there is
# nothing new to commit) is reported below rather than raised.
try:
    subprocess.run(["git", "add", OUTPUT_CSV, TOP5000_CSV], check=True)
    subprocess.run(["git", "commit", "-m", "Add CO2 adsorption predictions and top 5000 MOFs"], check=True)
    subprocess.run(["git", "push"], check=True)
    print("CSVs pushed to GitHub under MOF_screening/")
except subprocess.CalledProcessError as e:
    print(f"[GIT ERROR] Could not commit/push results: {e}")