<a href="https://colab.research.google.com/github/eoinleen/Protein-design-random/blob/main/AF3_extraction_convert_to_pdb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# ==============================================================================
# CIF to PDB Converter for AF3 Structure Files
# ==============================================================================
#
# DESCRIPTION:
#     This script converts AlphaFold3 structure files from CIF format to PDB format.
#     It processes files from the previously created directories:
#       - all_cif_files: Converts all CIF files to PDB format
#       - model0_cif_files: Converts model_0 CIF files to PDB format
#
# USAGE:
#     1. Mount your Google Drive
#     2. Update the base_dir path to your AF3 directory
#     3. Run the script
#
# INPUTS:
#     - base_dir: Path to directory containing organized CIF files
#
# OUTPUTS:
#     - PDB files created in:
#       * all_pdb_files - All structures in PDB format
#       * model0_pdb_files - Only model_0 structures in PDB format
#
# REQUIREMENTS:
#     - Biopython
#
# CREATED: March 10, 2025
# ==============================================================================

# Install Biopython if not already installed
!pip install -q biopython

import os
from Bio.PDB import MMCIFParser, PDBIO
from google.colab import drive

# --- Google Drive access ------------------------------------------------------
# Every path below lives under /content/drive, so Drive is mounted first.
drive.mount('/content/drive')

# --- Locations ----------------------------------------------------------------
# Root folder of the AF3 run; point this at your own results directory.
base_dir = '/content/drive/MyDrive/PDB-files/AF3-recalc_folds_2025_03_08_19_33'  # Update this if needed

# Input (CIF) and output (PDB) folders, each a direct child of base_dir.
all_cif_dir, model0_cif_dir, all_pdb_dir, model0_pdb_dir = (
    os.path.join(base_dir, subfolder)
    for subfolder in (
        'all_cif_files',
        'model0_cif_files',
        'all_pdb_files',
        'model0_pdb_files',
    )
)

# Ensure both output folders are present (no error when they already exist).
os.makedirs(model0_pdb_dir, exist_ok=True)
os.makedirs(all_pdb_dir, exist_ok=True)

# --- Shared Biopython objects ---------------------------------------------------
# One reader and one writer are built here and used by convert_cif_to_pdb.
io = PDBIO()
parser = MMCIFParser()

# Function to convert CIF to PDB
def convert_cif_to_pdb(cif_path, pdb_path):
    """Convert one mmCIF structure file to PDB format.

    Uses the module-level ``parser`` to read ``cif_path`` and the
    module-level ``io`` writer to save the result to ``pdb_path``.

    Args:
        cif_path: Path of the CIF file to read.
        pdb_path: Path of the PDB file to create.

    Returns:
        True when the PDB file was written, False when any step failed.
        Failures are reported with a printed message instead of being raised,
        so a batch run can continue with the next file.
    """
    write_started = False
    try:
        # Parse the CIF file
        structure = parser.get_structure('structure', cif_path)

        # Write as PDB
        io.set_structure(structure)
        write_started = True
        io.save(pdb_path)
        return True
    except Exception as e:
        print(f"Error converting {cif_path}: {e}")
        if write_started:
            # A failure while saving can leave a truncated file behind; remove
            # it so it cannot be mistaken for a finished conversion. Failures
            # before saving never touched pdb_path, so nothing is removed then.
            try:
                os.remove(pdb_path)
            except OSError:
                pass
        return False

# Batch conversion of both CIF folders, followed by a summary report.

def _convert_directory(cif_dir, pdb_dir, progress_every, file_label):
    """Convert every ``.cif`` file found in ``cif_dir`` into ``pdb_dir``.

    Args:
        cif_dir: Folder that is scanned for files ending in ``.cif``.
        pdb_dir: Folder that receives the matching ``.pdb`` files.
        progress_every: A progress line is printed each time the number of
            processed files reaches a multiple of this value.
        file_label: Noun used in the progress line (e.g. ``"files"``).

    Returns:
        A ``(converted, failed)`` tuple of counts, based on the boolean
        returned by ``convert_cif_to_pdb`` for each file.
    """
    converted = 0
    failed = 0

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # processing order and the progress log reproducible between runs.
    for filename in sorted(os.listdir(cif_dir)):
        if not filename.endswith('.cif'):
            continue

        # Swap only the trailing extension. str.replace('.cif', '.pdb') would
        # also rewrite any '.cif' that appears earlier in the file name.
        pdb_name = filename[:-len('.cif')] + '.pdb'
        cif_path = os.path.join(cif_dir, filename)
        pdb_path = os.path.join(pdb_dir, pdb_name)

        if convert_cif_to_pdb(cif_path, pdb_path):
            converted += 1
        else:
            failed += 1

        processed = converted + failed
        if processed % progress_every == 0:
            print(f"Processed {processed} {file_label}...")

    return converted, failed


# Process all CIF files
print("Converting all CIF files to PDB format...")
successful_all, failed_all = _convert_directory(
    all_cif_dir, all_pdb_dir, progress_every=10, file_label="files"
)

# Process model0 CIF files
print("\nConverting model_0 CIF files to PDB format...")
successful_model0, failed_model0 = _convert_directory(
    model0_cif_dir, model0_pdb_dir, progress_every=5, file_label="model_0 files"
)

# Print summary
print("\nConversion Summary:")
print(f"All CIF files: {successful_all} converted successfully, {failed_all} failed")
print(f"Model_0 CIF files: {successful_model0} converted successfully, {failed_model0} failed")
print("\nOutput directories:")
print(f"- All PDB files saved to: {all_pdb_dir}")
print(f"- Model 0 PDB files saved to: {model0_pdb_dir}")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/3.3 MB[0m [31m5.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/3.3 MB[0m [31m20.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m3.3/3.3 MB[0m [31m37.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
[?25hDrive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Converting all CIF files to PDB format...
Processed 10 files...
Processed 20 files...
Processed 30 files...
Processed 40 files...
Processed 50 files...
Processed 60 files...
Processed 70 files...
Processed 80 files...
Processed 90 files...
Pr