## Clean the directory

In [None]:
# import shutil
# import os

# # Define the working directory
# working_dir = '/kaggle/working'

# # Check if the directory exists
# if os.path.exists(working_dir):
#     # Iterate through all files and subdirectories
#     for item in os.listdir(working_dir):
#         item_path = os.path.join(working_dir, item)
#         # Remove files and directories
#         if os.path.isfile(item_path) or os.path.islink(item_path):
#             os.unlink(item_path)
#         elif os.path.isdir(item_path):
#             shutil.rmtree(item_path)  

In [None]:
# import os

# # Check if the directory exists
# if os.path.exists(working_dir):
#     # List the contents of the directory
#     remaining_items = os.listdir(working_dir)
#     if not remaining_items:
#         print("All contents have been removed. The directory is empty.")
#     else:
#         print("The directory is not empty. Remaining items:")
#         for item in remaining_items:
#             print(f"- {item}")
# else:
#     print("The directory does not exist.")

# # Clear existing handlers
# #for handler in logging.root.handlers[:]:
# #    logging.root.removeHandler(handler)


## Clone repo

In [None]:
import os

# Make the Kaggle working directory the current directory, then report
# the directory we actually ended up in.
os.chdir('/kaggle/working')
current_dir = os.getcwd()
print(f"New working directory: {current_dir}")

In [None]:
# Clone the ColabFold_data repository into /kaggle/working/ColabFold_data.
# The batch-processing cell further down reads its .fa inputs from this checkout.
!git clone https://github.com/Vivi-tran/ColabFold_data.git /kaggle/working/ColabFold_data

In [None]:
import os

# Return to the Kaggle working directory so the relative paths used by the
# following cells resolve there, and echo the resulting location.
os.chdir('/kaggle/working')
working_directory_now = os.getcwd()
print(f"New working directory: {working_directory_now}")

## Install ColabFold

In [None]:
# Download the localcolabfold Linux installer script into the current directory.
!wget https://raw.githubusercontent.com/YoshitakaMo/localcolabfold/main/install_colabbatch_linux.sh

In [None]:
# Run the downloaded installer. Later cells expect it to have produced
# /kaggle/working/localcolabfold/colabfold-conda/bin (see the PATH setup cell).
!bash install_colabbatch_linux.sh

In [None]:
# !pip install --upgrade matplotlib-inline ipython

## Run AF2

In [None]:
import os

# Directory where 'colabfold_batch' is located
colabfold_bin_dir = '/kaggle/working/localcolabfold/colabfold-conda/bin'


def prepend_to_path(directory: str) -> str:
    """Put *directory* at the front of the PATH environment variable.

    Any occurrence of *directory* already present in PATH is dropped first,
    so calling this repeatedly (e.g. by re-running the notebook cell) leaves
    PATH unchanged instead of growing it by one entry per call.

    Returns the resulting PATH string.
    """
    current_entries = os.environ.get('PATH', '').split(os.pathsep)
    remaining = [entry for entry in current_entries if entry != directory]
    os.environ['PATH'] = os.pathsep.join([directory, *remaining])
    return os.environ['PATH']


# Prepend to PATH so `colabfold_batch` can be found by name from later cells
prepend_to_path(colabfold_bin_dir)

# Verify the updated PATH
print("Updated PATH:")
print(os.environ['PATH'])


In [None]:
!pip install -q matplotlib-inline ipython && MPLBACKEND=Agg
!colabfold_batch --help
# !colabfold_batch ./ColabFold_data/tests/32_PPepDB_2773.fa outputdir/ --num-recycle 20 --recycle-early-stop-tolerance 0.5 --num-relax 1 --calc-extra-ptm

In [None]:
# !colabfold_batch --help

In [None]:
import os
import shutil
import subprocess
import glob
import logging

# ---------------------------
# 1. Reset and Configure Logging
# ---------------------------

# Clear existing root handlers to prevent duplicate logs when the cell is re-run
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

# Root logger: everything from DEBUG up goes to log.txt, truncated on each run
logging.basicConfig(
    filename='log.txt',
    filemode='w',
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Create a custom logger
logger = logging.getLogger('BatchProcessingLogger')

# getLogger returns the same object on every run of this cell, so handlers
# attached by an earlier run must be removed or each message would be printed
# once per previous run.
for handler in logger.handlers[:]:
    logger.removeHandler(handler)

# Also log to the console by adding a StreamHandler
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)  # Set console log level to INFO
console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
console_handler.setFormatter(console_formatter)
logger.addHandler(console_handler)

# ---------------------------
# 2. Helpers
# ---------------------------


def reset_temp_folder(folder: str) -> None:
    """Delete *folder* with everything in it and recreate it empty.

    A failed deletion is logged instead of raised; the folder is (re)created
    afterwards in either case.
    """
    try:
        shutil.rmtree(folder)
        logger.info(f"Deleted temporary folder {folder}")
    except OSError as e:
        logger.error(f"Failed to delete temporary folder {folder}: {e}")

    os.makedirs(folder, exist_ok=True)
    logger.info(f"Recreated temporary folder {folder} for the next run.\n")


def collect_outputs(temp_folder: str, base_name: str) -> str:
    """Copy the top-level files of *temp_folder* into ``temp_folder/base_name``.

    Files whose name does not already start with *base_name* are copied with a
    ``<base_name>_`` prefix; sub-directories are skipped. A file that cannot
    be copied is logged and the remaining files are still processed.

    Returns the path of the sub-folder holding the copies.
    """
    subfolder = os.path.join(temp_folder, base_name)
    os.makedirs(subfolder, exist_ok=True)

    for file_name in os.listdir(temp_folder):
        file_path = os.path.join(temp_folder, file_name)
        if not os.path.isfile(file_path):
            continue

        if file_name.startswith(base_name):
            dest_name = file_name
        else:
            dest_name = f"{base_name}_{file_name}"

        try:
            shutil.copyfile(file_path, os.path.join(subfolder, dest_name))
        except OSError as e:
            logger.error(f"Failed to copy {file_path} into {subfolder}: {e}")

    return subfolder


def archive_outputs(subfolder: str, final_folder: str) -> str:
    """Zip *subfolder* and move the archive into *final_folder*.

    The archive is first written next to *subfolder* (``<subfolder>.zip``).
    It is then moved to an explicit file path inside *final_folder*, so an
    archive of the same name left by an earlier run is replaced rather than
    making the move fail.

    Returns the final archive path. Raises OSError (including shutil.Error)
    if archiving or moving fails.
    """
    archive_path = shutil.make_archive(
        base_name=str(subfolder), format='zip', base_dir=str(subfolder)
    )
    destination = os.path.join(final_folder, os.path.basename(archive_path))
    shutil.move(archive_path, destination)
    return destination


# ---------------------------
# 3. Define Paths
# ---------------------------

input_folder = 'ColabFold_data/insectipep/split/S_2'
temp_output_folder = 'outputdir/'
final_output_folder = 'S_2_out/'

# Create the final output folder if it doesn't exist
os.makedirs(final_output_folder, exist_ok=True)

# ---------------------------
# 4. Get List of `.fa` Files
# ---------------------------

# Sorted so the processing order is the same on every run
fa_files = sorted(glob.glob(os.path.join(input_folder, '*.fa')))

if not fa_files:
    logger.warning(f"No .fa files found in {input_folder}. Nothing to process.")
else:
    logger.info(f"Found {len(fa_files)} .fa files in {input_folder}.")

# ---------------------------
# 5. Process Each `.fa` File
# ---------------------------

for fa_path in fa_files:
    # Extract the base filename without extension
    base_name = os.path.splitext(os.path.basename(fa_path))[0]

    logger.info(f"Processing {base_name}.fa...")

    # Leftovers (from a failed previous iteration or an earlier manual run)
    # would otherwise be copied into this sequence's archive.
    if os.path.isdir(temp_output_folder) and os.listdir(temp_output_folder):
        logger.warning(
            f"Temporary folder {temp_output_folder} is not empty; "
            f"clearing it before running {base_name}.fa"
        )
        reset_temp_folder(temp_output_folder)

    # Run the colabfold_batch command
    cmd = [
        'colabfold_batch',
        fa_path,
        temp_output_folder,
        '--num-recycle', '20',
        '--num-relax', '1',
        '--recycle-early-stop-tolerance', '0.5',
        '--calc-extra-ptm',
    ]

    try:
        # Execute the command
        subprocess.run(cmd, check=True)
        logger.info(f"ColabFold finished for {base_name}.fa")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error processing {base_name}.fa: {e}")
        continue

    # ---------------------------
    # 6. Gather outputs into a sub-folder, zip it, store the zip
    # ---------------------------
    try:
        output_subfolder01 = collect_outputs(temp_output_folder, base_name)
        archive_outputs(output_subfolder01, final_output_folder)
        logger.info(f"Saved {base_name}.zip to {final_output_folder}")
    except OSError as e:
        # Keep the raw outputs in place for inspection; they are cleared
        # before the next sequence starts.
        logger.error(f"Failed to archive results for {base_name}.fa: {e}")
        continue

    # ---------------------------
    # 7. Clean Up Temporary Output
    # ---------------------------
    reset_temp_folder(temp_output_folder)

if fa_files:
    logger.info("Batch processing completed for all files.")


In [None]:
import os
import shutil

# final_output_folder is defined with a trailing separator ('S_2_out/').
# Passed verbatim as make_archive's base_name, that can (depending on the
# Python version) name the archive 'S_2_out/.zip' - a hidden file inside the
# folder that is being zipped. Normalising the path first makes the archive
# land next to the folder as 'S_2_out.zip'.
archive_base = os.path.normpath(str(final_output_folder))
shutil.make_archive(archive_base, 'zip', root_dir=archive_base)

In [None]:
import IPython

# Final cell: fetch the running IPython application and ask its kernel to
# shut down. The positional True is passed straight to do_shutdown
# (NOTE(review): this looks like its `restart` flag - confirm against ipykernel).
running_app = IPython.Application.instance()
running_app.kernel.do_shutdown(True)