## **Note:** 
### This tool processes sequences in batches: each run handles one batch of sequences, and the batch size is set to 100. The batch is submitted to the tool's website, and the results are sent to the registered email address.

This is essentially the same code as the Colab AlphaFold2 notebook, with three additions: a function that updates the CSV before processing so that the same sequence is not processed twice, a loop that iterates over the rows of the DataFrame, and a function that mounts Google Drive and saves the files there so that progress is not lost if the runtime ends. The following notebook was run in a Google Colab environment and copied to this file.

# COLAB NOTEBOOK

## Input:
- A `fasta_variant.csv` file
- A `colab_alphafold2` folder

Make sure that this notebook, the `fasta_variant.csv` file and the `colab_alphafold2` folder are the only items in the directory.

## Output:
- A .zip with all .pdb files

## How to run:
- Select `Runtime` on top menu and then `Run all`

In [None]:
#@title Connect with drive
import glob  # File pattern matching, used to locate the notebook on Drive
import os  # File and directory operations
import shutil  # Recursive directory removal
from google.colab import drive  # Google Drive integration for Colab

# Mount Google Drive and make the notebook's own folder the working directory,
# so every file written later lands on Drive instead of the Colab VM disk.
drive.mount('/content/drive', force_remount=True)
filename = 'AlphaFold2.ipynb'  # Name of this notebook on Drive
drive_root = '/content/drive/MyDrive/'  # Root directory of the mounted Drive
pattern = drive_root + '**/' + filename  # Recursive search pattern for the notebook
file_list = glob.glob(pattern, recursive=True)
if not file_list:
    # Fail with an actionable message instead of an IndexError on file_list[0].
    raise FileNotFoundError(
        f"Could not find '{filename}' anywhere under {drive_root}"
    )
notebook_dir = os.path.dirname(file_list[0])  # First match wins if several exist
os.chdir(notebook_dir)
print(f"Add the input files on {notebook_dir}")

# Clean the working directory: everything except the two inputs and the
# notebook itself is removed (the notebook lives in this same directory, so
# it must be excluded explicitly or the loop would delete it).
keep = {"fasta_variant.csv", "colab_alphafold2", filename}
for entry in os.listdir():
    if entry in keep:
        continue
    file_path = os.path.join(notebook_dir, entry)
    try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
            # Symlinks are unlinked, never followed.
            os.remove(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except OSError as e:
        # Best effort: report and keep cleaning the remaining entries.
        print(f"Failed to delete {entry}: {e}")

In [None]:
#@title Setup
%%time
!pip install --upgrade pandas  # Upgrade pandas library
!pip install pandarallel -U  # Upgrade pandarallel library
import os  # Import os library for file and directory operations
import pandas as pd  # Import pandas library for data manipulation
import shutil  # Import shutil library for file operations
from google.colab import files  # Import files module from google.colab

from sys import version_info  # Import version_info from sys

import os  # Import os library for file and directory operations
import re  # Import re library for regular expressions
import hashlib  # Import hashlib library for hashing
import random  # Import random library for random operations
PYTHON_VERSION = f"{version_info.major}.{version_info.minor}"  # Get the Python version

# Dependency installation.
# Each step below is guarded by an empty marker file in the current working
# directory (COLABFOLD_READY, CONDA_READY, HH_READY, AMBER_READY) and is
# skipped when its marker already exists. The markers are created with an
# unconditional `touch`: the exit status of os.system() is never inspected,
# so a failed install still leaves its marker behind.

if not os.path.isfile("COLABFOLD_READY"):  # ColabFold step has not run yet
  print("installing colabfold...")
  # Install ColabFold (extra: alphafold-minus-jax) straight from its GitHub repository.
  os.system("pip install -q --no-warn-conflicts 'colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold'")
  if os.environ.get('TPU_NAME', False) != False:  # True whenever the TPU_NAME environment variable is set
    os.system("pip uninstall -y jax jaxlib")  # Drop the preinstalled jax/jaxlib first
    # NOTE(review): this branch is keyed on TPU_NAME but installs the
    # 'jax[cuda12_pip]' extra -- confirm this is intended.
    os.system("pip install --no-warn-conflicts --upgrade dm-haiku==0.0.10 'jax[cuda12_pip]'==0.3.25 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html")
  # Symlink the installed packages into the current working directory.
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/colabfold colabfold")
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/alphafold alphafold")
  os.system("touch COLABFOLD_READY")  # Mark the ColabFold step as done

if not os.path.isfile("CONDA_READY"):  # Miniforge step has not run yet
  print("installing conda...")
  os.system("wget -qnc https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh")  # Download the Miniforge installer
  os.system("bash Miniforge3-Linux-x86_64.sh -bfp /usr/local")  # Run the installer with /usr/local as prefix
  os.system("mamba config --set auto_update_conda false")  # Turn off conda auto-update
  os.system("touch CONDA_READY")  # Mark the Miniforge step as done

# hhsuite and amber (openmm + pdbfixer): when neither marker exists both are
# installed with a single mamba call; otherwise only the missing one is.
if not os.path.isfile("HH_READY") and not os.path.isfile("AMBER_READY"):
  print("installing hhsuite and amber...")
  # python is pinned to the running interpreter's major.minor version.
  os.system(f"mamba install -y -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0 openmm=7.7.0 python='{PYTHON_VERSION}' pdbfixer")
  os.system("touch HH_READY")
  os.system("touch AMBER_READY")
else:
  if not os.path.isfile("HH_READY"):  # Only hhsuite (and kalign2) is missing
    print("installing hhsuite...")
    os.system(f"mamba install -y -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0 python='{PYTHON_VERSION}'")
    os.system("touch HH_READY")
  if not os.path.isfile("AMBER_READY"):  # Only openmm/pdbfixer is missing
    print("installing amber...")
    os.system(f"mamba install -y -c conda-forge openmm=7.7.0 python='{PYTHON_VERSION}' pdbfixer")
    os.system("touch AMBER_READY")

# Prediction settings, kept as module-level names.
# Values that start out as form-style strings are normalised right where they
# are declared, so each name ends up holding the value in its final form.

# Model and MSA selection
model_type = "auto"
msa_mode = "mmseqs2_uniref_env"
pair_mode = "unpaired_paired"
pairing_strategy = "greedy"

# Sampling
num_recycles = int("1")  # given as a string, used as an int
num_seeds = 1
use_dropout = False
calc_extra_ptm = False
relax_max_iterations = 200

# "auto" placeholders resolve to None
recycle_early_stop_tolerance = "auto"
recycle_early_stop_tolerance = None  # always reset, whatever was chosen above
max_msa = "auto"
max_msa = None if max_msa == "auto" else max_msa

# Output options
save_all = False
save_recycles = False
save_to_google_drive = False
dpi = 200

import sys  # Import sys library
import warnings  # Import warnings library
warnings.simplefilter(action='ignore', category=FutureWarning)  # Ignore FutureWarnings
from Bio import BiopythonDeprecationWarning  # Import BiopythonDeprecationWarning
warnings.simplefilter(action='ignore', category=BiopythonDeprecationWarning)  # Ignore BiopythonDeprecationWarnings
from pathlib import Path  # Import Path from pathlib
from colabfold.download import download_alphafold_params, default_data_dir  # Import functions from colabfold.download
from colabfold.utils import setup_logging  # Import setup_logging from colabfold.utils
from colabfold.batch import get_queries, run, set_model_type  # Import functions from colabfold.batch
from colabfold.plot import plot_msa_v2  # Import plot_msa_v2 from colabfold.plot

import os  # Import os library for file and directory operations
import numpy as np  # Import numpy library
def count_gpu_lines(wc_output):
  """Turn the text printed by ``wc -l`` into an int.

  Args:
      wc_output: Raw text read from the shell pipeline.

  Returns:
      The parsed line count, or 0 when the text is empty or not a number.
  """
  try:
    return int(wc_output.strip() or 0)
  except ValueError:
    return 0

# Count the lines of `nvidia-smi` output that mention a Tesla K80.
try:
  K80_chk = os.popen('nvidia-smi | grep "Tesla K80" | wc -l').read()
except OSError:
  # The shell could not be started; treat it as "no K80 present".
  K80_chk = "0"
# Compare the count numerically: a substring test for "1" would miss counts
# such as "2".
if count_gpu_lines(K80_chk) > 0:
  print("WARNING: found GPU Tesla K80: limited to total length < 1000")
  # Remove both memory-related settings if they are present.
  os.environ.pop("TF_FORCE_UNIFIED_MEMORY", None)
  os.environ.pop("XLA_PYTHON_CLIENT_MEM_FRACTION", None)

from pathlib import Path  # Import Path from pathlib

python_version = f"{version_info.major}.{version_info.minor}"  # "major.minor" of the running interpreter

In [None]:
#@title Functions
# NOTE: this cell had been pasted into itself. The first, truncated copy of
# the helpers used to sit here, ending in a half-written
# update_colab_alphafold_status() fused with the next `def` line. Every one
# of those helpers is defined again, complete, further down in this cell, so
# only add_hash (whose second `def` line was lost in the paste) is kept here.
def add_hash(x, y):
  """Return ``x`` suffixed with ``_`` and a short hash of ``y``.

  Args:
      x: Base string (the job name).
      y: String to hash (the sequence).

  Returns:
      ``x`` + ``"_"`` + the first five hex digits of the SHA-1 of ``y``.
  """
  return x + "_" + hashlib.sha1(y.encode()).hexdigest()[:5]

def check(folder):
  """Tell whether ``folder`` is still free.

  Args:
      folder: Path to probe.

  Returns:
      True when nothing exists at ``folder``, False when something does.
  """
  return not os.path.exists(folder)

def create_repository(name, sequence):
  """Create a working folder holding a one-row ColabFold query CSV.

  If ``name`` already exists, the first free ``{name}_0``, ``{name}_1``, ...
  is used instead. The folder name that was actually used is returned so the
  caller can keep working in the right place (previously it was dropped).

  Args:
      name: Desired folder / job name.
      sequence: Query sequence; all whitespace is stripped before writing.

  Returns:
      The name of the folder that was created.
  """
  query_sequence = "".join(sequence.split())

  if os.path.exists(name):
    n = 0
    while os.path.exists(f"{name}_{n}"):
      n += 1
    name = f"{name}_{n}"
  os.makedirs(name, exist_ok=True)

  # The CSV is named after the folder and has no trailing newline.
  queries_path = os.path.join(name, f"{name}.csv")
  with open(queries_path, "w") as text_file:
    text_file.write(f"id,sequence\n{name},{query_sequence}")
  return name

def create_a3m_file(name, sequence):
  """Write ``{name}/{name}.single_sequence.a3m`` with ``sequence`` as its only entry.

  The file consists of the header line ``>1`` followed by the sequence, with
  no trailing newline. The folder ``name`` must already exist.

  Args:
      name: Folder name, also used as the file stem.
      sequence: Sequence text to store.
  """
  target = os.path.join(name, name + ".single_sequence.a3m")
  with open(target, "w") as handle:
    handle.write(f">1\n{sequence}")

def prediction_callback(protein_obj, length, prediction_result, input_features, mode):
  """Callback handed to ColabFold's ``run``; intentionally does nothing.

  ``mode`` is unpacked into its two items (so it must contain exactly two),
  and both are discarded. All other arguments are ignored.
  """
  _model_name, _relaxed = mode
  return None

def delete_folder(folder_name):
  """Remove ``folder_name`` recursively and report the outcome on stdout.

  Nothing is raised for the handled cases: a missing folder and an OSError
  during removal are both reported with a printed message.

  Args:
      folder_name: The name of the folder to delete.
  """
  # Guard clause: nothing to do when the folder is not there.
  if not os.path.exists(folder_name):
    print(f"Folder '{folder_name}' does not exist.")
    return

  try:
    shutil.rmtree(folder_name)
  except OSError as e:
    print(f"Error deleting folder '{folder_name}': {e}")
  else:
    print(f"Folder '{folder_name}' deleted successfully.")

def run_colab_alphafold2(name, sequence):
    """Predict one structure with ColabFold and store the PDB in ``PDB_PATH``.

    A temporary working folder is created for the job, ColabFold's ``run`` is
    executed in it with one model and no relaxation, the resulting ``.pdb``
    is moved to ``PDB_PATH`` and the working folder is deleted.

    Args:
        name: Variant identifier. With ``'_p.'`` replaced by ``'_'`` it
            becomes the output file name; stripped of whitespace and
            non-word characters and suffixed with a short sequence hash it
            becomes the working folder name.
        sequence: Sequence to fold.

    Returns:
        None. When no ``.pdb`` file was produced a message is printed and the
        working folder is left in place.
    """
    filename = name.replace('_p.', '_')  # Output file stem inside PDB_PATH
    basejobname = re.sub(r'\W+', '', "".join(name.split()))
    name = add_hash(basejobname, sequence)
    # create_repository may choose a suffixed folder when `name` is already
    # taken; work in the folder it reports (it may also report nothing, in
    # which case `name` is kept as is).
    name = create_repository(name, sequence) or name
    create_a3m_file(name, sequence)
    queries_path = os.path.join(name, f"{name}.csv")

    # Make the conda site-packages importable.
    python_version = f'{sys.version_info.major}.{sys.version_info.minor}'
    site_packages = f"/usr/local/lib/python{python_version}/site-packages/"
    if site_packages not in sys.path:
        sys.path.insert(0, site_packages)

    queries, is_complex = get_queries(queries_path)
    model_type = set_model_type(is_complex, "auto")
    use_cluster_profile = not ("multimer" in model_type and max_msa is not None)
    download_alphafold_params(model_type, Path("."))
    run(
        queries=queries,
        result_dir=name,
        use_templates=True,
        custom_template_path=None,
        num_relax=0,
        msa_mode="mmseqs2_uniref_env",
        model_type=model_type,
        num_models=1,
        num_recycles=num_recycles,
        relax_max_iterations=relax_max_iterations,
        recycle_early_stop_tolerance=recycle_early_stop_tolerance,
        num_seeds=num_seeds,
        use_dropout=use_dropout,
        model_order=[1],
        is_complex=is_complex,
        data_dir=Path("."),
        keep_existing_results=False,
        rank_by="auto",
        pair_mode=pair_mode,
        pairing_strategy=pairing_strategy,
        stop_at_score=float(100),
        prediction_callback=prediction_callback,
        dpi=dpi,
        zip_results=False,
        save_all=save_all,
        max_msa=max_msa,
        use_cluster_profile=use_cluster_profile,
        input_features_callback=None,
        save_recycles=save_recycles,
        user_agent="colabfold/google-colab-main",
        calc_extra_ptm=calc_extra_ptm,
    )

    # os.listdir() order is arbitrary; sort so the same file is always picked.
    pdb_files = sorted(f for f in os.listdir(name) if f.endswith('.pdb'))
    if not pdb_files:
        print(f"No .pdb files found in {name}")
        return
    pdb_file_path = os.path.join(name, pdb_files[0])
    destination_path = os.path.join(PDB_PATH, filename + ".pdb")
    # shutil.move falls back to copy + delete when a plain rename is not
    # possible (e.g. source and destination on different filesystems).
    shutil.move(pdb_file_path, destination_path)
    delete_folder(name)
    print(f"Model prediction saved to {destination_path}")

def check_and_update_status(row):
    """Compute the ``colab_alphafold2`` status for one row.

    Args:
        row: Mapping with a ``"variant"`` entry and, when no PDB file is found
            for it, a ``"colab_alphafold2"`` entry.

    Returns:
        ``'concluded'`` when ``{PDB_PATH}/{variant}.pdb`` exists (with
        ``'_p.'`` in the variant replaced by ``'_'``) or the row is already
        marked ``'concluded'``; ``'not_concluded'`` otherwise.
    """
    pdb_stem = row["variant"].replace('_p.', '_')
    pdb_on_disk = os.path.isfile(f"{PDB_PATH}/{pdb_stem}.pdb")
    # The stored status is only consulted when no file was found.
    if not pdb_on_disk and row["colab_alphafold2"] != 'concluded':
        return 'not_concluded'
    return 'concluded'

def update_colab_alphafold_status(df):
    """Refresh the ``colab_alphafold2`` status column of ``df`` in place.

    The column is created with ``'not_concluded'`` when missing, then every
    row is re-evaluated with ``check_and_update_status``.

    Args:
        df: DataFrame with one row per variant; it is modified in place.

    Returns:
        The same DataFrame object, for convenient reassignment.
    """
    if 'colab_alphafold2' not in df.columns:
        df['colab_alphafold2'] = 'not_concluded'
    # Build the column row by row rather than with df.apply(axis=1): on an
    # empty frame apply() hands back a DataFrame, which cannot be assigned to
    # a single column.
    df['colab_alphafold2'] = [check_and_update_status(row) for _, row in df.iterrows()]
    print("Status updated based on existing files")
    return df

def download_zip():
    """Zip the ``colab_alphafold2`` results folder and download the archive.

    The folder is addressed relative to the current working directory, which
    is where the predictions are written; the absolute
    ``/content/colab_alphafold2`` used before points at the Colab VM disk
    instead.
    """
    zip_filename = "colab_alphafold2.zip"
    zip_command = f"zip -r {zip_filename} colab_alphafold2"
    os.system(zip_command)
    files.download(zip_filename)


### Running Colab Alphafold2...

In [None]:
# Folder (relative to the working directory) that collects the predicted .pdb files.
PDB_PATH = "colab_alphafold2"
if not os.path.exists(PDB_PATH):
    os.makedirs(PDB_PATH)

# Load the ';'-separated input table and mark the variants that are already done.
variant_df = update_colab_alphafold_status(pd.read_csv('fasta_variant.csv', sep=';'))
# variant_df = variant_df.tail(200)  # Optionally limit the DataFrame to the last 200 rows
variant_df.head()  # Preview the first rows


In [None]:
# Show how many rows currently hold each colab_alphafold2 status value.
variant_df['colab_alphafold2'].value_counts()

In [None]:
# Fold every variant that is still pending, one at a time.
not_concluded_df = variant_df[variant_df['colab_alphafold2'] == 'not_concluded']
total = len(not_concluded_df)
for position, (_, row) in enumerate(not_concluded_df.iterrows(), start=1):
    print(f"Processing {row['variant']} ------- {position} of {total}")
    run_colab_alphafold2(row['variant'], row['fasta'])

Auxiliary functions

In [None]:
!zip -r colab_alphafold2.zip colab_alphafold2  # Zip the colab_alphafold2 directory
files.download("colab_alphafold2.zip")  # Download the zip file
variant_df.to_csv('variant_df.csv', index=False, sep=';')  # Save the updated DataFrame to CSV
files.download('variant_df.csv')  # Download the CSV file