<a href="https://colab.research.google.com/github/sanjyotshenoy/alphacross-xl/blob/main/Copy_of_AlphaCross_XL_(Colab_Version).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title Click on the Start Button or Press Ctrl(Cmd on Mac)+Enter
# Widget UI Imports
import ipywidgets as widgets
from ipywidgets import Layout
from IPython.display import display, clear_output
import requests, gdown, time, datetime
import numpy as np
import pandas as pd
import re
import os, shutil
import urllib.request
from matplotlib import pyplot as plt
import seaborn as sns
from io import BytesIO
import pathlib
import zipfile
import pathlib

# Colab specific (Comment these out on local)
from google.colab import files
os.chdir('/content')


# Re-running this cell must start from an empty "AlphaCrossXL Files" working
# folder, so files left over from a previous run are deleted first.
# This block can be commented out when working locally.

main_base_dir_parent = os.getcwd()
main_base_dir_parent_path = pathlib.Path(main_base_dir_parent)
if 'AlphaCrossXL Files' in main_base_dir_parent:
    # A previous run left the process inside the working folder: step out of
    # it so the folder can be recreated next to (not inside) itself.
    os.chdir(main_base_dir_parent_path.parent)
    main_base_dir_parent = os.getcwd()

main_base_dir = os.path.join(main_base_dir_parent, "AlphaCrossXL Files")
try:
    os.mkdir(main_base_dir)
except FileExistsError:
    # Only "folder already exists" justifies wiping it. Any other failure
    # (permissions, read-only disk, ...) must surface instead of triggering
    # a recursive delete, which is what a bare ``except:`` would do.
    shutil.rmtree(main_base_dir)
    os.mkdir(main_base_dir)

os.chdir(main_base_dir)
main_base_dir = os.getcwd()



class AlphaCrossXLBackend:
    def __init__(self):
        self.data_file = None
        self.fasta_db = None
        self.main_base_dir_parent = None
        self.base_dir = None
        self.residue_distance_threshold = None
        self.input_file_columns = None
        self.input_uniprotid_column = None
        self.input_peptide_a_column = None
        self.input_peptide_b_column = None
        self.input_link_site_a_column = None
        self.input_link_site_b_column = None
        self.input_xlink_types_column = None
        self.x_link_types = None
        self.user_x_link_type_chosen = None
        self.is_protein_centric = False
        self.is_visualization_allowed = False
        self.is_manual_protein_struct = False
        self.manual_protein_struct_file = None
        self.are_manual_structures_verified = False

        self.XLMS_cif_files_protein_names = None
        self.erroneous_XLMS_cif_files_protein_names = []

        self.XLMS_raw_input = None
        self.XLMS_input = None
        self.XLMS_proteins = None
        self.XLMS_Chain_1 = None
        self.XLMS_Chain_2 = None
        self.fa = None
        self.XLMS_DF = None
        self.XLMS_DF_NO_DUPES_NO_SHARED = None
        self.XLMS_proteins_with_structure_info = None
        self.XLMS_proteins_with_structure_info_df = None
        self.XLMS_proteins_without_structure_info = None
        self.df_bplt = None
        self.df_hplt = None
        self.df_cplt = None

    def initialize_input_file(self):
        '''
        Initializes the input data file for processing.

        args:
            None
        creates:
            self.input_file_columns
            self.XLMS_raw_input
        return: None

        Improvement:
        # first copy the input files to base_dir
        # Join the base_dir with the filename to get the destination path
        destination = os.path.join(base_dir, os.path.basename(original_path))

        # Copy the file
        shutil.copy2(original_path, destination)
        '''

        if self.data_file.endswith(".csv"):
            self.XLMS_raw_input = pd.read_csv(self.data_file)
        elif self.data_file.endswith(".xlsx"):
            self.XLMS_raw_input = pd.read_excel(self.data_file)
        else:
            raise Exception("File Format Error")

        input_columns = list((self.XLMS_raw_input.columns))
        self.input_file_columns = input_columns

    def get_input_file_columns(self):
        '''
        This function returns the columns of the input file.

        args:
            None
        creates:
            None
        return:
            input_file_columns
        '''
        return self.input_file_columns

    def set_input_file_columns(self, user_chosen_columns_dict, reset=False):
        '''
        This function sets the corresponding columns of the input file for further processing.

        args:
            user_chosen_columns_dict
        creates:
            self.XLMS_raw_input
        return:
            None
        '''
        if reset:
            self.XLMS_raw_input.rename(
                {
                    "Peptide A": self.input_peptide_a_column,
                    "Residue 1": self.input_link_site_a_column,
                    "Peptide B": self.input_peptide_b_column,
                    "Residue 2": self.input_link_site_b_column,
                    "X-link type": self.input_xlink_types_column,
                    "uniprotID": self.input_uniprotid_column,
                },
                axis=1,
                inplace=True,
            )
        else:
            self.input_peptide_a_column = user_chosen_columns_dict["Peptide A"]
            self.input_link_site_a_column = user_chosen_columns_dict["Residue 1"]
            self.input_peptide_b_column = user_chosen_columns_dict["Peptide B"]
            self.input_link_site_b_column = user_chosen_columns_dict["Residue 2"]
            self.input_xlink_types_column = user_chosen_columns_dict["X-link type"]
            self.input_uniprotid_column = user_chosen_columns_dict["uniprotID"]

            self.XLMS_raw_input.rename(
                    {
                        self.input_peptide_a_column: "Peptide A",
                        self.input_link_site_a_column: "Residue 1",
                        self.input_peptide_b_column: "Peptide B",
                        self.input_link_site_b_column: "Residue 2",
                        self.input_xlink_types_column: "X-link type",
                        self.input_uniprotid_column: "uniprotID",
                    },
                    axis=1,
                    inplace=True,
            )


    def get_input_xlink_types(self):
        '''
        This function returns the unique cross-link types found in input file.

        args:
            None
        creates:
            None
        return:
            x_link_types
        '''
        x_link_types = list(self.XLMS_raw_input["X-link type"].unique())
        self.x_link_types = x_link_types

        return self.x_link_types

    def set_input_xlink_type_threshold_dist(self, user_x_link_type_chosen, user_threshold_dist_chosen, reset=False):
        '''
        This function sets the cross-link type and threshold distance
        chosen by the user.

        args:
            user_x_link_type_chosen
        creates:
            self.XLMS_input, self.XLMS_proteins,
            self.XLMS_Chain_1, self.XLMS_Chain_2,
            self.residue_distance_threshold
        return:
            None
        '''
        if reset:
            self.XLMS_input = None
            self.XLMS_proteins = None
            self.XLMS_Chain_1 = None
            self.XLMS_Chain_2 = None
            self.residue_distance_threshold = None
            self.user_x_link_type_chosen = None
        else:
            self.residue_distance_threshold = user_threshold_dist_chosen
            self.user_x_link_type_chosen = user_x_link_type_chosen

            self.XLMS_input = self.XLMS_raw_input[
                self.XLMS_raw_input["X-link type"] == self.user_x_link_type_chosen
            ]

            self.XLMS_proteins = self.XLMS_input["uniprotID"].unique()
            self.XLMS_Chain_1 = self.XLMS_input["Peptide A"].unique()
            self.XLMS_Chain_2 = self.XLMS_input["Peptide B"].unique()

    def process_fasta(self):
        '''
        Open the FASTA database at self.fasta_db with pyfastx.

        Each record is keyed by the second "|"-separated field of its
        header (presumably the accession in UniProt-style headers such as
        "sp|ACCESSION|NAME" - confirm against the databases in use).

        Improvement Suggestion: Instead of FASTA Database Upload
        Just download FASTA on the fly
        Snippet:
        uurl = "https://rest.uniprot.org/uniprotkb/stream?"
        output_format = "compressed=false&fields=accession%2Creviewed%2Cid%2Cprotein_name%2Cgene_names%2Corganism_name%2Clength%2Csequence%2Cxref_pdb&format=tsv"
        uurl += output_format
        query = "model_organism%3A9606%20AND%20(reviewed:true)%20AND%20(database:pdb)%20AND%20(database:alphafolddb)"
        uurl += "&query=" + query
        ureq = requests.get(uurl)
        udata = pd.read_csv(BytesIO(ureq.content), delimiter="\t")

        args:
            None
        creates:
            self.fa
        return:
            None
        '''
        import pyfastx as pyfx

        def second_header_field(header):
            # Same key rule as before: text between the first two "|".
            return header.split("|")[1]

        self.fa = pyfx.Fasta(self.fasta_db, key_func=second_header_field)

    def verify_and_process_manual_protein_struct_file(self, reset=False):
        """
        Verify and process uploaded protein structure files.

        args:
            reset (bool, optional): If True, reset all manual structure data. Defaults to False.
        creates:
            self.XLMS_cif_files_protein_names
            self.are_manual_structures_verified
            self.manual_protein_struct_file
            self.is_manual_protein_struct
        raises:
            Exception: If no .CIF files are found or processing fails
        return:
            None
        """
        if self.manual_protein_struct_file is not None and self.is_manual_protein_struct and not reset:
            try:
                os.mkdir(os.path.join(self.base_dir,"Uploaded Structures"))
                os.chdir(os.path.join(self.base_dir,"Uploaded Structures"))
            except:
                shutil.rmtree(os.path.join(self.base_dir, "Uploaded Structures"))
                os.mkdir(os.path.join(self.base_dir,"Uploaded Structures"))
                os.chdir(os.path.join(self.base_dir,"Uploaded Structures"))

            with zipfile.ZipFile(self.manual_protein_struct_file, 'r') as zip_ref:
                zip_ref.extractall(os.path.join(self.base_dir,"Uploaded Structures"))

            self.XLMS_cif_files_protein_names = [name[:-4] for name in os.listdir(".") if name.endswith(".cif")]

            if len(self.XLMS_cif_files_protein_names) == 0:
                clear_output()
                os.chdir(self.base_dir)
                shutil.rmtree(os.path.join(self.base_dir, "Uploaded Structures"))
                self.XLMS_cif_files_protein_names = None
                self.are_manual_structures_verified = False
                self.manual_protein_struct_file = None
                self.is_manual_protein_struct = False
                print("No .CIF Structure Files Found")
            else:
                self.are_manual_structures_verified = True
                print("Verified Manual Protein Structure Files")
                print(".CIF Files for UniProt IDs: ", self.XLMS_cif_files_protein_names)
                print("Please note: It's your responsibility to make sure the .CIF files are named appropriately.")
            os.chdir(self.base_dir)
        elif reset:
            os.chdir(self.base_dir)
            shutil.rmtree(os.path.join(self.base_dir, "Uploaded Structures"))
            self.XLMS_cif_files_protein_names = None
            self.are_manual_structures_verified = False
            self.manual_protein_struct_file = None
            self.is_manual_protein_struct = False

        else:
            raise Exception("Error in workflow: verify_and_process_manual_protein_struct_file. Contact Development Team.")

    def cleanList(self, list_var):
        """
        Render a list as plain comma-separated text.

        The value is converted with str() and every "[", "]" and "'"
        character is removed from the result.

        args:
            list_var: list of strings
        return:
            list_str: string of the list
        """
        strip_table = str.maketrans("", "", "[]'")
        return str(list_var).translate(strip_table)

    def get_peptide_starts(self, row):
        """
        This function returns the start positions of the peptides in the FASTA file
        args:
            row: row of the dataframe
        return:
            start_list_a: list of start positions of peptide A
            start_list_b: list of start positions of peptide B
        """

        peptide_a = re.compile(row["Peptide A"])
        peptide_b = re.compile(row["Peptide B"])
        start_list_a = []
        start_list_b = []

        for m in peptide_a.finditer(self.fa[row["uniprotID"]].seq):
            start_list_a.append(m.start())

        for m in peptide_b.finditer(self.fa[row["uniprotID"]].seq):
            start_list_b.append(m.start())

        return [start_list_a, start_list_b]

    def peptide_start_a(self, row):
        """
        This function cleans the list of the start positions of peptides
        args:
            row: row of the dataframe
        return:
            list_str: string of the list of peptide A
        """
        return self.cleanList(row[0])

    def peptide_start_b(self, row):
        """
        This function cleans the list of the start positions of peptides
        args:
            row: row of the dataframe
        return:
            list_str: string of the list of peptide B
        """

        return self.cleanList(row[1])

    def remove_shared_peptides(self, row):
        """
        Tell whether a row should be kept when filtering shared peptides.

        A row is kept only when both "Peptide A-Pos" and "Peptide B-Pos"
        are non-empty.

        args:
            row: row of the dataframe
        return:
            True: if the peptides are not shared
            False: if the peptides are shared
        """
        has_a = len(row["Peptide A-Pos"]) > 0
        return has_a and len(row["Peptide B-Pos"]) > 0

    def get_actual_pos_from_residue_pep_a(self, row):
        """
        Compute the position of residue 1 in the chain.

        The result is the single number contained in row["Residue 1"]
        plus the peptide start given by row["Peptide A-Pos"].

        args:
            row: row of the dataframe
        return:
            residue_a_loc: actual position of the residue in the chain
        raises:
            Exception: if "Residue 1" does not contain exactly one number
        """
        start_offset = int(row["Peptide A-Pos"])
        digit_groups = re.findall(r"\d+", row["Residue 1"])
        if len(digit_groups) != 1:
            raise Exception("Residue Location Format Incorrect.")
        return start_offset + int(digit_groups[0])

    def get_actual_pos_from_residue_pep_b(self, row):
        """
        Compute the position of residue 2 in the chain.

        The result is the single number contained in row["Residue 2"]
        plus the peptide start given by row["Peptide B-Pos"].

        args:
            row: row of the dataframe
        return:
            residue_b_loc: actual position of the residue in the chain
        raises:
            Exception: if "Residue 2" does not contain exactly one number
        """
        start_offset = int(row["Peptide B-Pos"])
        digit_groups = re.findall(r"\d+", row["Residue 2"])
        if len(digit_groups) != 1:
            raise Exception("Residue Location Format Incorrect.")
        return start_offset + int(digit_groups[0])

    def convert_to_xlms_format(self):

        temp = self.XLMS_input.apply(lambda row: self.get_peptide_starts(row), axis=1)
        temp2 = temp.copy()

        self.XLMS_input["Peptide A-Pos"] = temp2.apply(
            lambda row: self.peptide_start_a(row)
        ).copy()
        self.XLMS_input["Peptide B-Pos"] = temp2.apply(
            lambda row: self.peptide_start_b(row)
        ).copy()

        self.XLMS_DF = self.XLMS_input[
            [
                "uniprotID",
                "X-link type",
                "Peptide A",
                "Residue 1",
                "Peptide A-Pos",
                "Peptide B",
                "Residue 2",
                "Peptide B-Pos",
            ]
        ].copy()

        print(self.XLMS_DF.head())

    # TODO: This function needs to be changed to accommodate protein-centric formats.
    def calculate_absolute_chain_pos(self):
        """
        Calculate absolute positions of cross-linking residues in protein chains.

        Processes peptide positions to:
            1. Filter out shared peptides
            2. Calculate absolute positions for both peptides
            3. Remove duplicates

        Creates:
            self.XLMS_DF_NO_DUPES_NO_SHARED: DataFrame with absolute positions and no duplicates

        Raises:
            ValueError: If residue location format is incorrect
        """
        def get_absolute_positions(df):
            """
            Calculate absolute positions for both peptides in parallel.

            Args:
                df: DataFrame with peptide positions and residue information

            Returns:
                DataFrame with added absolute position columns
            """
            def extract_residue_pos(pos_str, residue_col):
                """Extract and validate residue position."""
                try:
                    peptide_start = int(pos_str)
                    residue_num = re.findall(r"\d+", residue_col)
                    #print(residue_num)

                    if len(residue_num) != 1:
                        raise ValueError(f"Invalid residue format in {residue_col}")

                    return int(residue_num[0]) + peptide_start
                except ValueError as e:
                    raise ValueError(f"Position calculation failed: {str(e)}")

            # Filter rows where both peptides have valid positions
            valid_peptides = (df["Peptide A-Pos"].str.len() > 0) & (df["Peptide B-Pos"].str.len() > 0)
            df = df[valid_peptides].copy()
            #df.to_csv("test.csv")

            # Calculate absolute positions for both peptides
            try:
                df["Absolute Peptide A-Pos"] = df.apply(
                    lambda row: extract_residue_pos(row["Peptide A-Pos"], row["Residue 1"]),
                    axis=1
                )
                df["Absolute Peptide B-Pos"] = df.apply(
                    lambda row: extract_residue_pos(row["Peptide B-Pos"], row["Residue 2"]),
                    axis=1
                )
            except ValueError as e:
                raise ValueError(f"Position calculation failed: {str(e)}")

            return df

        try:
            # Process positions and remove duplicates in one chain
            self.XLMS_DF_NO_DUPES_NO_SHARED = (
                self.XLMS_DF.pipe(get_absolute_positions)
                            .drop_duplicates()
                            .reset_index(drop=True)
            )

            # Log processing results
            initial_count = len(self.XLMS_DF)
            final_count = len(self.XLMS_DF_NO_DUPES_NO_SHARED)
            shared_count = initial_count - len(self.XLMS_DF[
                (self.XLMS_DF["Peptide A-Pos"].str.len() > 0) &
                (self.XLMS_DF["Peptide B-Pos"].str.len() > 0)
            ])
            duplicate_count = initial_count - shared_count - final_count

            print(f"Processing summary:")
            print(f"- Initial entries: {initial_count}")
            print(f"- Shared peptides removed: {shared_count}")
            print(f"- Duplicates removed: {duplicate_count}")
            print(f"- Final entries: {final_count}")

        except Exception as e:
            raise ValueError(f"Chain position calculation failed: {str(e)}")

    def proteins_from_alphafold(self):
        '''
        This function downloads mmCIF protein structure files from AlphaFold Protein Structure Database
        and stores them as "<base_dir>/AlphaFold Structures/<protein>/<protein>.cif".

        If the "AlphaFold Structures" directory already exists, its direct
        sub-directories are reused as a cache: a sub-directory is kept only
        when it is named after a protein in self.XLMS_proteins AND contains
        "<protein>.cif"; every other sub-directory is deleted. Only proteins
        that are not cached are downloaded.

        A protein whose download fails for any reason (HTTP error, network
        problem, ...) has its folder removed and is listed in
        self.XLMS_proteins_without_structure_info, so it is retried on the
        next run. The working directory is self.base_dir on return.

        args:
            None
        creates:
            self.XLMS_proteins_with_structure_info,
            self.XLMS_proteins_without_structure_info,
            self.XLMS_proteins_left
            AlphaFold Structures directory is created if it doesn't exist
        return:
            None
        '''
        structures_dir = os.path.join(self.base_dir, "AlphaFold Structures")
        requested = list(self.XLMS_proteins)
        clear_output()

        try:
            if os.path.isdir(structures_dir):
                print("Directory for AlphaFold Structures already exists: ", structures_dir)
                requested_ids = set(requested)
                cached, removed = [], []
                # Direct children only (os.listdir): each protein has exactly
                # one folder right below the structures directory.
                for entry in os.listdir(structures_dir):
                    entry_path = os.path.join(structures_dir, entry)
                    if not os.path.isdir(entry_path):
                        continue
                    has_model = os.path.isfile(os.path.join(entry_path, f"{entry}.cif"))
                    if entry in requested_ids and has_model:
                        cached.append(entry)
                    else:
                        # Not needed for this input, or left without a
                        # structure file by an interrupted download.
                        shutil.rmtree(entry_path)
                        removed.append(entry)
                print("Irrelevant Sub-Directories were removed: ", removed)
                self.XLMS_proteins_with_structure_info = cached
            else:
                os.makedirs(structures_dir)
                print("Created Directory for AlphaFold Structures: ", structures_dir)
                self.XLMS_proteins_with_structure_info = []

            already_cached = set(self.XLMS_proteins_with_structure_info)
            self.XLMS_proteins_left = [
                protein for protein in requested if protein not in already_cached
            ]

            print("Downloading Structures from AlphaFold.")
            self.XLMS_proteins_without_structure_info = []
            for protein in self.XLMS_proteins_left:
                protein_dir = os.path.join(structures_dir, protein)
                os.makedirs(protein_dir, exist_ok=True)
                try:
                    # Change this URL for updates to AlphaFold Structures
                    urllib.request.urlretrieve(
                        f"https://alphafold.ebi.ac.uk/files/AF-{protein}-F1-model_v4.cif",
                        os.path.join(protein_dir, f"{protein}.cif"),
                    )
                except urllib.error.HTTPError:
                    shutil.rmtree(protein_dir, ignore_errors=True)
                    self.XLMS_proteins_without_structure_info.append(protein)
                    print(
                        f"AlphaFold Structure not downloaded/found for the following protein: {protein}"
                    )
                except Exception as e:
                    # Remove the (possibly partial) folder so a later run does
                    # not mistake it for a cached structure.
                    shutil.rmtree(protein_dir, ignore_errors=True)
                    self.XLMS_proteins_without_structure_info.append(protein)
                    print(
                        f"An unexpected error occurred while processing protein {protein}: {e}"
                    )
                else:
                    print(
                        f"AlphaFold Structure downloaded for the following protein: {protein}"
                    )
                    self.XLMS_proteins_with_structure_info.append(protein)

            clear_output()
            print("AlphaFold Structures downloaded in directory:", structures_dir)
            print("AlphaFold Structures not found for: ", self.XLMS_proteins_without_structure_info)
        finally:
            os.chdir(self.base_dir)

    def _get_manual_structure_distance_data(self, row):
        """
        Calculate distance between two residues for uploaded .CIF structures.

        Args:
            row: DataFrame row containing protein and peptide information

        Returns:
            dict: Contains residue indices, numbers and distance or error status
        """
        # Custom exceptions for specific error cases
        class StructureError(Exception): pass
        class MultipleChainError(StructureError): pass
        class ResidueLocationError(StructureError): pass
        class PeptideError(StructureError): pass

        # Initialize default return dictionary
        data_dict = {key: 'N/A' for key in [
            'residue-1-start-resindex', 'residue-2-start-resindex',
            'residue-1-start-resnum', 'residue-2-start-resnum',
            'residue-distance'
        ]}

        if row['uniprotID'] not in self.XLMS_cif_files_protein_names:
            return data_dict

        try:
            import prody as prd
            from contextlib import suppress

            # Load and validate structure
            protein_struct = prd.parseMMCIF(os.path.join(self.base_dir, 'Uploaded Structures', f'{row["uniprotID"]}.cif'))
            protein_struct_hv = protein_struct.getHierView()

            if len(list(protein_struct_hv)) != 1:
                raise MultipleChainError("Structure file has multiple chains")

            chain = list(list(protein_struct_hv)[0])
            protein_struct_seq = list(protein_struct_hv)[0].getSequence()
            #protein_struct_residue_list = list(list(protein_struct_hv)[0])

            # Find peptide positions
            def find_peptide_pos(peptide):
                matches = list(re.compile(peptide).finditer(protein_struct_seq))
                if not matches:
                    raise PeptideError("Peptide not found")
                if len(matches) > 1:
                    raise PeptideError("Multiple peptide matches found")
                return matches[0].start()

            peptide_positions = {
                'A': find_peptide_pos(row['Peptide A']),
                'B': find_peptide_pos(row['Peptide B'])
            }

            # Calculate residue positions
            def get_residue_pos(residue_str, peptide_start):
                residue_loc = re.findall(r"\d+", residue_str)
                if len(residue_loc) != 1:
                    raise ResidueLocationError("Invalid residue format")
                return int(residue_loc[0]) + peptide_start

            residue_positions = {
                'A': get_residue_pos(row['Residue 1'], peptide_positions['A']),
                'B': get_residue_pos(row['Residue 2'], peptide_positions['B'])
            }

            # Get residues and calculate distance
            residues = {
                'A': chain[residue_positions['A'] - 1],
                'B': chain[residue_positions['B'] - 1]
            }

            ca_atoms = {
                'A': residues['A'].getAtom('CA'),
                'B': residues['B'].getAtom('CA')
            }

            if any(atom is None for atom in ca_atoms.values()):
                raise StructureError("CA atom not found in residue")

            distance = prd.calcDistance(ca_atoms['A'], ca_atoms['B'])

            return {
                'residue-1-start-resindex': residues['A'].getResindex(),
                'residue-2-start-resindex': residues['B'].getResindex(),
                'residue-1-start-resnum': residues['A'].getResnum(),
                'residue-2-start-resnum': residues['B'].getResnum(),
                'residue-distance': distance
            }

        except StructureError as e:
            error_type = e.__class__.__name__.replace('Error', ' Error')
            data_dict['residue-distance'] = error_type
            self.erroneous_XLMS_cif_files_protein_names.append(row['uniprotID'])
            print(f'Unable to compute distance for protein {row["uniprotID"]}: {str(e)}')
            return data_dict

        except AttributeError as e:
            data_dict['residue-distance'] = 'Structure Error'
            self.erroneous_XLMS_cif_files_protein_names.append(row['uniprotID'])
            print(f'Unable to compute distance for protein {row["uniprotID"]}: Structure file is not appropriate. {str(e)}')
            return data_dict

    def calculate_residue_distance_and_betas_all(self):
        """
        Calculate distances and confidence scores for all residue pairs.

        For every row of self.XLMS_DF_NO_DUPES_NO_SHARED this adds:
            - "Residue Distance": CA-CA distance in the AlphaFold structure
            - "Residue 1 pLDDT" / "Residue 2 pLDDT": B-factor (beta) values of
              the two CA atoms
            - the "(Manual)" distance / index / number columns, when uploaded
              structures have been verified

        Rows whose protein has no AlphaFold structure, or whose residues
        cannot be located in it, get 'N/A' in the three AlphaFold columns.
        Each AlphaFold structure is parsed once and reused for all rows of
        that protein.
        """
        import prody as prd
        from pathlib import Path

        # uniprotID -> parsed hierarchical view (None if parsing gave nothing)
        hier_views = {}

        def load_hier_view(protein):
            """Parse the protein's AlphaFold file on first use, then reuse it."""
            if protein not in hier_views:
                struct_path = Path(self.base_dir) / 'AlphaFold Structures' / protein / f'{protein}.cif'
                protein_struct = prd.parseMMCIF(str(struct_path))
                hier_views[protein] = (
                    None if protein_struct is None else protein_struct.getHierView()
                )
            return hier_views[protein]

        def process_alphafold_metrics(row):
            """Helper function to process all AlphaFold metrics for a single row."""
            # Clear output so per-row messages do not pile up and slow the notebook down
            clear_output()

            if row['uniprotID'] not in self.XLMS_proteins_with_structure_info:
                return {'distance': 'N/A', 'beta1': 'N/A', 'beta2': 'N/A'}

            pep_a_pos = int(row['Absolute Peptide A-Pos'])
            pep_b_pos = int(row['Absolute Peptide B-Pos'])

            try:
                protein_struct_hv = load_hier_view(row['uniprotID'])

                # Get residues (the code assumes chain "A")
                res_1 = protein_struct_hv.getResidue("A", pep_a_pos)
                res_2 = protein_struct_hv.getResidue("A", pep_b_pos)

                # Get CA atoms and compute metrics
                res_1_ca = res_1["CA"]
                res_2_ca = res_2["CA"]

                return {
                    'distance': prd.calcDistance(res_1_ca, res_2_ca),
                    'beta1': res_1_ca.getBeta(),
                    'beta2': res_2_ca.getBeta()
                }

            except (AttributeError, TypeError):
                # AttributeError: no structure / no CA atom (method call on None).
                # TypeError: residue not found, so None["CA"] was attempted.
                print(f'Unable to compute metrics for protein: {row["uniprotID"]}.\nThe structure file is not appropriate.')
                return {'distance': 'N/A', 'beta1': 'N/A', 'beta2': 'N/A'}

        # Process AlphaFold metrics
        metrics = self.XLMS_DF_NO_DUPES_NO_SHARED.apply(process_alphafold_metrics, axis=1)

        # Update DataFrame with computed metrics
        self.XLMS_DF_NO_DUPES_NO_SHARED["Residue Distance"] = metrics.apply(lambda x: x['distance'])
        self.XLMS_DF_NO_DUPES_NO_SHARED["Residue 1 pLDDT"] = metrics.apply(lambda x: x['beta1'])
        self.XLMS_DF_NO_DUPES_NO_SHARED["Residue 2 pLDDT"] = metrics.apply(lambda x: x['beta2'])

        # Process manual structure data if available
        if self.are_manual_structures_verified:
            manual_struct_data = self.XLMS_DF_NO_DUPES_NO_SHARED.apply(
                lambda row: self._get_manual_structure_distance_data(row), axis=1
            )

            # DataFrame column -> key in the per-row result dictionary
            manual_metrics = {
                'Residue Distance (Manual)': 'residue-distance',
                'Residue 1 Index (Manual)': 'residue-1-start-resindex',
                'Residue 2 Index (Manual)': 'residue-2-start-resindex',
                'Residue 1 Number (Manual)': 'residue-1-start-resnum',
                'Residue 2 Number (Manual)': 'residue-2-start-resnum'
            }

            for df_col, dict_key in manual_metrics.items():
                # key=dict_key binds the current key to the lambda explicitly.
                self.XLMS_DF_NO_DUPES_NO_SHARED[df_col] = manual_struct_data.apply(
                    lambda data, key=dict_key: data[key]
                )


    def _get_residue_info_for_duplicates(self, row, protein_id_field, protein_list, column_name):
        """
        Helper function to get residue information for duplicates.

        Args:
            row: DataFrame row
            protein_id_field: Field containing protein ID
            protein_list: List of valid protein IDs
            column_name: Name of column to extract data from

        Returns:
            Value from the specified column or appropriate 'N/A' message
        """
        if row[protein_id_field] in protein_list:
            protein = row[protein_id_field]
            matching_rows = self.XLMS_DF_NO_DUPES_NO_SHARED[
                self.XLMS_DF_NO_DUPES_NO_SHARED[protein_id_field] == protein
            ][column_name]

            if not matching_rows.empty:
                value = matching_rows.iloc[0]
                # Convert to float if it's a distance or pLDDT value and not 'N/A'
                if column_name in ['Residue Distance', 'Residue 1 pLDDT', 'Residue 2 pLDDT'] and value != 'N/A':
                    return float(value)
                return value
            return 'N/A as Shared Peptide'
        return 'N/A'

    def insert_values_for_duplicates(self):
        """
        Insert duplicate values for residue distances and related metrics.

        Processes both AlphaFold and manual structure data (if available) to add:
            - Residue distances
            - pLDDT scores
            - Manual structure measurements
            - Residue numbers and indices
        """
        # Define columns and their corresponding parameters
        alphafold_columns = {
            'Residue Distance': ('uniprotID', self.XLMS_proteins_with_structure_info),
            'Residue 1 pLDDT': ('uniprotID', self.XLMS_proteins_with_structure_info),
            'Residue 2 pLDDT': ('uniprotID', self.XLMS_proteins_with_structure_info)
        }

        # Process AlphaFold data
        for col_name, (id_field, protein_list) in alphafold_columns.items():
            self.XLMS_input[col_name] = self.XLMS_input.apply(
                lambda row: self._get_residue_info_for_duplicates(
                    row, id_field, protein_list, col_name
                ),
                axis=1
            )

        # Process manual structure data if available
        if self.are_manual_structures_verified:
            manual_columns = {
                'Residue Distance (Manual)': ('uniprotID', self.XLMS_cif_files_protein_names),
                'Residue 1 Number (Manual)': ('uniprotID', self.XLMS_cif_files_protein_names),
                'Residue 2 Number (Manual)': ('uniprotID', self.XLMS_cif_files_protein_names),
                'Residue 1 Index (Manual)': ('uniprotID', self.XLMS_cif_files_protein_names),
                'Residue 2 Index (Manual)': ('uniprotID', self.XLMS_cif_files_protein_names)
            }

            for col_name, (id_field, protein_list) in manual_columns.items():
                self.XLMS_input[col_name] = self.XLMS_input.apply(
                    lambda row: self._get_residue_info_for_duplicates(
                        row, id_field, protein_list, col_name
                    ),
                    axis=1
                )

    # Outputting the distances
    def output_distances(self):
        """
        Save all analysis results to output files and reload the plot inputs.

        Files written to ``self.base_dir`` (``<stem>`` is the name of
        ``self.data_file`` without its extension):
            - ``xlms_input.xlsx``: ``self.XLMS_input`` (index included)
            - ``<stem>_XLMS_Distances_WO_Duplicates.csv``:
              ``self.XLMS_DF_NO_DUPES_NO_SHARED`` (no index)
            - ``xlms_proteins_with_structure_info.csv``:
              ``self.XLMS_proteins_with_structure_info`` as a Series
            - ``<stem>_XLMS_Output.xlsx``: the rows of ``xlms_input.xlsx`` whose
              'Residue Distance' is neither 'N/A' nor 'N/A as Shared Peptide',
              with remaining empty cells filled with 0

        Attributes set:
            - ``self.df_bplt``: the de-duplicated CSV read back from disk
            - ``self.df_hplt``: ``<stem>_XLMS_Output.xlsx`` read back from disk
        """
        stem = pathlib.Path(self.data_file).stem
        input_xlsx_path = os.path.join(self.base_dir, "xlms_input.xlsx")
        unique_csv_path = os.path.join(
            self.base_dir, stem + "_XLMS_Distances_WO_Duplicates.csv"
        )
        output_xlsx_path = os.path.join(self.base_dir, stem + "_XLMS_Output.xlsx")

        # pandas parses the text "N/A" as NaN by default. Left at the default,
        # the sentinel filter below never matches after the Excel round-trip
        # and fillna(0) then turns "no distance available" into a distance of
        # 0. Only genuinely empty cells are treated as missing here.
        excel_na_options = {"keep_default_na": False, "na_values": [""]}

        self.XLMS_input.to_excel(input_xlsx_path)
        print(unique_csv_path)
        self.XLMS_DF_NO_DUPES_NO_SHARED.to_csv(unique_csv_path, index=False)

        pd.Series(self.XLMS_proteins_with_structure_info).to_csv(
            os.path.join(self.base_dir, "xlms_proteins_with_structure_info.csv")
        )

        input_with_distances = pd.read_excel(input_xlsx_path, **excel_na_options)
        has_distance = ~input_with_distances["Residue Distance"].isin(
            ["N/A", "N/A as Shared Peptide"]
        )
        processed_input = input_with_distances[has_distance].copy()
        processed_input.fillna(0, inplace=True)
        processed_input.to_excel(output_xlsx_path, index=False)

        # Default NA parsing is kept for the CSV on purpose: it turns the
        # "N/A" sentinels into NaN, giving the bar plot a numeric column.
        self.df_bplt = pd.read_csv(unique_csv_path)
        self.df_hplt = pd.read_excel(output_xlsx_path, **excel_na_options)

    def save_barplot(self):
        """
        Generate and save a bar plot of residue distances.

        Plots the 'Residue Distance' column of ``self.df_bplt`` as bars, draws a
        dashed red horizontal line at ``self.residue_distance_threshold``, hides
        the x-axis, and saves the figure to ``self.base_dir`` as
        ``<stem>_XLMS_Distances_Barplot.jpeg`` at 600 dpi, where ``<stem>`` is
        the name of ``self.data_file`` without its extension. The current
        figure is closed afterwards.
        """
        bar_axes = self.df_bplt.plot.bar(y="Residue Distance", rot=0)
        plt.axhline(y=self.residue_distance_threshold, color="r", linestyle="dashed")

        bar_axes.set_title("Distance_Residue Bar_Plot", fontdict={"fontsize": 12})
        # One bar per cross-link: the tick labels would only be row numbers.
        bar_axes.axes.get_xaxis().set_visible(False)
        bar_axes.set_ylabel("Cα-Cα Distance", fontdict={"fontsize": 10})

        figure_name = pathlib.Path(self.data_file).stem + "_XLMS_Distances_Barplot.jpeg"
        figure_path = os.path.join(self.base_dir, figure_name)
        plt.savefig(figure_path, dpi=600, bbox_inches="tight")
        print("bp_done")
        plt.close()

    def save_histplot(self):
        """
        Generate and save histogram of residue distances with error handling for small datasets.
        """
        try:
            # Convert 'Residue Distance' to numeric, coercing errors to NaN
            self.df_hplt['Residue Distance'] = pd.to_numeric(self.df_hplt['Residue Distance'], errors='coerce')

            # Drop NaN values and check if we have enough data
            valid_data = self.df_hplt.dropna(subset=['Residue Distance'])
            if len(valid_data) == 0:
                print("Warning: No valid distance data found for histogram")
                return

            # Compute violation status
            valid_data["Cα-Cα Distance status"] = np.where(
                valid_data["Residue Distance"] <= self.residue_distance_threshold,
                "Satisfied",
                "Violated"
            )

            # Save the analysis results
            output_excel_path = os.path.join(
                self.base_dir,
                pathlib.Path(self.data_file).stem + "_XLMS_Final_Output.xlsx"
            )
            valid_data.to_excel(output_excel_path)

            # Create histogram with adjusted number of bins
            plt.figure(figsize=(10, 6))

            # Calculate appropriate number of bins based on data size
            n_bins = min(max(5, len(valid_data) // 2), 140)  # At least 5 bins, at most 140

            Histplot = sns.histplot(
                data=valid_data,
                x="Residue Distance",
                bins=n_bins,  # Adjusted number of bins
                stat="probability",
                hue="Cα-Cα Distance status",
                binwidth=None,  # Let seaborn determine appropriate binwidth
                palette={"Satisfied": "green", "Violated": "red"}
            )

            # Set labels and style
            Histplot.set(
                ylabel="Cross-links",
                xlabel="Cα-Cα Distance"
            )
            plt.ylabel("Cross-links", fontsize=10)
            plt.xlabel("Cα-Cα Distance", fontsize=10)

            # Save the plot
            output_plot_path = os.path.join(
                self.base_dir,
                pathlib.Path(self.data_file).stem + "_XLMS_distances_Histplot.jpeg"
            )
            plt.savefig(output_plot_path, dpi=300, bbox_inches='tight')
            plt.close()

            print(f"Histogram created with {len(valid_data)} valid data points")

        except Exception as e:
            print(f"Error creating histogram: {str(e)}")
            # Create a minimal plot if regular histogram fails
            try:
                plt.figure(figsize=(10, 6))
                plt.text(0.5, 0.5, "Insufficient data for histogram",
                        ha='center', va='center')
                plt.savefig(output_plot_path, dpi=300, bbox_inches='tight')
                plt.close()
            except Exception as e2:
                print(f"Could not create fallback plot: {str(e2)}")

    def visualize_crosslinks(self, is_manual):
        """
        Generate PyMOL sessions that show each cross-link as a coloured cylinder.

        Args:
            is_manual (bool): If True, use manually uploaded structures
                (``<base_dir>/Uploaded Structures/<protein>.cif``) and the
                '(Manual)' residue-number columns; if False, use AlphaFold
                structures (``<base_dir>/AlphaFold Structures/<protein>/<protein>.cif``)
                and the 'Absolute Peptide A/B-Pos' columns.

        Creates, below ``<base_dir>/PyMOL Sessions`` (AlphaFold) or
        ``<base_dir>/Uploaded Structures/PyMOL Sessions`` (manual):
            - ``<protein>/<protein>-<n>.pse``: one session per cross-link
            - ``<protein>/<protein>-Consolidated.pse``: all cross-links of the
              protein drawn so far
            - cylinders are green when the CA-CA distance measured in the loaded
              structure is <= ``self.residue_distance_threshold``, red otherwise

        An existing sessions directory is deleted and recreated. In manual mode
        the working directory is changed to the sessions directory.

        Rows are skipped (with a printed reason) when the protein has no
        structure, when the uploaded structure is listed as erroneous, when a
        residue number is missing, or when a residue has no CA atom in the
        structure.

        Raises:
            Exception: if ``self.is_visualization_allowed`` is falsy.

        TODO:
        Optimize this - the consolidated session is reloaded and re-saved once
        per cross-link.
        """
        from pymol import cmd
        from pymol.cgo import CYLINDER

        if not self.is_visualization_allowed:
            raise Exception("Error in flow: visualize_alphfold_crosslinks. Contact Development Team.")

        # Everything that differs between the two modes is resolved once here.
        if is_manual:
            sessions_root = os.path.join(self.base_dir, "Uploaded Structures", "PyMOL Sessions")
            proteins_with_structure = self.XLMS_cif_files_protein_names
            residue_columns = ("Residue 1 Number (Manual)", "Residue 2 Number (Manual)")
            # NOTE(review): both branches of the original read
            # 'Residue Distance'; manual sessions now use the manual distance
            # for the label - confirm this is the intended value.
            distance_column = 'Residue Distance (Manual)'
        else:
            sessions_root = os.path.join(self.base_dir, "PyMOL Sessions")
            proteins_with_structure = self.XLMS_proteins_with_structure_info
            residue_columns = ("Absolute Peptide A-Pos", "Absolute Peptide B-Pos")
            distance_column = 'Residue Distance'

        # Always start from an empty sessions directory.
        try:
            os.mkdir(sessions_root)
        except FileExistsError:
            shutil.rmtree(sessions_root)
            os.mkdir(sessions_root)
        if is_manual:
            os.chdir(sessions_root)

        processed_input = pd.read_csv(os.path.join(self.base_dir, pathlib.Path(self.data_file).stem +'_XLMS_Distances_WO_Duplicates.csv'))

        pymol_columns = [
            'uniprotID',
            'Peptide A',
            'Peptide B',
            'Absolute Peptide A-Pos',
            'Absolute Peptide B-Pos',
            'Residue Distance',
        ]
        if self.are_manual_structures_verified:
            pymol_columns += [
                'Residue Distance (Manual)',
                'Residue 1 Number (Manual)',
                'Residue 2 Number (Manual)',
            ]
        input_for_pymol = processed_input[pymol_columns]

        colors = {
            'green': [0.0, 1.0, 0.0], # green
            'red': [0.82, 0.0, 0.3]   # dubnium
        }
        radius = 0.5
        selection = 'all'
        atom = 'CA'
        prefix = 'xl'
        threshold = self.residue_distance_threshold

        # protein -> zero-based index of the last cross-link drawn for it
        proteins_processed_counter = {}

        for _, row in input_for_pymol.iterrows():
            protein = row['uniprotID']

            if protein not in proteins_with_structure:
                print(f"Skipped PyMOL Session generation for {protein}. Reason: Structure not found!")
                continue
            if is_manual and protein in self.erroneous_XLMS_cif_files_protein_names:
                print(f"Skipped PyMOL Session generation for {protein}. Reason: Structure has a problem!")
                continue

            # Integer residue numbers are used for every selection and object
            # name, so a float-typed column (e.g. 12.0) cannot yield "resi 12.0".
            try:
                res_a = int(row[residue_columns[0]])
                res_b = int(row[residue_columns[1]])
            except (TypeError, ValueError):
                print(f"Skipped PyMOL Session generation for {protein}. Reason: Residue number not available!")
                continue

            sel_a = f'{selection} and resi {res_a} and name {atom}'
            sel_b = f'{selection} and resi {res_b} and name {atom}'

            if is_manual:
                structure_path = os.path.join(self.base_dir, "Uploaded Structures", f'{protein}.cif')
            else:
                structure_path = os.path.join(self.base_dir, 'AlphaFold Structures', protein, f'{protein}.cif')

            cmd.reinitialize()
            cmd.load(structure_path)
            cmd.spectrum('b', 'rainbow_r')  # approximate AF coloring
            cmd.bg_color('white')

            # An empty selection yields no coordinates; skip that link rather
            # than aborting the whole run.
            coords_a = cmd.get_coords(sel_a, 1)
            coords_b = cmd.get_coords(sel_b, 1)
            if coords_a is None or coords_b is None or len(coords_a) == 0 or len(coords_b) == 0:
                print(f"Skipped PyMOL Session generation for {protein}. Reason: Residue {res_a} or {res_b} has no {atom} atom in the structure!")
                continue
            x1, y1, z1 = coords_a[0]
            x2, y2, z2 = coords_b[0]

            # Only links that can be drawn get a number and an output folder.
            protein_dir = os.path.join(sessions_root, protein)
            if protein in proteins_processed_counter:
                proteins_processed_counter[protein] += 1
            else:
                proteins_processed_counter[protein] = 0
                os.mkdir(protein_dir)
            counter = proteins_processed_counter[protein]

            single_path = os.path.join(protein_dir, f'{protein}-{counter}.pse')
            consolidated_path = os.path.join(protein_dir, f'{protein}-Consolidated.pse')

            cmd.distance(sel_a, sel_b)

            d = np.linalg.norm(np.array([x2, y2, z2]) - np.array([x1, y1, z1]))
            r, g, b = colors['green'] if d <= threshold else colors['red']
            cylinder = [CYLINDER, x1, y1, z1, x2, y2, z2, radius, r, g, b, r, g, b]

            link_name = f'{prefix}_{res_a}_{res_b}_{atom}'
            res_distance = row[distance_column]

            # Session containing only this cross-link
            cmd.load_cgo(cylinder, link_name)
            cmd.group(prefix, f'{prefix}_*')
            cmd.label(f'{prefix}_*', str(res_distance))
            cmd.save(single_path)

            if counter == 0:
                # First link of the protein: it doubles as the consolidated view
                cmd.save(consolidated_path)
            else:
                # Add this link to the consolidated session built so far
                cmd.reinitialize()
                cmd.load(consolidated_path)
                cmd.distance(sel_a, sel_b)
                cmd.load_cgo(cylinder, f'{link_name}_{counter}')
                cmd.group(prefix, f'{prefix}_*')
                cmd.label(f'{prefix}_*', str(res_distance))
                cmd.save(consolidated_path)

            print("PyMOL Session saved in " + single_path)

# Module-level backend instance. The widget handlers defined inside
# alphacrossxl_main() configure it (e.g. base_dir, data_file, fasta_db) and
# call its processing methods.
main_obj = AlphaCrossXLBackend()

def alphacrossx_logger(log_type, message):
    """Placeholder for the application logger; currently does nothing.

    Args:
        log_type: Kind of log entry (unused until the logger is implemented).
        message: Text of the log entry (unused until the logger is implemented).

    Returns:
        None
    """
    # TODO: Implement Logger Functionality
    return None

def alphacrossxl_main():
    # Persistent Header Defined First
    app_title = 'AlphaCross-XL (Colab Version)'
    app_version_number = 'v1.0'
    app_update_date = 'Jan 6, 2025'
    app_info = 'Python-based Interactive Tool for Analyzing XL-MS Data-sets and creating useful visualizations.'

    app_header_html = widgets.HTML(value=
    f"""
    <div style='margin:10px'>
    <h1 style='text-align: center;'>{app_title}</h1>
    <h5 style='text-align: center;'>{app_info}</h5>
    <hr>
    <h3><span style='text-align: left;'>Version: {app_version_number}</span><span style='float: right;'>Last Updated on: {app_update_date}</span></h3>
    <hr>
    </div>
    """
    )
    # Aligning Widgets StackOverFlow Reference: https://stackoverflow.com/a/62760915
    centered_box_layout_visible = widgets.Layout(display='flex',
                    flex_flow='column',
                    align_items='center',
                    #width='50%'
                    )
    left_aligned_box_layout_visible = widgets.Layout(display='flex',
                    flex_flow='column',
                    align_items='flex-start',
                    #width='50%'
                    )
    right_aligned_box_layout_visible = widgets.Layout(display='flex',
                    flex_flow='column',
                    align_items='flex-end',
                    #width='50%'
                    )

    # Loading Bar Stuff
    # taken from https://stackoverflow.com/a/62889861
    loading_bar_path = os.path.join(main_base_dir, 'loading-bar.gif')
    loading_bar_url = 'https://drive.usercontent.google.com/uc?id=1319Us_Vh57iYmBVPdwg-Sk4SVcfdPzQS'
    try:
        with open(loading_bar_path, 'rb') as f:
            img = f.read()
    except:
        with open(loading_bar_path, 'wb') as f:
            f.write(requests.get(loading_bar_url).content)
        with open(loading_bar_path, 'rb') as f:
            img = f.read()



    #   create loading bar widget, ready to display when running long function
    loading_bar = widgets.Image(value=img, layout=Layout(max_height='40px'))
    centered_loading_bar = widgets.VBox([loading_bar], layout=centered_box_layout_visible)

    # Output Defined Now but will be displayed just before Footer as persistent widget.
    output_widget = widgets.Output(layout=Layout(overflow='scroll visible',
                                        #border='3px solid black',
                                        width='',
                                        max_height='200px',
                                        flex_flow='column',
                                        display='flex'))

    # Persistent Header (Would Show during all pages.)
    display(app_header_html)

    # Widgets
    ## Page 1 Widgets
    button_start_app = widgets.Button(description="AlphaCross-XL is Initializing. Please Wait.",
                                      layout=Layout(
                                          width='auto',
                                      ),
                                      disabled=True)
    page_start_app = widgets.VBox(children=[button_start_app],layout=centered_box_layout_visible)


    ## Page 2 Widgets
    file_upload_data_widget = widgets.FileUpload(
        description='XL-MS Input File',
        accept='.csv, .xlsx',  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
        multiple=False,
        layout={'width': 'auto'}
    )
    file_upload_fasta_db_widget = widgets.FileUpload(
        description='FASTA Database',
        accept='.gz',  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
        multiple=False,
        layout={'width': 'auto'}
    )
    labelled_file_upload_data_widget = widgets.HBox(children=[widgets.Label(value="Please Input the XL-MS Data Set (Only .CSV/.XLSX Files Allowed): "), file_upload_data_widget]
                                                    ) #layout none required?
    labelled_file_upload_fasta_db_widget = widgets.HBox(children=[widgets.Label(value="Please Input the compressed FASTA Database (Only .FASTA.GZ Files Allowed): "), file_upload_fasta_db_widget]
                                                    ) #layout none required?
    button_submit_files = widgets.Button(description="Submit Files")
    centered_button_submit_files = widgets.VBox(children=[button_submit_files], layout=centered_box_layout_visible)

    page_input_files = widgets.VBox(children=[labelled_file_upload_data_widget, labelled_file_upload_fasta_db_widget, centered_button_submit_files], layout={'display': 'none'})

    ## Page 3 Widgets
    button_confirm_files = widgets.Button(description="Confirm Files")
    button_cancel_files = widgets.Button(description="Cancel Upload")

    page_confirm_or_cancel_files = widgets.VBox(children=[button_confirm_files, button_cancel_files], layout={'display': 'none'})

    ## Page 4 Widgets
    dropdown_uniprot_id = widgets.Dropdown(
        options=['Options not initialized'],
        description='Choose UniProt ID Column',
        disabled=False,
        style={'description_width': 'initial'},
        layout={'width': 'max-content'}
    )
    dropdown_xlink_types = widgets.Dropdown(
        options=['Options not initialized'],
        description='Choose Cross-Link Type Column',
        disabled=False,
        style={'description_width': 'initial'},
        layout={'width': 'max-content'}
    )
    dropdown_peptide_a = widgets.Dropdown(
        options=['Options not initialized'],
        description='Choose Peptide A Column',
        disabled=False,
        style={'description_width': 'initial'},
        layout={'width': 'max-content'}
    )
    dropdown_peptide_b = widgets.Dropdown(
        options=['Options not initialized'],
        description='Choose Peptide B Column',
        disabled=False,
        style={'description_width': 'initial'},
        layout={'width': 'max-content'}
    )
    dropdown_link_site_a = widgets.Dropdown(
        options=['Options not initialized'],
        description='Choose Link Site A Column',
        disabled=False,
        style={'description_width': 'initial'},
        layout={'width': 'max-content'}
    )
    dropdown_link_site_b = widgets.Dropdown(
        options=['Options not initialized'],
        description='Choose Link Site B Column',
        disabled=False,
        style={'description_width': 'initial'},
        layout={'width': 'max-content'}
    )


    button_confirm_columns = widgets.Button(description="Confirm Input Columns", layout={'width': 'auto'})
    button_go_back_columns = widgets.Button(description="Go Back", layout={'width': 'auto'})

    buttons_input_columns = widgets.HBox(children=[button_confirm_columns, button_go_back_columns])
    #centered_button_confirm_columns = widgets.VBox(children=[button_confirm_columns], layout=centered_box_layout_visible)
    page_input_columns = widgets.VBox(children=[dropdown_uniprot_id, dropdown_xlink_types, dropdown_peptide_a, dropdown_peptide_b, dropdown_link_site_a, dropdown_link_site_b, buttons_input_columns], layout={'display': 'none'})


    ## Page 5 Widgets
    inttext_threshold_dist = widgets.BoundedIntText(
        value=20,
        min=1,
        max=100,
        step=1,
        description='Threshold Distance (in Angstroms, Min - 1, Max - 100):',
        disabled=False,
        style={'description_width': 'initial'},
        layout={'width': 'initial'}
    )
    dropdown_xlink_type = widgets.Dropdown(
        options=['Options not initialized'],
        description='Choose Cross-Link Type for Analysis',
        disabled=False,
        style={'description_width': 'initial'},
        layout={'width': 'max-content'}
    )
    button_confirm_analysis_options = widgets.Button(description="Confirm Options", layout={'width': 'auto'})
    button_go_back_analysis_options = widgets.Button(description="Go Back", layout={'width': 'auto'})

    buttons_analysis_options = widgets.HBox(children=[button_confirm_analysis_options, button_go_back_analysis_options])
    #centered_button_confirm_analysis_options = widgets.VBox(children=[button_confirm_analysis_options], layout=centered_box_layout_visible)
    page_analysis_options = widgets.VBox(children=[inttext_threshold_dist, dropdown_xlink_type, buttons_analysis_options], layout={'display': 'none'})

    ## Page 6 Widgets
    radiobutton_visualization = widgets.RadioButtons(
        options=['Yes', 'No'],
        value='Yes',
        description='Do you want to generate PyMOL Visualizations: ',
        disabled=False,
        style={'description_width': 'initial'},
        layout={'width': 'max-content'}
    )
    radiobutton_manual_comparison = widgets.RadioButtons(
        options=['Yes', 'No'],
        value='No',
        description='Do you want to upload your own protein structures for comparison: ',
        disabled=False,
        style={'description_width': 'initial'},
        layout={'width': 'max-content'}
    )
    file_upload_protein_structure_widget = widgets.FileUpload(
        description='Structure Files',
        accept='.zip',  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
        multiple=False,
        disabled=True,
    )
    labelled_file_upload_protein_structure_widget = widgets.HBox(children=[widgets.Label(value="Please Input your own Protein Structure Files (.cif only) in a .ZIP Archive: "), file_upload_protein_structure_widget]
                                                                )

    button_confirm_visualization_options = widgets.Button(description="Confirm Visualization Options", layout={'width': 'auto'})
    button_go_back_visualization_options = widgets.Button(description="Go Back", layout={'width': 'auto'})

    buttons_visualization_options = widgets.HBox(children=[button_confirm_visualization_options, button_go_back_visualization_options])
    #centered_button_confirm_visualization_options = widgets.VBox(children=[button_confirm_visualization_options], layout=centered_box_layout_visible)
    page_visualization_options = widgets.VBox(children=[radiobutton_visualization, radiobutton_manual_comparison, labelled_file_upload_protein_structure_widget, buttons_visualization_options], layout={'display': 'none'})

    ## Page 7 Widgets - Processing Confirm
    button_preview = widgets.Button(description="Start Analysis", layout={'width': 'auto'})
    button_go_back_preview = widgets.Button(description="Go Back", layout={'width': 'auto'})
    buttons_preview = widgets.HBox(children=[button_preview, button_go_back_preview])

    page_preview = widgets.VBox(
        children=[
            file_upload_data_widget,
            file_upload_fasta_db_widget,
            dropdown_uniprot_id,
            dropdown_xlink_types,
            dropdown_peptide_a,
            dropdown_peptide_b,
            dropdown_link_site_a,
            dropdown_link_site_b,
            inttext_threshold_dist,
            dropdown_xlink_type,
            radiobutton_visualization,
            radiobutton_manual_comparison,
            file_upload_protein_structure_widget,
            buttons_preview
        ],
        layout={'display': 'none'}
    )
    ## Page 8 Widgets
    page_result = widgets.HTML(value=
    """
    <div style='margin:10px'>
    <button>
    </div>
    """
    )


    # Display Sequence
    ## Page 1 Display Sequence
    display(page_start_app)

    ## Page 2 Display Sequence
    display(page_input_files)

    ## Page 3 Display Sequence
    display(page_confirm_or_cancel_files)

    ## Page 4 Display Sequence
    display(page_input_columns)

    ## Page 5 Display Sequence
    display(page_analysis_options)

    ## Page 6 Display Sequence
    display(page_visualization_options)

    ## Page 7 Display Sequence
    #display(page_preview)
    display(page_preview)

    # ..


    # Widget Handler Functions
    ## Page 1 Widget Handler Functions
    def on_click_button_reset(b):
        with output_widget:
            clear_output()
            # Clear All Files.
            # Get back to Start Page.
            print('Resetting AlphaCross-XL.')

    def on_click_button_start(b):
        with output_widget:
            clear_output()
            page_start_app.layout.display = 'none'
            page_input_files.layout.display = 'block'
            centered_button_submit_files.layout = {'display': 'flex', 'flex_flow':'column',
                    'align_items': 'center'}
            print("Started. Awaiting Files.")

    ## Page 2 Widget Handler Functions
    def on_click_button_submit_files(b):
        with output_widget:
            clear_output()
            print('Verifying Submitted Files.')
            if file_upload_data_widget.value and file_upload_fasta_db_widget.value:
                page_input_files.layout.display = 'none'
                page_confirm_or_cancel_files.layout = {'display': 'flex', 'flex_flow':'column',
                    'align_items': 'center'}

                data_filename, data_content = next(iter(file_upload_data_widget.value.items()))
                print("Data File: ", data_filename)
                db_filename, db_content = next(iter(file_upload_fasta_db_widget.value.items()))
                print("Database File: ", db_filename)

                print("Make sure your data file is of correct format and FASTA Database is of correct species.")
                print("If selected files are correct, press confirm to continue.")

            else:
                clear_output()
                print("Please upload both input files together. Try again")

    ## Page 3 Widget Handler Functions
    def on_click_button_confirm_files(b):
        """Write both uploads into the base directory and open column selection.

        Records the working directories on `main_obj`, saves the first uploaded
        data file and FASTA database to disk, lets `main_obj` load them, and
        fills every column dropdown with the columns reported by `main_obj`.

        Raises:
            Exception: if either file is not present on disk after writing.
        """
        with output_widget:
            clear_output()
            os.chdir(main_base_dir)
            main_obj.base_dir = os.getcwd()
            main_obj.base_dir_parent = main_base_dir_parent
            page_confirm_or_cancel_files.layout.display = 'none'
            print('Files Confirmed. Uploading Files.')

            data_filename, data_content = next(iter(file_upload_data_widget.value.items()))
            print("Data File: ", data_filename)
            db_filename, db_content = next(iter(file_upload_fasta_db_widget.value.items()))
            print("Database File: ",db_filename)

            # Persist each upload next to the other analysis files.
            saved_paths = []
            for upload_name, upload_entry in ((data_filename, data_content),
                                              (db_filename, db_content)):
                target_path = os.path.join(main_obj.base_dir, upload_name)
                with open(target_path, 'wb') as handle:
                    handle.write(upload_entry['content'])
                saved_paths.append(target_path)
            data_file_path, db_path = saved_paths

            if not all(os.path.exists(saved_path) for saved_path in saved_paths):
                raise Exception("Failed to save files. Contact Developer Team.")
            print("Files saved successfully. Proceeding to Column Selection.")

            main_obj.data_file = data_file_path
            main_obj.fasta_db = db_path
            main_obj.initialize_input_file()
            main_obj.process_fasta()
            input_file_columns = main_obj.get_input_file_columns()
            print("Input File Columns: ", input_file_columns)
            print("Please choose corresponding columns in input file.")

            # Every column selector offers the same list of input columns.
            for column_dropdown in (dropdown_uniprot_id, dropdown_xlink_types,
                                    dropdown_peptide_a, dropdown_peptide_b,
                                    dropdown_link_site_a, dropdown_link_site_b):
                column_dropdown.options = input_file_columns

            page_input_columns.layout = {'display': 'flex', 'flex_flow':'column',
                    'align_items': 'center'}


    def on_click_button_cancel_files(b):
        """Discard the pending uploads and return to the file-upload page."""
        with output_widget:
            clear_output()
            print('Canceled Operation. Please Start Again.')

            # Workaround for the faulty file counter in ipywidgets 7.7.1:
            # reset the counter and empty the stored uploads by hand.
            for upload_widget in (file_upload_data_widget, file_upload_fasta_db_widget):
                upload_widget._counter = 0
                upload_widget.value.clear()

            page_confirm_or_cancel_files.layout.display = 'none'
            page_input_files.layout.display = 'block'

    ## Page 4 Widget Handler Functions
    def on_click_button_confirm_columns(b):
        """Validate the column mapping and advance to the analysis options page.

        The mapping is rejected (with a console message) when the same input
        column was picked for more than one role.
        """
        with output_widget:
            clear_output()
            user_chosen_columns_dict = {
                "uniprotID": dropdown_uniprot_id.value,
                "X-link type": dropdown_xlink_types.value,
                "Peptide A": dropdown_peptide_a.value,
                "Residue 1": dropdown_link_site_a.value,
                "Residue 2": dropdown_link_site_b.value,
                "Peptide B": dropdown_peptide_b.value,
            }

            picked_columns = list(user_chosen_columns_dict.values())
            if len(set(picked_columns)) != len(picked_columns):
                print("You have chosen the same columns for one particular column type.")
                print("Please choose unique columns for each type.")
                return

            print("Columns are Verified")
            main_obj.set_input_file_columns(user_chosen_columns_dict=user_chosen_columns_dict, reset=False)
            page_input_columns.layout.display = 'none'
            # The cross-link types are queried only after the columns are set.
            dropdown_xlink_type.options = main_obj.get_input_xlink_types()
            page_analysis_options.layout = {'display': 'flex', 'flex_flow':'column',
                    'align_items': 'center'}
            print('Input Columns Confirmed. Proceeding to Analysis Options.')
            print("Please choose analysis options.")



    def on_click_button_go_back_columns(b):
        """Abandon column selection and return to the file-upload page.

        Deletes the saved data file, the FASTA database and its pyfastx index,
        re-initializes `main_obj`, empties both upload widgets and restores the
        placeholder options on every column dropdown.

        A file that is already gone is skipped rather than treated as an error,
        so the reset always completes and the user is never left stranded on
        the column page.
        """
        with output_widget:
            clear_output()
            # Deleting submitted files to preserve memory.
            stale_paths = (
                main_obj.data_file,
                main_obj.fasta_db,
                main_obj.fasta_db + '.fxi',  # pyfastx index file
            )
            for stale_path in stale_paths:
                try:
                    os.remove(stale_path)
                except FileNotFoundError:
                    # Nothing to clean up for this path; keep resetting.
                    pass

            # Resetting Main Object
            main_obj.__init__()
            print('Canceled Operation. Please Start Again.')

            # Easy fix for the faulty file counter in ipywidgets 7.7.1
            for upload_widget in (file_upload_data_widget, file_upload_fasta_db_widget):
                upload_widget._counter = 0
                upload_widget.value.clear()

            for column_dropdown in (dropdown_uniprot_id, dropdown_xlink_types,
                                    dropdown_peptide_a, dropdown_peptide_b,
                                    dropdown_link_site_a, dropdown_link_site_b):
                column_dropdown.options = ['Options not initialized']

            page_input_columns.layout.display = 'none'
            page_input_files.layout.display = 'block'


    ## Page 5 Widget Handler Functions
    def on_click_button_confirm_analysis_options(b):
        """Store the chosen cross-link type and distance threshold, then advance.

        The values are read from the analysis-page widgets and handed to
        `main_obj`; the visualization options page is shown afterwards.
        """
        with output_widget:
            clear_output()
            chosen_xlink_type = dropdown_xlink_type.value
            chosen_threshold = inttext_threshold_dist.value
            main_obj.set_input_xlink_type_threshold_dist(
                user_x_link_type_chosen=chosen_xlink_type,
                user_threshold_dist_chosen=chosen_threshold,
            )

            page_analysis_options.layout.display = 'none'
            print("Analysis Options Confirmed. Proceeding to Visualization Options.")
            page_visualization_options.layout = {'display': 'flex', 'flex_flow':'column',
                    'align_items': 'center'}

            print("Please choose visualization options.")

    def on_click_button_go_back_analysis_options(b):
        """Undo the column mapping and return to the column selection page."""
        with output_widget:
            clear_output()

            # Every role is mapped to None; `reset=True` tells main_obj to undo.
            column_roles = ("uniprotID", "X-link type", "Peptide A",
                            "Residue 1", "Residue 2", "Peptide B")
            user_chosen_columns_dict = dict.fromkeys(column_roles)
            main_obj.set_input_file_columns(user_chosen_columns_dict=user_chosen_columns_dict, reset=True)

            page_analysis_options.layout.display = 'none'
            page_input_columns.layout = {'display': 'flex', 'flex_flow':'column',
                    'align_items': 'center'}
            dropdown_xlink_type.options = ['Options not initialized']



    ## Page 6 Widget Handler Functions
    def on_click_button_confirm_visualization_options(b):
        """Record the visualization choices and move on to the final preview.

        When manual comparison is requested but no archive has been uploaded,
        the user is reminded to upload one and nothing else happens. Otherwise
        the two flags are stored on `main_obj`, an uploaded structure archive
        is saved and handed to `main_obj` for verification, every input widget
        is locked, and the preview page is shown.
        """
        with output_widget:
            clear_output()
            if radiobutton_manual_comparison.value == 'Yes' and file_upload_protein_structure_widget.value == {}:
                print('Please upload the .cif files in a single .zip archive.')
                return

            main_obj.is_visualization_allowed = radiobutton_visualization.value == 'Yes'
            main_obj.is_manual_protein_struct = radiobutton_manual_comparison.value == 'Yes'

            if main_obj.is_manual_protein_struct:
                archive_name, archive_entry = next(iter(file_upload_protein_structure_widget.value.items()))
                print("Protein Structures Archive Uploaded: ", archive_name)
                archive_path = os.path.join(main_obj.base_dir, archive_name)
                main_obj.manual_protein_struct_file = archive_path
                with open(archive_path, 'wb') as handle:
                    handle.write(archive_entry['content'])
                main_obj.verify_and_process_manual_protein_struct_file()

            print("Visualization Options Confirmed. Proceeding to Final Preview.")
            page_visualization_options.layout.display = 'none'

            # Freeze every input so nothing changes while the preview is shown.
            for locked_widget in (
                file_upload_data_widget, file_upload_fasta_db_widget,
                dropdown_uniprot_id, dropdown_xlink_types,
                dropdown_peptide_a, dropdown_peptide_b,
                dropdown_link_site_a, dropdown_link_site_b,
                inttext_threshold_dist, dropdown_xlink_type,
                radiobutton_visualization, radiobutton_manual_comparison,
                file_upload_protein_structure_widget,
            ):
                locked_widget.disabled = True

            page_preview.layout = {'display': 'flex', 'flex_flow':'column',
                    'align_items': 'center'}


    def on_click_button_go_back_visualization_options(b):
        """Undo the analysis options and return to the analysis options page."""
        analysis_page_layout = {'display': 'flex', 'flex_flow':'column',
                                'align_items': 'center'}
        with output_widget:
            clear_output()
            # Both choices are passed as None; `reset=True` tells main_obj to undo.
            main_obj.set_input_xlink_type_threshold_dist(
                user_x_link_type_chosen=None,
                user_threshold_dist_chosen=None,
                reset=True,
            )

            page_visualization_options.layout.display = 'none'
            page_analysis_options.layout = analysis_page_layout

    def on_change_radiobutton_manual_comparison(change):
        """Enable or disable the structure upload to match the radio button.

        `change` is the traitlets change dictionary; only `change['new']` is read.
        """
        with output_widget:
            if change['new'] != 'Yes':
                # Manual comparison switched off: lock the upload and drop
                # anything uploaded so far.
                file_upload_protein_structure_widget.disabled = True
                file_upload_protein_structure_widget.value.clear()
                file_upload_protein_structure_widget._counter = 0
                return

            file_upload_protein_structure_widget.disabled = False
            print("Please upload your .cif structure files in a single .zip archive.")
            print("Make sure your .cif files are labelled as per format: {UniProt_ID}.cif")
            print("Incorrectly named files will be ignored.")

    ## Page 7 Widget Handler Functions
    def on_click_button_go_back_preview(b):
        """Leave the preview page and reopen the visualization options page.

        Asks `main_obj` to undo the processing of a manually uploaded structure
        archive (when one was used), unlocks the input widgets that were frozen
        for the preview, and shows the visualization options page again.
        """
        with output_widget:
            clear_output()
            # The previous message announced that analysis was starting, which
            # is the opposite of what this handler does.
            print("Going back to Visualization Options. Please review your choices.")
            page_preview.layout.display = 'none'
            if main_obj.is_manual_protein_struct:
                main_obj.verify_and_process_manual_protein_struct_file(reset=True)

            for unlocked_widget in (
                file_upload_data_widget, file_upload_fasta_db_widget,
                dropdown_uniprot_id, dropdown_xlink_types,
                dropdown_peptide_a, dropdown_peptide_b,
                dropdown_link_site_a, dropdown_link_site_b,
                inttext_threshold_dist, dropdown_xlink_type,
                radiobutton_visualization, radiobutton_manual_comparison,
            ):
                unlocked_widget.disabled = False

            # Keep the structure upload in step with the manual-comparison radio
            # button, exactly as its change handler does: usable only on 'Yes'.
            file_upload_protein_structure_widget.disabled = (
                radiobutton_manual_comparison.value != 'Yes'
            )

            page_visualization_options.layout = {'display': 'flex', 'flex_flow':'column',
                    'align_items': 'center'}

    def on_click_button_preview(b):
        """Run the full analysis pipeline, package the results and download them.

        The pipeline steps are executed on `main_obj` in a fixed order, with the
        console redrawn (loading bar plus a one-line status) after each step.
        Afterwards the intermediate folders are zipped and removed, the whole
        base directory is archived into the parent directory as
        ``acxl-results-<date>-<data file stem>.zip`` and a browser download of
        that archive is triggered.
        """
        def show_progress(message):
            # Redraw the console: loading bar on top, latest status underneath.
            clear_output()
            display(centered_loading_bar)
            print(message)

        with output_widget:
            clear_output()
            display(centered_loading_bar)
            page_preview.layout.display = 'none'

            print("Confirmed all parameters. Starting Analysis! Please Wait...")

            main_obj.convert_to_xlms_format()
            show_progress("Converted to suitable formats.")
            # TODO: will have to modify this for protein centric
            main_obj.calculate_absolute_chain_pos()
            show_progress("Obtained Absolute Positions of Link-Sites in Protein Chain.")
            main_obj.proteins_from_alphafold()
            show_progress("Downloaded Protein Structures from AlphaFold")
            main_obj.calculate_residue_distance_and_betas_all()
            show_progress("Computed Residue Distance and extracting pLDDT.")
            main_obj.insert_values_for_duplicates()
            show_progress("Inserted values for duplicates.")
            main_obj.output_distances()
            show_progress("Created Output Files")
            main_obj.save_barplot()
            main_obj.save_histplot()
            show_progress("Created Plots")

            if main_obj.is_visualization_allowed and main_obj.is_manual_protein_struct:
                main_obj.visualize_crosslinks(is_manual=True)
                main_obj.visualize_crosslinks(is_manual=False)
                show_progress("Visualized Crosslinks (Both AlphaFold and Uploaded Structures)")
            elif main_obj.is_visualization_allowed and not main_obj.is_manual_protein_struct:
                main_obj.visualize_crosslinks(is_manual=False)
                show_progress("Visualized Crosslinks (AlphaFold Only)")
            else:
                print("No Crosslinks Visualized.")

            print("Analysis Complete!")
            print("Compressing all analysis files into a .zip archive. Please wait...")

            # The archives below use relative names, so they land in the cwd.
            os.chdir(main_obj.base_dir)
            date_string = datetime.date.today().strftime('%Y-%m-%d')
            # basename() honours the platform's path separator; splitting on "/"
            # only worked on POSIX paths. The stem is still cut at the first dot.
            data_filename = '-' + os.path.basename(main_obj.data_file).split(".")[0]

            if main_obj.are_manual_structures_verified:
                uploaded_structs_dir = os.path.join(main_obj.base_dir, 'Uploaded Structures')
                shutil.make_archive('acxl-uploaded_structs-data', 'zip', uploaded_structs_dir)
                shutil.rmtree(uploaded_structs_dir)
            if main_obj.is_visualization_allowed:
                pymol_sessions_dir = os.path.join(main_obj.base_dir, 'PyMOL Sessions')
                shutil.make_archive('acxl-alphafold-pymol-sessions', 'zip', pymol_sessions_dir)
                shutil.rmtree(pymol_sessions_dir)

            alphafold_dir = os.path.join(main_obj.base_dir, 'AlphaFold Structures')
            shutil.make_archive('acxl-alphafold-structures', 'zip', alphafold_dir)
            shutil.rmtree(alphafold_dir)
            os.remove(os.path.join(main_obj.base_dir, 'loading-bar.gif'))

            # The final archive is written next to (not inside) the base directory.
            os.chdir(main_obj.base_dir_parent)
            results_archive_stem = 'acxl-results-'+ date_string + data_filename
            shutil.make_archive(results_archive_stem, 'zip', main_obj.base_dir)
            result_file_path = os.path.join(main_obj.base_dir_parent, results_archive_stem + '.zip')

            clear_output()
            print("Results are stored in: ",result_file_path)
            print("Thank you for using AlphaCross-XL!")
            print("Downloading Results!")
            print("Note: If the download doesn't start automatically, you can download the file manually.")
            print("Note: Click the Folder Icon on the RHS Tool bar to see the directory structure.")
            print("This may take up to 5-10 minutes if Cross-Links are visualized!")
            print("To run the tool again, run this cell again by using the play button or Ctrl/Cmd + Enter")
            files.download(result_file_path)

    # Widget-Handler Bindings
    ## Page 1 Widget-Handler Bindings
    # Each button is paired with its click handler, grouped by page.
    _click_bindings = (
        ## Page 1
        (button_start_app, on_click_button_start),
        ## Page 2
        (button_submit_files, on_click_button_submit_files),
        ## Page 3
        (button_cancel_files, on_click_button_cancel_files),
        (button_confirm_files, on_click_button_confirm_files),
        ## Page 4
        (button_confirm_columns, on_click_button_confirm_columns),
        (button_go_back_columns, on_click_button_go_back_columns),
        ## Page 5
        (button_confirm_analysis_options, on_click_button_confirm_analysis_options),
        (button_go_back_analysis_options, on_click_button_go_back_analysis_options),
        ## Page 6
        (button_confirm_visualization_options, on_click_button_confirm_visualization_options),
        (button_go_back_visualization_options, on_click_button_go_back_visualization_options),
        ## Page 7
        (button_go_back_preview, on_click_button_go_back_preview),
        (button_preview, on_click_button_preview),
    )
    for _button, _callback in _click_bindings:
        _button.on_click(_callback)

    ## Page 6: the radio button reports value changes instead of clicks.
    radiobutton_manual_comparison.observe(on_change_radiobutton_manual_comparison, names='value')

    ## Page 8




    # Processing Functions and Function Calls
    ## Page 1 Processing Functions and Function Calls
    def update_tool_status_idle():
        """Clear the console log and print the idle status line."""
        idle_status = "Idle"
        with output_widget:
            clear_output()
            print(idle_status)

    # Show the idle status straight away.
    update_tool_status_idle()

    # Formatted Output Widget
    # Console log: a centered heading stacked on top of the shared output widget.
    console_log_heading = widgets.HTML(
        value='''<hr><h3 style='text-align: center;'>Console Log (Scroll for Long Outputs)</h3>'''
    )
    arranged_output = widgets.VBox(children=[console_log_heading, output_widget])

    # Persistent footer, defined last.
    app_credits = '© 2024-2025 AlphaCross-XL Development Team.<br>This tool was developed as a collaborative project at Proteomics Lab, IIT Bombay and Wiita Lab, UCSF, with assistance from Sali Lab, UCSF.'
    footer_markup = f"""
    <div style='margin:0px'>
    <hr>
    <h5 style='text-align: center;'>{app_credits}</h5>
    </div>
    """
    app_footer = widgets.HTML(value=footer_markup)

    display(arranged_output, app_footer)

    # Most Important Function which needs to be run once window is rendered
    def configure_dependencies():
        with output_widget:
            try:
                clear_output()
                print("Configuring Dependencies.")
                import prody, pymol
                import pyfastx as pyfx
                print("Dependencies Configured!")
                print(pymol.get_version_message())
                print("ProDy Version: ", prody.__version__, ", PyFastX Version: ", pyfx.__version__)
                button_start_app.description = "Click Here to Start AlphaCross-XL"
                button_start_app.disabled = False
                print("AlphaCross-XL is Initialized!")
                print("Base Directory: ", main_base_dir)

                #update_tool_status_idle()
            except:
                #print('time taken to run:',t2-t1)
                button_start_app.description = "AlphaCross-XL is Initializing for the First Time. Please Wait upto 10 Minutes."
                clear_output()
                display(centered_loading_bar)
                print("ProDy and PyFastx are being installed! Please be patient. This takes 1-2 Minutes!")
                try:
                    start_time = time.perf_counter()
                    !pip install biopython==1.79 pyfastx
                    !pip install --no-deps prody
                    check_time = time.perf_counter()
                    clear_output()
                    display(centered_loading_bar)
                    print("Time taken to install ProDy, PyFastx: ", str(datetime.timedelta(seconds=int(check_time - start_time))))
                    print("Installing PyMOL. This will take 6-8 Minutes! Do not close the window.")
                    !apt-get install -yq git build-essential python3-dev libglew-dev \
                    libpng-dev libfreetype6-dev libxml2-dev \
                    libmsgpack-dev python3-pyqt5.qtopengl libglm-dev libnetcdf-dev

                    clear_output()
                    display(centered_loading_bar)
                    check_time = time.perf_counter()
                    print("Time Elapsed: ", str(datetime.timedelta(seconds=int(check_time - start_time)))," minutes.")
                    print("Building PyMOL")

                    # Improvement Idea
                    # Download tar file from GDrive!
                    if os.path.isfile('/tmp/pymol_colab_20230509.tar.gz'):
                        os.remove('/tmp/pymol_colab_20230509.tar.gz')
                    # File ID extracted from the Google Drive link
                    file_id = "1Jf2ydKBKju2OvKou57v8hs-bFlNbIbS7"
                    output_file = "/tmp/pymol_colab_20230509.tar.gz"

                    # Download tar file from Google Drive using gdown
                    if os.path.isfile(output_file):
                        os.remove(output_file)

                    # Using gdown to download the pymol binaries
                    !gdown --id {file_id} -O {output_file}

                    # Extract the tar file
                    !tar -xf {output_file} -C /tmp/

                    # Change directory
                    %cd /tmp/pymol-open-source/

                    !python3 setup.py install
                    os.chdir('/content')
                    import pymol, prody
                    import pyfastx as pyfx
                    clear_output()
                    check_time = time.perf_counter()
                    print("PyMOL is Installed. Dependency Installation Completed.")
                    print("Total Time Elapsed: ", str(datetime.timedelta(seconds=int(check_time - start_time)))," minutes.")
                    print("Dependencies Configured!")
                    button_start_app.description = "Click Here to Start AlphaCross-XL"
                    button_start_app.disabled = False
                    print("AlphaCross-XL is Initialized!")
                    print(pymol.get_version_message())
                    print("ProDy Version: ", prody.__version__, ", PyFastX Version: ", pyfx.__version__)
                    print("Base Directory: ", main_base_dir)

                except:
                    clear_output()
                    print("Initialization Terminated! This shouldn't happen, unless you manually interrupted the execution.")
                    print("Fatal Error Installing Dependenices")
                    button_start_app.description = "Error Initializing AlphaCross-XL Dependencies. Please contact Development Team"
                    #button_start_app.disabled = False
    configure_dependencies()



# Run the app entry point defined above when this cell is executed.
alphacrossxl_main()


