<a href="https://colab.research.google.com/github/eoinleen/protein-design-final-dir/blob/main/mod_total_analysis_for_ProteinMPNN_evolution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
"""
RFdiffusion Structure Analysis and Sequence Extraction Tool
========================================================
Created: January 31, 2025
Authors: Original Analysis - Dr. Eoin Leen, University of Leeds
         Visualization & Integration - Claude AI & Dr. Eoin Leen
Version: 2.1

Purpose:
--------
Combined pipeline for:
1. Structural analysis of PDB files
2. Interface analysis for protein-protein interactions
3. Generation of publication-ready visualizations

Input Required:
-------------
1. Directory containing PDB files

Output Generated:
---------------
1. CSV file with structural analysis results
2. Visualizations of structural parameters

Analysis Parameters:
------------------
Structure Analysis Cutoffs:
1. Hydrogen Bonds:
   - Distance cutoff: O-N distance < 3.5 Å
   - Calculated between backbone atoms only
   - Only inter-chain H-bonds counted

2. Salt Bridges:
   - Distance cutoff: < 4.0 Å between any atoms of residue pairs
   - Residue pairs considered:
     * Acidic: ASP, GLU
     * Basic: LYS, ARG, HIS
   - Only inter-chain salt bridges counted

3. Hydrophobic Contacts:
   - Distance cutoff: < 5.0 Å between any atoms of residue pairs
   - Hydrophobic residues considered:
     * ALA, VAL, LEU, ILE, MET, PHE, TRP, PRO
   - Only inter-chain contacts counted

4. Buried Surface Area:
   - Calculated using FreeSASA algorithm
   - Uses default atomic radii from FreeSASA (based on NACCESS/RSA)
   - Process:
     * First calculates SASA for entire complex
     * Then calculates SASA for each chain individually
     * BSA = (Sum of individual chain SASAs - Complex SASA) / 2
   - Units: Å²
   - Inter-chain burial only (interface area)
   - Probe radius: 1.4 Å (water molecule)
   - Resolution: 100 points/atom (FreeSASA default)

Usage:
-----
1. Mount Google Drive in Colab
2. Update pdb_directory path
3. Run script
"""

# Install required packages
!pip install -q biopython pandas freesasa numpy matplotlib seaborn python-pptx plotly kaleido

# Import required libraries
import os
import sys
import time
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files, drive
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any
from Bio import PDB
from Bio.PDB.PDBIO import PDBIO
from Bio.PDB.Polypeptide import is_aa
from Bio.PDB.Structure import Structure
import freesasa
import numpy as np
import seaborn as sns
from pptx import Presentation
from pptx.util import Inches, Cm, Pt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import traceback
import hashlib

# Custom exception for structure validation
class StructureValidationError(Exception):
    pass

# ===============================
# Structure Analysis Functions
# ===============================

def validate_pdb_file(file_path: str) -> bool:
    """Validates if file exists and has proper PDB format."""
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"PDB file not found: {file_path}")
    try:
        with open(file_path, 'r') as f:
            first_line = f.readline()
            if not any(marker in first_line for marker in ['HEADER', 'ATOM', 'MODEL']):
                raise StructureValidationError(f"Invalid PDB: {file_path}")
    except UnicodeDecodeError:
        raise StructureValidationError(f"Not a valid text file: {file_path}")
    return True

def safe_structure_load(parser: PDB.PDBParser, file_path: str) -> Optional[Structure]:
    """Safely loads PDB structure with error handling."""
    try:
        validate_pdb_file(file_path)
        structure = parser.get_structure('protein', file_path)
        if not list(structure.get_models()):
            raise StructureValidationError("No models")
        if not list(list(structure.get_models())[0].get_chains()):
            raise StructureValidationError("No chains")
        return structure
    except Exception as e:
        print(f"Error loading {file_path}: {str(e)}")
        return None

def calculate_buried_surface_area(pdb_file: str) -> Tuple[Optional[float], Optional[Dict[str, float]]]:
    """Calculates buried surface area between chains."""
    parser = PDB.PDBParser(QUIET=True)
    structure = safe_structure_load(parser, pdb_file)
    if not structure:
        return None, None
    try:
        chains = list(structure.get_chains())
        if len(chains) < 2:
            print(f"Warning: {pdb_file} has fewer than 2 chains")
            return None, None

        combined_structure = freesasa.Structure(pdb_file)
        result = freesasa.calc(combined_structure)
        total_area = result.totalArea()

        chain_areas = {}
        io = PDBIO()
        temp_files = []

        for chain in chains:
            new_structure = PDB.Structure.Structure('temp')
            new_model = PDB.Model.Model(0)
            new_structure.add(new_model)
            new_model.add(chain.copy())

            temp_file = f"temp_chain_{chain.id}.pdb"
            temp_files.append(temp_file)

            io.set_structure(new_structure)
            io.save(temp_file)

            chain_structure = freesasa.Structure(temp_file)
            chain_result = freesasa.calc(chain_structure)
            chain_areas[chain.id] = chain_result.totalArea()

        for temp_file in temp_files:
            if os.path.exists(temp_file):
                os.remove(temp_file)

        total_individual_area = sum(chain_areas.values())
        buried_surface_area = abs(total_individual_area - total_area) / 2
        return buried_surface_area, chain_areas

    except Exception as e:
        print(f"Error calculating BSA for {pdb_file}: {str(e)}")
        return None, None

def calculate_hydrogen_bonds(structure: Structure) -> int:
    """Calculates number of hydrogen bonds between chains."""
    try:
        h_bonds = 0
        for chain1 in structure.get_chains():
            for chain2 in structure.get_chains():
                if chain1.id >= chain2.id:
                    continue
                for res1 in chain1.get_residues():
                    if not is_aa(res1):
                        continue
                    for res2 in chain2.get_residues():
                        if not is_aa(res2):
                            continue
                        if 'O' in res1 and 'N' in res2:
                            distance = res1['O'] - res2['N']
                            if distance < 3.5:
                                h_bonds += 1
        return h_bonds
    except Exception as e:
        print(f"Error calculating H-bonds: {str(e)}")
        return 0
def calculate_hydrophobic_contacts(structure: Structure) -> int:
    """
    Calculates number of hydrophobic contacts between chains.
    Considers residues ALA, VAL, LEU, ILE, MET, PHE, TRP, PRO.
    Contact is counted if distance < 5.0 Å.
    """
    try:
        hydrophobic_residues = {'ALA', 'VAL', 'LEU', 'ILE', 'MET', 'PHE', 'TRP', 'PRO'}
        contacts = 0
        for chain1 in structure.get_chains():
            for chain2 in structure.get_chains():
                if chain1.id >= chain2.id:
                    continue
                for res1 in chain1.get_residues():
                    if not is_aa(res1) or res1.get_resname() not in hydrophobic_residues:
                        continue
                    for res2 in chain2.get_residues():
                        if not is_aa(res2) or res2.get_resname() not in hydrophobic_residues:
                            continue
                        min_distance = float('inf')
                        for atom1 in res1.get_atoms():
                            for atom2 in res2.get_atoms():
                                distance = atom1 - atom2
                                min_distance = min(min_distance, distance)
                        if min_distance < 5.0:
                            contacts += 1
        return contacts
    except Exception as e:
        print(f"Error calculating hydrophobic contacts: {str(e)}")
        return 0

def calculate_salt_bridges(structure: Structure) -> int:
    """
    Calculates number of salt bridges between chains.
    Salt bridge is counted between ASP/GLU and LYS/ARG/HIS if distance < 4.0 Å.
    """
    try:
        acidic = {'ASP', 'GLU'}
        basic = {'LYS', 'ARG', 'HIS'}
        salt_bridges = 0
        for chain1 in structure.get_chains():
            for chain2 in structure.get_chains():
                if chain1.id >= chain2.id:
                    continue
                for res1 in chain1.get_residues():
                    if not is_aa(res1):
                        continue
                    res1_name = res1.get_resname()
                    for res2 in chain2.get_residues():
                        if not is_aa(res2):
                            continue
                        res2_name = res2.get_resname()
                        if ((res1_name in acidic and res2_name in basic) or
                            (res1_name in basic and res2_name in acidic)):
                            min_distance = float('inf')
                            for atom1 in res1.get_atoms():
                                for atom2 in res2.get_atoms():
                                    distance = atom1 - atom2
                                    min_distance = min(min_distance, distance)
                            if min_distance < 4.0:
                                salt_bridges += 1
        return salt_bridges
    except Exception as e:
        print(f"Error calculating salt bridges: {str(e)}")
        return 0

def save_results_as_df(results: List[Dict[str, Any]], output_file: str) -> pd.DataFrame:
    """
    Converts analysis results to DataFrame and saves to CSV.
    Extracts design identifiers from filenames with flexible pattern matching.
    """
    analysis_data = []
    for result in results:
        filename = result['file_name'].replace('.pdb', '')
        try:
            # Handle new filename format (1_bind_0_dldesign_9965_af2pred)
            if 'dldesign_' in filename:
                # Extract design number (9965 from example)
                design_num = int(filename.split('dldesign_')[1].split('_')[0])
                # Use 1 as default variant number or extract from filename if available
                variant_num = 1

                # If there's an actual variant indicator in the filename, use that instead
                if '_af2pred' in filename:
                    pred_parts = filename.split('_af2pred')
                    # If there's a variant number before af2pred
                    if len(pred_parts) > 1 and pred_parts[1].startswith('_v'):
                        try:
                            variant_num = int(pred_parts[1].strip('_v'))
                        except ValueError:
                            pass
            # Handle original filename format (design123_n45)
            elif 'design' in filename and '_n' in filename:
                design_num = int(filename.split('design')[1].split('_')[0])
                variant_num = int(filename.split('_n')[1])
            else:
                # Fallback for unrecognized format
                print(f"Warning: Unrecognized filename format: {filename}")
                # Use filename hash as a unique identifier
                design_num = int(hashlib.md5(filename.encode()).hexdigest(), 16) % 10000
                variant_num = 1

            analysis_data.append({
                'design': design_num,
                'n': variant_num,
                'file_name': result['file_name'],  # Keep original filename for reference
                'buried_surface_area': result['buried_surface_area'] if result['buried_surface_area'] else 0,
                'hydrogen_bonds': result['hydrogen_bonds'],
                'hydrophobic_contacts': result['hydrophobic_contacts'],
                'salt_bridges': result['salt_bridges']
            })
        except Exception as e:
            print(f"Error parsing filename {filename}: {str(e)}")
            continue

    if not analysis_data:
        print("Warning: No valid data extracted from filenames.")
        # Create empty dataframe with required columns to prevent errors
        return pd.DataFrame(columns=['design', 'n', 'file_name', 'buried_surface_area',
                                     'hydrogen_bonds', 'hydrophobic_contacts',
                                     'salt_bridges'])

    df = pd.DataFrame(analysis_data)
    df = df.sort_values(['design', 'n']).reset_index(drop=True)
    df.to_csv(output_file, index=False)
    print(f"Saved structure analysis to {output_file}")
    return df

def merge_with_af2_scores(structure_df: pd.DataFrame, af2_scores_file: str) -> pd.DataFrame:
    """Merges structural analysis results with AF2 scores."""
    try:
        af2_df = pd.read_csv(af2_scores_file)
        merged_df = pd.merge(af2_df, structure_df, on=['design', 'n'], how='left')
        merged_df = merged_df.sort_values(['design', 'n']).reset_index(drop=True)
        return merged_df
    except Exception as e:
        print(f"Error merging with AF2 scores: {str(e)}")
        return structure_df
# ===============================
# Visualization Functions
# ===============================

def create_structure_visualization(df: pd.DataFrame, output_dir: str, timestamp: str):
    """
    Creates structural parameter visualizations.
    """
    output_basename = os.path.basename(output_dir)

    # Create a figure with subplots for each structural parameter
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    axes = axes.flatten()

    # Define parameters and titles
    params = ['buried_surface_area', 'hydrogen_bonds',
              'hydrophobic_contacts', 'salt_bridges']
    titles = ['Buried Surface Area (Å²)', '# of Hydrogen Bonds',
              '# of Hydrophobic Contacts', '# of Salt Bridges']

    # Plot each parameter
    for i, (param, title) in enumerate(zip(params, titles)):
        ax = axes[i]
        sns.barplot(x=df['design'], y=df[param], ax=ax)
        ax.set_title(title)
        ax.set_xlabel('Design Number')
        if i == 0 or i == 2:  # Left side plots
            ax.set_ylabel(title)

        # Add data labels on top of bars
        for p in ax.patches:
            ax.annotate(f"{p.get_height():.1f}",
                      (p.get_x() + p.get_width() / 2., p.get_height()),
                      ha='center', va='center', fontsize=9,
                      xytext=(0, 5), textcoords='offset points')

    plt.tight_layout()
    plot_path = os.path.join(output_dir, f"{output_basename}_{timestamp}_structural_params.png")
    plt.savefig(plot_path, dpi=300)
    plt.close()
    print(f"Saved structural parameter visualization to {plot_path}")

    # Create correlation matrix visualization
    plt.figure(figsize=(10, 8))
    corr_params = ['buried_surface_area', 'hydrogen_bonds',
                   'hydrophobic_contacts', 'salt_bridges']

    # Create a subset dataframe for correlation analysis
    corr_df = df[corr_params]

    # Calculate correlation matrix
    corr_matrix = corr_df.corr()

    # Plot heatmap
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1,
                linewidths=0.5, square=True)
    plt.title('Correlation Matrix of Structural Parameters')
    plt.tight_layout()

    corr_path = os.path.join(output_dir, f"{output_basename}_{timestamp}_correlation.png")
    plt.savefig(corr_path, dpi=300)
    plt.close()
    print(f"Saved correlation matrix to {corr_path}")

    # Create a figure showing all parameters for each design
    plt.figure(figsize=(15, 10))

    # Normalize data for fair comparison
    norm_df = df[params].copy()
    for param in params:
        max_val = norm_df[param].max()
        if max_val > 0:  # Avoid division by zero
            norm_df[param] = norm_df[param] / max_val

    # Add design column
    norm_df['design'] = df['design']

    # Reshape data for grouped bar plot
    plot_df = pd.melt(norm_df, id_vars=['design'],
                     value_vars=params,
                     var_name='Parameter', value_name='Normalized Value')

    # Create grouped bar plot
    sns.barplot(x='design', y='Normalized Value', hue='Parameter', data=plot_df)
    plt.title('Normalized Structural Parameters by Design')
    plt.ylabel('Normalized Value')
    plt.xlabel('Design Number')
    plt.legend(title='Parameter')
    plt.tight_layout()

    norm_path = os.path.join(output_dir, f"{output_basename}_{timestamp}_normalized_params.png")
    plt.savefig(norm_path, dpi=300)
    plt.close()
    print(f"Saved normalized parameters comparison to {norm_path}")

    # Create an interactive HTML visualization using Plotly
    try:
        # Create a figure with subplots
        fig = make_subplots(rows=2, cols=2,
                           subplot_titles=titles,
                           vertical_spacing=0.1,
                           horizontal_spacing=0.05)

        # Add traces for each parameter
        for i, param in enumerate(params):
            row, col = i // 2 + 1, i % 2 + 1

            fig.add_trace(
                go.Bar(x=df['design'].astype(str),
                      y=df[param],
                      name=titles[i],
                      text=df[param].round(1),
                      textposition='auto'),
                row=row, col=col
            )

            # Update axes labels
            fig.update_xaxes(title_text="Design Number", row=row, col=col)
            if col == 1:  # Left side plots
                fig.update_yaxes(title_text=titles[i], row=row, col=col)

        # Update layout
        fig.update_layout(
            title='Structural Parameters Analysis',
            height=800,
            width=1200,
            showlegend=False,
            template='plotly_white'
        )

        # Save as HTML file
        html_path = os.path.join(output_dir, f"{output_basename}_{timestamp}_interactive.html")
        fig.write_html(html_path)
        print(f"Saved interactive visualization to {html_path}")

    except Exception as e:
        print(f"Error creating interactive visualization: {str(e)}")

def create_pptx_summary(df: pd.DataFrame, output_dir: str, timestamp: str):
    """
    Creates a simple PowerPoint summary of the analysis.
    """
    try:
        output_basename = os.path.basename(output_dir)

        # Initialize presentation
        prs = Presentation()
        prs.slide_width = Cm(25.4)
        prs.slide_height = Cm(19.05)

        # Add title slide
        title_slide = prs.slides.add_slide(prs.slide_layouts[0])
        title = title_slide.shapes.title
        subtitle = title_slide.placeholders[1]
        title.text = "Protein-Protein Interface Analysis"
        subtitle.text = f"Generated: {time.strftime('%B %d, %Y')}"

        # Add summary slide
        summary_slide = prs.slides.add_slide(prs.slide_layouts[5])
        title = summary_slide.shapes.title
        title.text = "Analysis Summary"

        # Create a table for summary statistics
        left = Cm(2)
        top = Cm(4)
        width = Cm(20)
        height = Cm(10)

        # Get summary statistics
        stats = {
            'Total structures analyzed': len(df),
            'Average buried surface area': f"{df['buried_surface_area'].mean():.2f} Å²",
            'Max buried surface area': f"{df['buried_surface_area'].max():.2f} Å²",
            'Average hydrogen bonds': f"{df['hydrogen_bonds'].mean():.2f}",
            'Max hydrogen bonds': f"{df['hydrogen_bonds'].max():.0f}",
            'Average hydrophobic contacts': f"{df['hydrophobic_contacts'].mean():.2f}",
            'Max hydrophobic contacts': f"{df['hydrophobic_contacts'].max():.0f}",
            'Average salt bridges': f"{df['salt_bridges'].mean():.2f}",
            'Max salt bridges': f"{df['salt_bridges'].max():.0f}"
        }

        # Find top design by BSA
        top_bsa_idx = df['buried_surface_area'].idxmax()
        top_bsa_design = df.loc[top_bsa_idx, 'design']
        top_bsa_file = df.loc[top_bsa_idx, 'file_name']
        stats['Top design by buried surface area'] = f"Design {top_bsa_design} ({top_bsa_file})"

        # Create table
        rows = len(stats) + 1  # +1 for header
        cols = 2
        table = summary_slide.shapes.add_table(rows, cols, left, top, width, height).table

        # Header row
        table.cell(0, 0).text = "Metric"
        table.cell(0, 1).text = "Value"

        # Add stats to table
        for i, (metric, value) in enumerate(stats.items(), 1):
            table.cell(i, 0).text = metric
            table.cell(i, 1).text = str(value)

        # Format header row
        for cell in table.rows[0].cells:
            cell.fill.solid()
            cell.fill.fore_color.rgb = (200, 200, 200)

        # Save PowerPoint
        pptx_path = os.path.join(output_dir, f"{output_basename}_{timestamp}_summary.pptx")
        prs.save(pptx_path)
        print(f"Saved PowerPoint summary to {pptx_path}")

    except Exception as e:
        print(f"Error creating PowerPoint summary: {str(e)}")
        traceback.print_exc()

# ===============================
# Main Processing Function
# ===============================

def process_multiple_pdb_files(pdb_directory: str, af2_scores_file: str = None) -> pd.DataFrame:
    """
    Main processing function that:
    1. Analyzes all PDB files in directory
    2. Merges with AF2 scores if available
    3. Generates visualizations and outputs
    """
    if not os.path.exists(pdb_directory):
        raise FileNotFoundError(f"Directory not found: {pdb_directory}")

    # Get timestamp for file naming
    timestamp = time.strftime("%y%m%d")

    # Initialize results
    results = []
    parser = PDB.PDBParser(QUIET=True)
    pdb_files = [f for f in os.listdir(pdb_directory) if f.endswith('.pdb')]

    if not pdb_files:
        print(f"No PDB files found in {pdb_directory}")
        return pd.DataFrame()

    print(f"Processing {len(pdb_files)} PDB files...")
    total_files = len(pdb_files)

    # Process each PDB file
    for idx, file_name in enumerate(pdb_files, 1):
        pdb_file = os.path.join(pdb_directory, file_name)
        print(f"Processing file {idx}/{total_files}: {file_name}")

        structure = safe_structure_load(parser, pdb_file)
        if not structure:
            continue

        # Calculate structural parameters
        buried_surface_area, chain_areas = calculate_buried_surface_area(pdb_file)
        h_bonds = calculate_hydrogen_bonds(structure)
        hydrophobic = calculate_hydrophobic_contacts(structure)
        salt_bridges = calculate_salt_bridges(structure)

        results.append({
            'file_name': file_name,
            'buried_surface_area': buried_surface_area,
            'hydrogen_bonds': h_bonds,
            'hydrophobic_contacts': hydrophobic,
            'salt_bridges': salt_bridges,
            'chain_areas': chain_areas
        })

    # Save structural analysis
    output_basename = os.path.basename(pdb_directory)
    structure_csv = os.path.join(pdb_directory, f"{output_basename}_{timestamp}_structure.csv")
    structure_df = save_results_as_df(results, structure_csv)

    # Check if we have valid data
    if structure_df.empty:
        print("Warning: No valid data extracted from analysis")
        return structure_df

    # If AF2 scores exist, merge and create visualizations
    if af2_scores_file and os.path.exists(af2_scores_file):
        print(f"Merging with AF2 scores from {af2_scores_file}")
        try:
            final_df = merge_with_af2_scores(structure_df, af2_scores_file)

            # Save combined analysis
            combined_csv = os.path.join(pdb_directory, f"{output_basename}_{timestamp}_combined.csv")
            final_df.to_csv(combined_csv, index=False)
            print(f"Saved combined results to {combined_csv}")

            # Create PowerPoint plots (original functionality)
            # This has been modified for compatibility with the scenario where 'seq' is not available

            return final_df
        except Exception as e:
            print(f"Error during AF2 score merging: {str(e)}")
            print("Continuing with structure analysis only")

    # Generate simple visualizations for structural parameters only
    try:
        create_structure_visualization(structure_df, pdb_directory, timestamp)
        create_pptx_summary(structure_df, pdb_directory, timestamp)
    except Exception as e:
        print(f"Error creating visualizations: {str(e)}")
        traceback.print_exc()

    return structure_df

# ===============================
# Main Execution
# ===============================

if __name__ == "__main__":
    # Mount Google Drive
    drive.mount('/content/drive')

    # Set directory containing PDB files and AF2 scores
    pdb_directory = '/content/drive/MyDrive/Evolving_hits_using_ProteinMPNN/20250303-3NOBEK/0_top_binders'  # Update this path
    af2_scores_path = os.path.join(pdb_directory, 'af2_scores.csv')

    # Check if AF2 scores exist
    if not os.path.exists(af2_scores_path):
        af2_scores_path = None
        print("No AF2 scores file found - will generate structure analysis only")

    print("\nStarting analysis...")
    print(f"Processing PDB files from: {pdb_directory}")

    try:
        results_df = process_multiple_pdb_files(pdb_directory, af2_scores_path)

        if not results_df.empty:
            print("\nSummary of results:")
            print(f"Total structures analyzed: {len(results_df)}")
            print(f"Average buried surface area: {results_df['buried_surface_area'].mean():.2f} Å²")
            print(f"Average hydrogen bonds: {results_df['hydrogen_bonds'].mean():.2f}")
            print(f"Average hydrophobic contacts: {results_df['hydrophobic_contacts'].mean():.2f}")
            print(f"Average salt bridges: {results_df['salt_bridges'].mean():.2f}")

            print("\nTop structures by buried surface area:")
            top_bsa = results_df.nlargest(3, 'buried_surface_area')
            for idx, row in top_bsa.iterrows():
                print(f"Design {row['design']} - {row['file_name']}: {row['buried_surface_area']:.2f} Å²")

            print("\nTop structures by hydrogen bonds:")
            top_hbonds = results_df.nlargest(3, 'hydrogen_bonds')
            for idx, row in top_hbonds.iterrows():
                print(f"Design {row['design']} - {row['file_name']}: {row['hydrogen_bonds']} bonds")

        print("\nAnalysis completed successfully!")
    except Exception as e:
        print(f"\nError during analysis: {str(e)}")
        traceback.print_exc()  # Print full stack trace for better debugging



[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/270.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━[0m [32m174.1/270.1 kB[0m [31m4.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.1/270.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.8/472.8 kB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.9/79.9 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.1/165.1 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for freesasa (setup.py) ... [?25l[?25hdone
Drive already mount

Traceback (most recent call last):
  File "<ipython-input-1-e2c64f869283>", line 538, in create_pptx_summary
    cell.fill.fore_color.rgb = (200, 200, 200)
    ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pptx/dml/color.py", line 62, in rgb
    raise ValueError("assigned value must be type RGBColor")
ValueError: assigned value must be type RGBColor
