<a href="https://colab.research.google.com/github/eoinleen/protein-design-final-dir/blob/main/cp_Plotting_RFdiffusion_AF2_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
Script Name: Protein Plot Generator
Author: Claude (Anthropic) & Dr Eoin Leen
Date: January 24, 2025
Description:
Creates a PowerPoint presentation visualizing protein design metrics from the RFdiffusion → ProteinMPNN → AlphaFold2 pipeline. Generates six scatter plots (3x2 grid) on a single A4 portrait slide, each plotting one of the following metrics (y-axis) against the i_PAE score (x-axis):

iPTM (interface predicted template modeling score)
RMSD (root mean square deviation)
BSA (buried surface area)
Hydrogen bonds
Hydrophobic contacts
Salt bridges

Data Source:

Generated using RFdiffusion protein design pipeline
Analyzed and collated using RFdiffusion_pathway_str_analysis_tool_final.ipynb
References:

RFdiffusion pipeline: https://colab.research.google.com/github/sokrypton/ColabDesign/blob/v1.1.1/rf/examples/diffusion.ipynb
Analysis tool: https://github.com/eoinleen/protein-design-final-dir/blob/main/cp_RFdiffusion_pathway_str_analysis_tool_final.ipynb


Dependencies: python-pptx, pandas, matplotlib, seaborn, openpyxl, google.colab
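Input: Excel file containing protein metrics; required columns: i_pae, i_ptm, rmsd, buried_surface_area, hydrogen_bonds, hydrophobic_contacts, salt_bridges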
Output: PowerPoint (.pptx) file with visualizations
"""

!pip install python-pptx

import io
import os

import matplotlib.pyplot as plt
import openpyxl
import pandas as pd
import seaborn as sns
from google.colab import drive
from pptx import Presentation
from pptx.util import Inches, Cm
from pptx.enum.text import PP_ALIGN

# Mount Google Drive at /content/drive so the workbook under MyDrive can be read by path.
drive.mount('/content/drive')

def create_protein_plots_ppt(excel_path):
    """Plot six design metrics against i_PAE and save them on one PowerPoint slide.

    Reads the Excel sheet at ``excel_path``, draws a 3x2 grid of scatter plots
    (one metric per panel, i_PAE on the x-axis) and writes a single-slide A4
    portrait presentation named ``<excel file stem>_plots.pptx`` into the same
    directory as the input file.

    Args:
        excel_path: Path to the Excel workbook. It must contain the columns
            ``i_pae``, ``i_ptm``, ``rmsd``, ``buried_surface_area``,
            ``hydrogen_bonds``, ``hydrophobic_contacts`` and ``salt_bridges``.

    Returns:
        The path of the written ``.pptx`` file.

    Raises:
        ValueError: If any of the required columns is missing from the sheet.
    """
    x_var = 'i_pae'
    y_vars = ['i_ptm', 'rmsd', 'buried_surface_area',
              'hydrogen_bonds', 'hydrophobic_contacts', 'salt_bridges']
    titles = ['iPTM', 'RMSD', 'Buried Surface Area',
              'Hydrogen Bonds', 'Hydrophobic Contacts', 'Salt Bridges']

    df = pd.read_excel(excel_path)

    # Fail early with a readable message instead of a plotting-library error
    # raised halfway through building the figure.
    missing = [col for col in [x_var, *y_vars] if col not in df.columns]
    if missing:
        raise ValueError(
            f"{excel_path} is missing required column(s): {', '.join(missing)}"
        )

    # 8.27 x 11.69 in is A4 portrait, matching the slide size set below.
    fig, axes = plt.subplots(3, 2, figsize=(8.27, 11.69))
    try:
        for ax, y_var, title in zip(axes.flatten(), y_vars, titles):
            sns.scatterplot(data=df, x=x_var, y=y_var, ax=ax,
                            color='black', marker='x', s=16)
            ax.set_xlabel('i_PAE')
            ax.set_ylabel(title)
            ax.set_title(title)
            ax.set_facecolor('white')

        fig.patch.set_facecolor('white')
        fig.tight_layout()

        # Render into memory: no scratch file to clean up (or to leak when a
        # later step fails), and no dependency on a Colab-only directory.
        image = io.BytesIO()
        fig.savefig(image, format='png', bbox_inches='tight', dpi=300,
                    facecolor='white')
    finally:
        # Close this specific figure even on error so it does not accumulate
        # in pyplot's global figure registry.
        plt.close(fig)
    image.seek(0)

    prs = Presentation()
    prs.slide_width = Cm(21)
    prs.slide_height = Cm(29.7)

    # Layout index 5 is "Title Only" in python-pptx's default template; its
    # title placeholder is left empty here.
    slide = prs.slides.add_slide(prs.slide_layouts[5])

    margin = Cm(2)
    picture = slide.shapes.add_picture(image, margin, margin)

    # Without an explicit size the picture is inserted at its native size,
    # which is roughly the full A4 page; offset by the margin it would run off
    # the right and bottom edges. Shrink it (keeping the aspect ratio) so it
    # fits inside the margins; never enlarge it.
    max_width = prs.slide_width - 2 * margin
    max_height = prs.slide_height - 2 * margin
    scale = min(1.0, max_width / picture.width, max_height / picture.height)
    scaled_width = int(picture.width * scale)
    scaled_height = int(picture.height * scale)
    picture.width = scaled_width
    picture.height = scaled_height

    output_dir = os.path.dirname(excel_path)
    output_name = os.path.splitext(os.path.basename(excel_path))[0] + '_plots.pptx'
    output_path = os.path.join(output_dir, output_name)

    prs.save(output_path)
    return output_path

# Workbook of collated design metrics, stored on the mounted Google Drive.
excel_path = '/content/drive/MyDrive/CSV-files-for-mod/20250123/claudeAI-cp-AF2-scores-combined.xlsx'
# Build the plots; the resulting .pptx is saved in the same folder as the workbook.
create_protein_plots_ppt(excel_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


