In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import re

In [2]:
os.chdir('/home/mpg01/MBPC/fatma.chafra01/ColabDesign/af/examples')
os.getcwd()

'/home/mpg01/MBPC/fatma.chafra01/ColabDesign/af/examples'

In [3]:
def normalize_and_sort(x):
    # Split the string by commas and/or whitespace
    elements = re.split(r'[,\s]+', x)
    # Remove any empty strings
    elements = [e for e in elements if e]
    # Sort the elements
    elements.sort()
    # Join the elements back with a single comma
    return ','.join(elements)


def read_cdr_gen_out_df(filename='file.out', verbose=0): 
    data = []
    stage = 0  # 0: before Stage 1, 1: Stage 1, 2: semigreedy optimization
    stage_model = 0
    
    with open(filename, 'r') as file:
        for line in file:
            if 'Stage 1: running (logits → soft)' in line:
                stage = 1
                stage_model = 1
                if verbose == 1:
                    print('stage', stage)
                    print('stage_model', stage_model)
                continue
            elif 'Running semigreedy optimization...' in line:
                # if previous stage was 1 then stage_model is kept as 1
                if stage == 1:
                    stage_model = 1
                else:
                    stage_model += 1
                stage = 2
                if verbose == 1:
                    print('stage', stage)
                    print('stage_model', stage_model)
                continue
            elif line.startswith('end'):
                break
            
            if stage > 0:
                # Parse the line
                if verbose == 1:
                    print('line', line)
                parts = line.split(' [')
                if verbose == 1:
                    print('parts', parts)
                if len(parts) < 2 or not parts[0].split()[0].isdigit():
                    continue
                if verbose == 1:
                    print('parts', parts)

                iteration = int(parts[0].split()[0])
                if verbose == 1:
                    print('iteration', iteration)
                parts_2 = parts[1].split('] ')
                if verbose == 1:
                    print('parts_2', parts_2)
                model = parts_2[0]
                if verbose == 1:
                    print('model', model)
                parts_3 = parts_2[1].split()
                #print(parts[1])
                #print(parts[2])
                #model = parts[2].strip('[').strip(']')
                #model = int(re.findall(r'\d+', parts[1])[0])
                
                row = {'stage':stage, 'stage_model': stage_model, 'iteration': iteration, 'model': model}
                if verbose == 1:
                    print(row)
                    print('range', range(0, len(parts_3), 2))
                
                for i in range(0, len(parts_3), 2):
                    key = parts_3[i]
                    if verbose == 1:
                        print('key', key)
                    value = float(parts_3[i+1])
                    if verbose == 1:
                        print('value', value)
                    row[key] = value
                
                data.append(row)
    
    df = pd.DataFrame(data)
    print(df['model'].unique())
    print(df['model'].value_counts())
    # print(second_test['model'])
    # df['model_updated'] = df['model'].map(lambda x: ','.join(sorted(x.split())))
    df['model_updated'] = df['model'].apply(normalize_and_sort)
    print(df['model_updated'].value_counts())

    return df

In [4]:
def create_length_masks(df):
    def count_unique_values(value):
        return len(set(value.split(',')))
    
    # Count unique values for each row
    unique_counts = df['model_updated'].apply(count_unique_values)
    
    # Create masks for each length (1 to 5)
    masks = [unique_counts == i for i in range(1, 6)]
    
    return masks


In [5]:
def plot_interface_losses(df, outpath, prefix, verbose=0):
    # Turn off interactive mode at the start of the function
    plt.ioff()
    print(df['model_updated'].value_counts())
    unique_values = df['model_updated'].unique()
    
    for val in unique_values:
        # interface losses
        # Create a figure with two subplots
        fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 18))

        # Plot iteration vs loss
        df_subset = df[df['model_updated'] == val]
        # df[df['model_updated']==val].plot(x='iteration', y='loss', ax=ax1, kind='line')
        ax1.scatter(df_subset['iteration'], df_subset['loss'], label='Loss', color='b')
        ax1.set_title('Iteration vs Loss')
        ax1.set_xlabel('Iteration')
        ax1.set_ylabel('Loss')

        # Plot iteration vs i_pae
        # df[df['model_updated']==val].plot(x='iteration', y='i_pae', ax=ax2, kind='line')
        ax2.scatter(df_subset['iteration'], df_subset['i_pae'], label='i_pae', color='g')
        ax2.set_title('Iteration vs iPAE')
        ax2.set_xlabel('Iteration')
        ax2.set_ylabel('iPAE')

        # Plot iteration vs i_ptm
        # df[df['model_updated']==val].plot(x='iteration', y='i_ptm', ax=ax3, kind='line')
        ax3.scatter(df_subset['iteration'], df_subset['i_ptm'], label='i_ptm', color='g')
        ax3.set_title('Iteration vs interface pTM')
        ax3.set_xlabel('Iteration')
        ax3.set_ylabel('i_ptm') 
        

        #fig.subplots_adjust(top=0.99)
        # Add an overall title to the figure
        val_formatted = val.strip()
        print('val_formatted', val_formatted)
        fig.suptitle(f'Loss Function and Interface Metrics for model {val_formatted}', fontsize=16, wrap=True)



        # Adjust layout and display the plot
        plt.tight_layout()
        # Adjust the layout to make room for the overall title
        fig.subplots_adjust(top=0.95, bottom=0.03, left=0.1, right=0.95, hspace=0.2)
        # save plot
        plt.savefig(f'{outpath}/{prefix}_interface_losses_model_{val_formatted}.png', bbox_inches='tight', dpi=300)
        # Close the figure to free up memory
        plt.close(fig)
         # Turn interactive mode back on at the end of the function
        plt.ion()
        if verbose == 1:
            plt.show()

In [6]:
# function for grouping the same num of models into one graph for easier interpretation
def plot_interface_losses_3(df, outpath, prefix, verbose=0):
    # Turn off interactive mode at the start of the function
    plt.ioff()
    print(df['model_updated'].value_counts())
    # unique_values = df['model_updated'].unique()
    masks = create_length_masks(df)
    # Create a colormap
    cmap = plt.get_cmap('tab10')  # You can choose a different colormap if needed

    # Define the columns to plot and their properties
    plot_columns = [
        ('loss', 'Loss', 'Iteration vs Loss'),
        ('i_pae', 'iPAE', 'Iteration vs iPAE'),
        ('i_ptm', 'i_ptm', 'Iteration vs interface pTM')
    ]

    for i, mask in enumerate(masks, start=1):
        df_subset = df[mask]
        
        # Filter out columns that don't exist in the DataFrame
        available_plots = [plot for plot in plot_columns if plot[0] in df_subset.columns]
        num_plots = len(available_plots)
        
        # Create a figure with the appropriate number of subplots
        fig, axes = plt.subplots(num_plots, 1, figsize=(10, 6*num_plots))
        if num_plots == 1:
            axes = [axes]  # Ensure axes is always a list

        model_combinations = df_subset['model_updated'].unique()
        color_dict = {combo: cmap(j/len(model_combinations)) for j, combo in enumerate(model_combinations)}

        for ax, (col_name, ylabel, title) in zip(axes, available_plots):
            for combo in model_combinations:
                combo_data = df_subset[df_subset['model_updated'] == combo]
                color = color_dict[combo]
                ax.scatter(combo_data['iteration'], combo_data[col_name], label=combo, color=color)
            
            ax.set_title(title)
            ax.set_xlabel('Iteration')
            ax.set_ylabel(ylabel)

        # Add legend to the first subplot
        axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')

        fig.suptitle(f'Loss Function and Interface Metrics for {i} model(s)', fontsize=14, wrap=True)
        
        plt.tight_layout()
        fig.subplots_adjust(top=0.94, right=0.85, hspace=0.3)
        plt.savefig(f'{outpath}/{prefix}_interface_losses_num_models_{i}.png', bbox_inches='tight', dpi=300)
        plt.close(fig)

    plt.ion()
    if verbose == 1:
        plt.show()

In [7]:
# function for plotting interface losses for each model group separately 
# but for the most general case of when we dont know whether all of the losses are in the .out file
def plot_interface_losses_2(df, outpath, prefix, verbose=0):
    plt.ioff()  # Turn off interactive mode
    print(df['model_updated'].value_counts())
    unique_values = df['model_updated'].unique()
    
    # Define the columns to plot and their properties
    plot_columns = [
        ('loss', 'Loss', 'b'),
        ('i_pae', 'iPAE', 'g'),
        ('i_ptm', 'interface pTM', 'r')
    ]
    
    for val in unique_values:
        df_subset = df[df['model_updated'] == val]
        
        # Filter out columns that don't exist in the DataFrame
        available_plots = [plot for plot in plot_columns if plot[0] in df.columns]
        num_plots = len(available_plots)
        
        # Create a figure with the appropriate number of subplots
        fig, axes = plt.subplots(num_plots, 1, figsize=(10, 6*num_plots))
        if num_plots == 1:
            axes = [axes]  # Ensure axes is always a list
        
        for ax, (col_name, title, color) in zip(axes, available_plots):
            ax.scatter(df_subset['iteration'], df_subset[col_name], label=title, color=color)
            ax.set_title(f'Iteration vs {title}')
            ax.set_xlabel('Iteration')
            ax.set_ylabel(title)
        
        val_formatted = val.strip()
        print(f'val_formatted: {val_formatted}')
        fig.suptitle(f'Loss Function and Interface Metrics for model {val_formatted}', fontsize=16)
        
        plt.tight_layout()
        fig.subplots_adjust(top=0.95, hspace=0.3)
        
        plt.savefig(f'{outpath}/{prefix}_interface_losses_model_{val_formatted}.png', bbox_inches='tight', dpi=300)
        plt.close(fig)

    plt.ion()  # Turn interactive mode back on

    if verbose == 1:
        plt.show()

In [8]:
# initial function to get the loss curves without considering columns can be missing
def plot_folding_structure_losses(df, outpath, prefix, verbose=0):
    # Turn off interactive mode at the start of the function
    plt.ioff()
    print(df['model_updated'].value_counts())
    unique_values = df['model_updated'].unique()
    for val in unique_values:
            
        # structure and folding losses
        # Create a figure with two subplots
        fig, (ax1, ax2, ax3, ax4, ax5, ax6) = plt.subplots(6, 1, figsize=(10, 32))

        # Plot iteration vs loss
        # df[df['model_updated']==val].plot(x='iteration', y='loss', ax=ax1, kind='line')
        df_subset = df[df['model_updated'] == val]
        ax1.scatter(df_subset['iteration'], df_subset['loss'], label='Loss', color='b')
        ax1.set_title('Iteration vs Loss')
        ax1.set_xlabel('Iteration')
        ax1.set_ylabel('Loss')

        # Plot iteration vs RMSD
        # df[df['model_updated']==val].plot(x='iteration', y='rmsd', ax=ax2, kind='line')
        ax2.scatter(df_subset['iteration'], df_subset['rmsd'], label='RMSD', color='g')
        ax2.set_title('Iteration vs RMSD')
        ax2.set_xlabel('Iteration')
        ax2.set_ylabel('RMSD')

        # Plot iteration vs dgram_cce
        # df[df['model_updated']==val].plot(x='iteration', y='dgram_cce', ax=ax3, kind='line')
        ax3.scatter(df_subset['iteration'], df_subset['dgram_cce'], label='dgram_cce', color='r')
        ax3.set_title('Iteration vs dgram_cce')
        ax3.set_xlabel('Iteration')
        ax3.set_ylabel('dgram_cce')

        # Plot iteration vs plddt
        # df[df['model_updated']==val].plot(x='iteration', y='plddt', ax=ax4, kind='line')
        ax4.scatter(df_subset['iteration'], df_subset['plddt'], label='plddt', color='c')
        ax4.set_title('Iteration vs plddt')
        ax4.set_xlabel('Iteration')
        ax4.set_ylabel('plddt')

        # Plot iteration vs pae
        # df[df['model_updated']==val].plot(x='iteration', y='pae', ax=ax5, kind='line')
        ax5.scatter(df_subset['iteration'], df_subset['pae'], label='PAE', color='m')
        ax5.set_title('Iteration vs PAE')
        ax5.set_xlabel('Iteration')
        ax5.set_ylabel('PAE')

        # Plot iteration vs pTM
        # df[df['model_updated']==val].plot(x='iteration', y='ptm', ax=ax6, kind='line')
        ax6.scatter(df_subset['iteration'], df_subset['ptm'], label='pTM', color='y')
        ax6.set_title('Iteration vs pTM')
        ax6.set_xlabel('Iteration')
        ax6.set_ylabel('pTM')

        #fig.subplots_adjust(top=0.99)
        # Add an overall title to the figure
        val_formatted = val.strip()
        print('val_formatted', val_formatted)
        fig.suptitle(f'Loss Function, Folding and Structure Matching Metrics for model {val_formatted}', fontsize=16, wrap=True)



        # Adjust layout and display the plot
        plt.tight_layout()
        # Adjust the layout to make room for the overall title
        fig.subplots_adjust(top=0.95, bottom=0.03, left=0.1, right=0.95, hspace=0.2)
        # save plot
        plt.savefig(f'{outpath}/{prefix}_folding_structure_losses_model_{val_formatted}.png', bbox_inches='tight', dpi=300)
        # Close the figure to free up memory
        plt.close(fig)
         # Turn interactive mode back on at the end of the function
        plt.ion()
        if verbose == 1:
            plt.show()

In [9]:
# function to get the loss curves considering some of the columns might be missing
def plot_folding_structure_losses_2(df, outpath, prefix, verbose=0):
    plt.ioff()  # Turn off interactive mode
    print(df['model_updated'].value_counts())
    unique_values = df['model_updated'].unique()
    
    # Define the columns to plot and their properties
    plot_columns = [
        ('loss', 'Loss', 'b'),
        ('rmsd', 'RMSD', 'g'),
        ('dgram_cce', 'dgram_cce', 'r'),
        ('plddt', 'plddt', 'c'),
        ('pae', 'PAE', 'm'),
        ('ptm', 'pTM', 'y')
    ]
    
    for val in unique_values:
        df_subset = df[df['model_updated'] == val]
        
        # Filter out columns that don't exist in the DataFrame
        available_plots = [plot for plot in plot_columns if plot[0] in df.columns]
        num_plots = len(available_plots)
        
        # Create a figure with the appropriate number of subplots
        fig, axes = plt.subplots(num_plots, 1, figsize=(10, 5*num_plots))
        if num_plots == 1:
            axes = [axes]  # Ensure axes is always a list
        
        for ax, (col_name, title, color) in zip(axes, available_plots):
            ax.scatter(df_subset['iteration'], df_subset[col_name], label=title, color=color)
            ax.set_title(f'Iteration vs {title}')
            ax.set_xlabel('Iteration')
            ax.set_ylabel(title)
        
        val_formatted = val.strip()
        print(f'val_formatted: {val_formatted}')
        fig.suptitle(f'Loss Function, Folding and Structure Matching Metrics for model {val_formatted}', fontsize=16)
        
        plt.tight_layout()
        fig.subplots_adjust(top=0.95, hspace=0.3)
        
        plt.savefig(f'{outpath}/{prefix}_folding_structure_losses_model_{val_formatted}.png', bbox_inches='tight', dpi=300)
        plt.close(fig)

    plt.ion()  # Turn interactive mode back on

    if verbose == 1:
        plt.show()

In [10]:
def plot_folding_structure_losses_3(df, outpath, prefix, verbose=0):
    plt.ioff()  # Turn off interactive mode
    print(df['model_updated'].value_counts())
    masks = create_length_masks(df)  # Use length masks to group rows based on unique values in 'model_updated'
    cmap = plt.get_cmap('tab10')  # Define a colormap for coloring
    
    # Define the columns to plot and their properties
    plot_columns = [
        ('loss', 'Loss', 'Iteration vs Loss'),
        ('rmsd', 'RMSD', 'Iteration vs RMSD'),
        ('dgram_cce', 'dgram_cce', 'Iteration vs dgram_cce'),
        ('plddt', 'plddt', 'Iteration vs pLDDT'),
        ('pae', 'PAE', 'Iteration vs PAE'),
        ('ptm', 'pTM', 'Iteration vs pTM')
    ]
    
    for i, mask in enumerate(masks, start=1):
        df_subset = df[mask]
        
        # Filter out columns that don't exist in the DataFrame
        available_plots = [plot for plot in plot_columns if plot[0] in df_subset.columns]
        num_plots = len(available_plots)
        
        # Create a figure with the appropriate number of subplots
        fig, axes = plt.subplots(num_plots, 1, figsize=(10, 5*num_plots))
        if num_plots == 1:
            axes = [axes]  # Ensure axes is always a list

        # Get unique model combinations for this subset
        model_combinations = df_subset['model_updated'].unique()
        
        # Create a color dictionary for this subset
        color_dict = {combo: cmap(j / len(model_combinations)) for j, combo in enumerate(model_combinations)}
        
        for ax, (col_name, ylabel, title) in zip(axes, available_plots):
            for combo in model_combinations:
                combo_data = df_subset[df_subset['model_updated'] == combo]
                color = color_dict[combo]
                ax.scatter(combo_data['iteration'], combo_data[col_name], label=combo, color=color)
            
            ax.set_title(title)
            ax.set_xlabel('Iteration')
            ax.set_ylabel(ylabel)

        # Add legend to the first subplot
        axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')

        fig.suptitle(f'Loss Function and Structure Matching Metrics for number of models {i}', fontsize=16)
        
        plt.tight_layout()
        fig.subplots_adjust(top=0.95, right=0.85, hspace=0.3)
        
        plt.savefig(f'{outpath}/{prefix}_folding_structure_losses_num_models_{i}.png', bbox_inches='tight', dpi=300)
        plt.close(fig)

    plt.ion()  # Turn interactive mode back on

    if verbose == 1:
        plt.show()


In [55]:
test = read_cdr_gen_out_df('/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_3/antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_10_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True.out')
print(test['model_updated'].unique())
#plot_interface_losses_3(test, '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_3/', 'test_antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_10_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True', verbose=0)
plot_folding_structure_losses_3(test, '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_3/', 'test_antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_10_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True', verbose=0)

['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
['0' '1' '2' '0,1' '0,2' '0,1,2']
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


In [59]:
directory = '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_3/'
out_files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.out')]
for file in out_files:
    df = read_cdr_gen_out_df(filename=file)
    file_prefix = file.split('8ee2_3/')[-1].split('.out')[0]
    print(file_prefix)
    plot_folding_structure_losses_3(df, '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_3', file_prefix)
    plot_interface_losses_3(df, '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_3', file_prefix)

['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_12_learning_rate_0.001_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_11_learning_rate_0.001_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_11_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_10_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_10_learning_rate_0.001_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_12_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


In [12]:
directory = '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_3/'
out_files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.out')]
for file in out_files:
    # because ran it for the 10, 11,12 weights test instances before
    if file.split('weights_test_')[-1].split('_')[0] != '10' and file.split('weights_test_')[-1].split('_')[0] != '11' and file.split('weights_test_')[-1].split('_')[0] != '12':
        # print(file)
        df = read_cdr_gen_out_df(filename=file)
        file_prefix = file.split('8ee2_3/')[-1].split('.out')[0]
        print(file_prefix)
        plot_folding_structure_losses_3(df, '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_3', file_prefix)
        plot_interface_losses_3(df, '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_3', file_prefix)


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_2_learning_rate_0.005_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_6_learning_rate_0.005_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_4_learning_rate_0.01_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_0_learning_rate_0.01_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_7_learning_rate_0.005_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_8_learning_rate_0.01_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_3_learning_rate_0.005_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_6_learning_rate_0.01_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_2_learning_rate_0.01_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_0_learning_rate_0.005_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_8_learning_rate_0.005_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_4_learning_rate_0.005_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_9_learning_rate_0.01_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_5_learning_rate_0.005_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_1_learning_rate_0.01_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_9_learning_rate_0.005_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_5_learning_rate_0.01_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_1_learning_rate_0.005_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_3_learning_rate_0.01_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_7_learning_rate_0.01_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64


  axes[0].legend(title='Model Combinations', bbox_to_anchor=(1.05, 1), loc='upper left')


In [60]:
directory = '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_2/'
out_files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.out')]
for file in out_files:
    df = read_cdr_gen_out_df(filename=file)
    file_prefix = file.split('8ee2_2/')[-1].split('.out')[0]
    print(file_prefix)
    plot_folding_structure_losses_3(df, directory, file_prefix)
    plot_interface_losses_3(df, directory, file_prefix)

['1' '3' '4' '0' '2' '0 1' '1 0' '2 0' '4 2' '0 4' '2 1' '3 0' '0 2' '4 1'
 '4 0' '1 4' '4 3' '1 2' '3 1 0' '0 4 2' '4 3 2' '2 4 3' '4 2 1' '0 2 3'
 '2 1 0' '0 2 1' '4 0 3 2' '2 0 3 4' '4 3 1 2' '4 1 3 0' '0 1 3 4'
 '3 4 1 2' '2 3 1 0' '0, 1, 2, 3, 4']
model
1                39
3                31
2                28
4                27
0                27
0, 1, 2, 3, 4     8
0 1               3
4 2               2
4 0 3 2           2
1 0               1
2 1               1
3 0               1
0 2               1
4 1               1
4 0               1
1 4               1
2 0               1
0 4               1
1 2               1
4 3               1
3 1 0             1
0 4 2             1
4 2 1             1
0 2 3             1
4 3 2             1
2 4 3             1
0 2 1             1
2 1 0             1
4 3 1 2           1
2 0 3 4           1
4 1 3 0           1
0 1 3 4           1
3 4 1 2           1
2 3 1 0           1
Name: count, dtype: int64
model_updated
1            39
3    

In [None]:
# example
filename='/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_2/antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_2_models_5_recycles_3_use_templates_True_rm_templates_ic_False_bias_True.out'
df = read_cdr_gen_out_df(filename=filename)
print(df)
file_prefix = filename.split('8ee2_2/')[-1].split('.')[0]
print(file_prefix)
plot_folding_structure_losses(df, '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_2', file_prefix)
plot_interface_losses(df, '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_2', file_prefix)

['1' '3' '4' '0' '2' '0 1' '1 0' '2 0' '4 2' '0 4' '2 1' '3 0' '0 2' '4 1'
 '4 0' '1 4' '4 3' '1 2' '3 1 0' '0 4 2' '4 3 2' '2 4 3' '4 2 1' '0 2 3'
 '2 1 0' '0 2 1' '4 0 3 2' '2 0 3 4' '4 3 1 2' '4 1 3 0' '0 1 3 4'
 '3 4 1 2' '2 3 1 0' '0, 1, 2, 3, 4']
model
1                39
3                31
2                28
4                27
0                27
0, 1, 2, 3, 4     8
0 1               3
4 2               2
4 0 3 2           2
1 0               1
2 1               1
3 0               1
0 2               1
4 1               1
4 0               1
1 4               1
2 0               1
0 4               1
1 2               1
4 3               1
3 1 0             1
0 4 2             1
4 2 1             1
0 2 3             1
4 3 2             1
2 4 3             1
0 2 1             1
2 1 0             1
4 3 1 2           1
2 0 3 4           1
4 1 3 0           1
0 1 3 4           1
3 4 1 2           1
2 3 1 0           1
Name: count, dtype: int64
model_updated
1            39
3    

In [22]:
directory = '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_2/'
out_files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.out')]
for file in out_files:
    df = read_cdr_gen_out_df(filename=file)
    file_prefix = file.split('8ee2_2/')[-1].split('.')[0]
    print(file_prefix)
    plot_folding_structure_losses(df, '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_2', file_prefix)
    plot_interface_losses(df, '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_2', file_prefix)

['1' '3' '4' '0' '2' '0 1' '1 0' '2 0' '4 2' '0 4' '2 1' '3 0' '0 2' '4 1'
 '4 0' '1 4' '4 3' '1 2' '3 1 0' '0 4 2' '4 3 2' '2 4 3' '4 2 1' '0 2 3'
 '2 1 0' '0 2 1' '4 0 3 2' '2 0 3 4' '4 3 1 2' '4 1 3 0' '0 1 3 4'
 '3 4 1 2' '2 3 1 0' '0, 1, 2, 3, 4']
model
1                39
3                31
2                28
4                27
0                27
0, 1, 2, 3, 4     8
0 1               3
4 2               2
4 0 3 2           2
1 0               1
2 1               1
3 0               1
0 2               1
4 1               1
4 0               1
1 4               1
2 0               1
0 4               1
1 2               1
4 3               1
3 1 0             1
0 4 2             1
4 2 1             1
0 2 3             1
4 3 2             1
2 4 3             1
0 2 1             1
2 1 0             1
4 3 1 2           1
2 0 3 4           1
4 1 3 0           1
0 1 3 4           1
3 4 1 2           1
2 3 1 0           1
Name: count, dtype: int64
model_updated
1            39
3    

In [28]:
directory = '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_3/'
out_files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.out')]
for file in out_files:
    df = read_cdr_gen_out_df(filename=file)
    file_prefix = file.split('8ee2_3/')[-1].split('.out')[0]
    print(file_prefix)
    plot_folding_structure_losses_2(df, directory, file_prefix)
    plot_interface_losses_2(df, directory, file_prefix)

['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45
0, 1, 2     8
0 1         5
1 0         5
0 2         3
2 0         3
Name: count, dtype: int64
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
antigen_A_nb_C_4A_contact_cdr_gen_c38_l0_s120_h32_weights_test_12_learning_rate_0.001_models_3_recycles_3_use_templates_True_rm_templates_ic_False_bias_True
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
val_formatted: 0
val_formatted: 1
val_formatted: 2
val_formatted: 0,1
val_formatted: 0,2
val_formatted: 0,1,2
model_updated
2        56
0        51
1        45
0,1      10
0,1,2     8
0,2       6
Name: count, dtype: int64
val_formatted: 0
val_formatted: 1
val_formatted: 2
val_formatted: 0,1
val_formatted: 0,2
val_formatted: 0,1,2
['0' '1' '2' '0 1' '2 0' '0 2' '1 0' '0, 1, 2']
model
2          56
0          51
1          45

In [8]:
print(df[df['model_updated']=='0,1,3'])

     stage  stage_model  iteration  model  recycles  hard  soft  temp  seqid  \
168      2            3        169  3 1 0       3.0   1.0   0.0   1.0   0.81   

      loss  seq_ent   pae  i_pae  dgram_cce  plddt  ptm  i_ptm  rmsd  \
168  23.26     2.71  0.41   0.66     276.64   0.87  0.6    0.3  16.2   

    model_updated  
168         0,1,3  


In [1]:
# print the filename only if the error file is not empty
import os
import glob
match_pattern = '/usr/users/fatma.chafra01/ColabDesign/af/examples/8ee2_4/*.err'
matching_files = glob.glob(match_pattern)
for file in matching_files:
    if os.path.getsize(file) > 0:
        print(file)


In [None]:
# updated *.out file reading function because most of the previous considerations are not necessary
def read_cdr_gen_out_df_singlept(filename='file.out', verbose=0): 
    data = []
    stage = 0  # 0: before Stage 1, 1: Stage 1, 2: semigreedy optimization
    stage_model = 0
    
    with open(filename, 'r') as file:
        for line in file:
            if 'Stage 1: running (logits → soft)' in line:
                stage = 1
                stage_model = 1
                if verbose == 1:
                    print('stage', stage)
                    print('stage_model', stage_model)
                continue
            elif 'Running semigreedy optimization...' in line:
                # if previous stage was 1 then stage_model is kept as 1
                if stage == 1:
                    stage_model = 1
                else:
                    stage_model += 1
                stage = 2
                if verbose == 1:
                    print('stage', stage)
                    print('stage_model', stage_model)
                continue
            elif line.startswith('end'):
                break
            
            if stage > 0:
                # Parse the line
                if verbose == 1:
                    print('line', line)
                parts = line.split(' [')
                if verbose == 1:
                    print('parts', parts)
                if len(parts) < 2 or not parts[0].split()[0].isdigit():
                    continue
                if verbose == 1:
                    print('parts', parts)

                iteration = int(parts[0].split()[0])
                if verbose == 1:
                    print('iteration', iteration)
                parts_2 = parts[1].split('] ')
                if verbose == 1:
                    print('parts_2', parts_2)
                model = parts_2[0]
                if verbose == 1:
                    print('model', model)
                parts_3 = parts_2[1].split()
                #print(parts[1])
                #print(parts[2])
                #model = parts[2].strip('[').strip(']')
                #model = int(re.findall(r'\d+', parts[1])[0])
                
                row = {'stage':stage, 'stage_model': stage_model, 'iteration': iteration, 'model': model}
                if verbose == 1:
                    print(row)
                    print('range', range(0, len(parts_3), 2))
                
                for i in range(0, len(parts_3), 2):
                    key = parts_3[i]
                    if verbose == 1:
                        print('key', key)
                    value = float(parts_3[i+1])
                    if verbose == 1:
                        print('value', value)
                    row[key] = value
                
                data.append(row)
    
    df = pd.DataFrame(data)
    print(df['model'].unique())
    print(df['model'].value_counts())
    # print(second_test['model'])
    # df['model_updated'] = df['model'].map(lambda x: ','.join(sorted(x.split())))
    df['model_updated'] = df['model'].apply(normalize_and_sort)
    print(df['model_updated'].value_counts())

    return df