In [None]:
import os
import pandas as pd
import re

base_dir = os.getcwd()  # or set to your base path if not running from cwd

all_results = []

for uniprot in os.listdir(base_dir):
    uniprot_path = os.path.join(base_dir, uniprot)
    if os.path.isdir(uniprot_path):
        for subfolder in os.listdir(uniprot_path):
            match = re.match(r'dock(\d+)_(\w+)', subfolder)
            if match:
                pocket_number, ligcode = match.groups()
                result_file = os.path.join(uniprot_path, subfolder, 'final_results.csv')
                if os.path.isfile(result_file):
                    df = pd.read_csv(result_file)
                    df['uniprot'] = uniprot
                    df['pocket_number'] = pocket_number
                    df['ligcode'] = ligcode
                    all_results.append(df)

# Concatenate all dataframes
final_df = pd.concat(all_results, ignore_index=True)

# Optionally, save to a CSV
#final_df.to_csv('all_final_results.csv', index=False)


Unnamed: 0,Tool1,Tool2,PoseNumber1,PoseNumber2,Score1,Score2,File1,File2,RMSD,uniprot,pocket_number,ligcode
0,LeDock,GOLD,6,2,-2.43,31.45,complex_6.pdb,complex_2.pdb,7.121884,P03372,4,EZT
1,LeDock,GOLD,6,1,-2.43,31.31,complex_6.pdb,complex_1.pdb,7.110847,P03372,4,EZT
2,LeDock,GOLD,6,3,-2.43,31.66,complex_6.pdb,complex_3.pdb,7.099406,P03372,4,EZT
3,LeDock,GOLD,2,2,-2.94,31.45,complex_2.pdb,complex_2.pdb,7.161547,P03372,4,EZT
4,LeDock,GOLD,2,1,-2.94,31.31,complex_2.pdb,complex_1.pdb,7.155044,P03372,4,EZT


In [3]:
final_df['uniprot_ligcode']= final_df['uniprot'] + '_' + final_df['ligcode']
final_df.head()

Unnamed: 0,Tool1,Tool2,PoseNumber1,PoseNumber2,Score1,Score2,File1,File2,RMSD,uniprot,pocket_number,ligcode,uniprot_ligcode
0,LeDock,GOLD,6,2,-2.43,31.45,complex_6.pdb,complex_2.pdb,7.121884,P03372,4,EZT,P03372_EZT
1,LeDock,GOLD,6,1,-2.43,31.31,complex_6.pdb,complex_1.pdb,7.110847,P03372,4,EZT,P03372_EZT
2,LeDock,GOLD,6,3,-2.43,31.66,complex_6.pdb,complex_3.pdb,7.099406,P03372,4,EZT,P03372_EZT
3,LeDock,GOLD,2,2,-2.94,31.45,complex_2.pdb,complex_2.pdb,7.161547,P03372,4,EZT,P03372_EZT
4,LeDock,GOLD,2,1,-2.94,31.31,complex_2.pdb,complex_1.pdb,7.155044,P03372,4,EZT,P03372_EZT


In [None]:
# For each unique uniprot_ligcode, how many rows showed an RMSD value lower than 2.0?
rmsd_counts = final_df[final_df['RMSD'] < 2.0].groupby('uniprot_ligcode').size().reset_index(name='count')

In [10]:
# For each unique uniprot_ligcode, how many rows are between PoseNumber1 = 1 and PoseNumber2 = 1 and showed an RMSD value lower than 2.0?
pose_counts = final_df[(final_df['PoseNumber1'] == 1) & (final_df['PoseNumber2'] == 1) & (final_df['RMSD'] < 2.0)].groupby('uniprot_ligcode').size().reset_index(name='count')

In [14]:
# For each unique uniprot_ligcode, how many of them showed an RMSD value lower than 2.0 with at least three different Tool1 and Tool2 combinations?
tool_counts = final_df[final_df['RMSD'] < 2.0].groupby('uniprot_ligcode').apply(lambda x: x[['Tool1', 'Tool2']].drop_duplicates().shape[0]).reset_index(name='unique_tool_combinations')


In [15]:
tool_counts

Unnamed: 0,uniprot_ligcode,unique_tool_combinations
0,O14757_agy,1
1,O14757_ucn,3
2,O14965_0c8,1
3,O14965_eml,1
4,O14965_vx6,1
5,O60674_0nv,3
6,O60674_1k3,1
7,O60674_jak,1
8,P00519_3yy,1
9,P00519_627,1


In [8]:
len(final_df['uniprot_ligcode'].unique())

57

In [6]:
final_df.query('uniprot_ligcode == "O14757_ucn" & RMSD < 2.0').sort_values('RMSD')

Unnamed: 0,Tool1,Tool2,PoseNumber1,PoseNumber2,Score1,Score2,File1,File2,RMSD,uniprot,pocket_number,ligcode,uniprot_ligcode
95816,LeDock,GOLD,9,9,-4.87,45.59,complex_9.pdb,complex_9.pdb,0.331356,O14757,3,ucn,O14757_ucn
95817,LeDock,GOLD,9,4,-4.87,45.21,complex_9.pdb,complex_4.pdb,0.349086,O14757,3,ucn,O14757_ucn
95804,LeDock,GOLD,9,16,-4.87,45.1,complex_9.pdb,complex_16.pdb,0.364222,O14757,3,ucn,O14757_ucn
95803,LeDock,GOLD,9,20,-4.87,45.5,complex_9.pdb,complex_20.pdb,0.370975,O14757,3,ucn,O14757_ucn
96805,LeDock,GOLD,1,8,-5.97,60.65,complex_1.pdb,complex_8.pdb,0.597613,O14757,4,ucn,O14757_ucn
93839,LeDock,GOLD,3,11,-4.92,57.49,complex_3.pdb,complex_11.pdb,0.725013,O14757,5,ucn,O14757_ucn
96473,LeDock,GOLD,2,2,-5.47,54.56,complex_2.pdb,complex_2.pdb,0.768561,O14757,1,ucn,O14757_ucn
93855,LeDock,GOLD,3,12,-4.92,56.11,complex_3.pdb,complex_12.pdb,0.793224,O14757,5,ucn,O14757_ucn
93844,LeDock,GOLD,3,16,-4.92,56.55,complex_3.pdb,complex_16.pdb,0.796979,O14757,5,ucn,O14757_ucn
96475,LeDock,GOLD,2,3,-5.47,54.54,complex_2.pdb,complex_3.pdb,0.806143,O14757,1,ucn,O14757_ucn
