# Computing counts for synthesized compounds per team and target

In [1]:
import pandas as pd
import numpy as np
import copy

# Let wide frames render fully: show up to 500 columns and allow
# 1000 characters per line in the textual representation.
pd.options.display.max_columns = 500
pd.options.display.width = 1000

In [2]:
from IPython.display import display, HTML
# Inject a CSS rule that stretches the notebook container to 90% of the page width.
display(HTML(data="<style>.container { width:90% !important; }</style>"))

In [3]:
# Load the merged submission list
df = pd.read_csv('../merged_submission_lists/df_complete.csv')

# Remember every team present in the file *before* filtering, so that teams
# without any synthesized compound can still be listed later on.
all_teams = sorted(df['team'].unique())

# From here on only synthesized compounds are considered
df = df.loc[df['synthesized'] == 1]

print(df.shape)

df.head()

(1474, 16)


  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,smiles,owner_info,team,target_subm,list_pos,target_selected,from_gtm,from_medoids,from_topranked,in_selected,in_selected_minusDB,plannedToSynth,synthesized,hit,target_hit,further-submission-info
40,Oc1cccc(CC(=O)Nc2n[nH]c3ccc(F)cc23)c1,N_mlinch.csv-POS4786,imolecule,n,4786,n,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,,
49,CCc1cc(NC(=O)C(CO)c2ccc(Cl)cc2)n[nH]1,N_mlinch.csv-POS8850,imolecule,n,8850,n,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,,
98,Oc1ccc2c(NC(=O)Cc3ccc(F)cc3)[nH]nc2c1,N_mlinch.csv-POS5291+S_mlinch.csv-POS6470,imolecule,n,5291,n,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,,
99,Oc1ccc2c(NC(=O)Cc3ccc(F)cc3)[nH]nc2c1,N_mlinch.csv-POS5291+S_mlinch.csv-POS6470,imolecule,s,6470,n,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,,
150,NC(C1CCCCCC1)C(=O)Nc1ccc2n[nH]cc2c1,N_mlinch.csv-POS7232,imolecule,n,7232,n,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,,


In [4]:
# number of distinct SMILES strings among the synthesized compounds
print(np.shape(np.unique(df['smiles'])))

(878,)


In [5]:
# distinct teams that have at least one synthesized compound
np.unique(df.team.values)

array(['ai4science', 'aiwinter', 'cermn', 'covid19ddc', 'deeplab',
       'imolecule', 'jku', 'kyuken', 'lci', 'luxscreen', 'pharmai',
       'safan', 'sarstroopers', 'sarswars', 'virtualflow', 'way2drug',
       'yoda'], dtype=object)

In [6]:
# distinct values of the `target_selected` column
np.unique(df.target_selected.values)

array(['n', 'nsp12', 'nsp3', 'nsp5', 's', 'tmprss2'], dtype=object)

In [7]:
# distinct values of the `target_subm` column
np.unique(df.target_subm.values)

array(['aak1', 'furin', 'n', 'nsp12', 'nsp3', 'nsp5', 's', 'tmprss2'],
      dtype=object)

In [8]:
# Keep only the rows whose submitted target equals the selected target
df = df.loc[df['target_subm'].eq(df['target_selected'])]

In [9]:
# counts per team and target:
# number of distinct SMILES strings in every (target_selected, team) group.
# The `smiles` column is selected *before* aggregating so that distinct counts
# are not computed for all the other columns only to be thrown away.
counts = (
    df.groupby(['target_selected', 'team'])['smiles']
    .nunique()
    .rename('count')
    .reset_index(level='team')  # keep `target_selected` as index, `team` becomes a column
)

counts.head()

Unnamed: 0_level_0,team,count
target_selected,Unnamed: 1_level_1,Unnamed: 2_level_1
n,imolecule,73
n,sarswars,8
n,virtualflow,46
nsp12,covid19ddc,10
nsp12,imolecule,42


In [10]:
# display count table

# row order of the targets in the final table
target_order = ['n', 'nsp3', 'nsp5', 'nsp12', 's', 'tmprss2']

# create pivot table (targets as rows, teams as columns).
# Every (target, team) pair occurs at most once in `counts`, so aggregating
# with 'sum' returns the count itself and keeps the values integer.
count_table = pd.pivot_table(
    counts,
    values='count',
    index=['target_selected'],
    columns=['team'],
    aggfunc='sum',
    fill_value=0,
)

# add missing teams (all-zero columns) and put the columns in `all_teams` order
count_table = count_table.reindex(columns=all_teams, fill_value=0)

# sort targets (targets that do not occur in the data are simply left out)
count_table = count_table.filter(items=target_order, axis=0)

# sum row and column
# NOTE: these are sums of the per-(target, team) distinct counts, so a compound
# that appears for several teams or targets contributes once per group.
team_sum = count_table.sum(axis=0)
team_sum.name = 'SUM'
# DataFrame.append was removed in pandas 2.0 -> use pd.concat; restore the index
# name afterwards because the appended one-row frame has an unnamed index.
count_table = pd.concat([count_table, team_sum.to_frame().T]).rename_axis(index='target_selected')
count_table['SUM'] = count_table.sum(axis=1)


count_table

team,ai4science,aiwinter,belarus,cermn,covid19ddc,deeplab,imolecule,jku,kyuken,lambdazero,lci,luxscreen,nuwave,pharmai,safan,sarstroopers,sarswars,virtualflow,way2drug,yoda,SUM
target_selected,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
n,0.0,0.0,0.0,0.0,0.0,0.0,73.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,46.0,0.0,0.0,127.0
nsp3,0.0,12.0,0.0,7.0,6.0,8.0,4.0,0.0,15.0,0.0,86.0,0.0,0.0,0.0,2.0,5.0,0.0,7.0,13.0,3.0,168.0
nsp5,0.0,8.0,0.0,3.0,5.0,7.0,6.0,62.0,0.0,0.0,54.0,2.0,0.0,0.0,17.0,0.0,2.0,2.0,2.0,11.0,181.0
nsp12,0.0,0.0,0.0,0.0,10.0,0.0,42.0,5.0,0.0,0.0,5.0,14.0,0.0,0.0,15.0,19.0,9.0,44.0,0.0,0.0,163.0
s,0.0,0.0,0.0,0.0,0.0,11.0,22.0,0.0,41.0,0.0,0.0,0.0,0.0,35.0,0.0,1.0,0.0,24.0,0.0,22.0,156.0
tmprss2,16.0,0.0,0.0,49.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,43.0,21.0,0.0,134.0
SUM,16.0,20.0,0.0,59.0,21.0,26.0,147.0,67.0,56.0,0.0,145.0,21.0,0.0,35.0,34.0,25.0,19.0,166.0,36.0,36.0,929.0
