# Append additional metainfo columns to the 220k library

In [1]:
import os
from pathlib import Path

import pandas as pd

In [2]:
# Input locations: the library CSV and the reagent mapping table CSV.
library_file = Path("./220k_library.csv")
mapping_table = Path("../../control_panel/sdl_server/mapping_table/General_mapping_sampler.csv")
# Fail early and name the offending path. Unlike `assert`, this check is not
# stripped when Python runs with -O, and the error says which file is missing.
for required_file in (library_file, mapping_table):
    if not required_file.exists():
        raise FileNotFoundError(f"Required input file not found: {required_file}")

In [3]:
# Load the library CSV into a DataFrame and preview its first five rows.
library = pd.read_csv(filepath_or_buffer=library_file)
library.head(n=5)

Unnamed: 0,id,combined_mol_SMILES,A_smiles,B_smiles,C_smiles,D_smiles
0,0,CCCCCC(C(=O)NC)N(CCN(C)C)C(=O)CC12C[C@H]3C[C@@...,NCCN(C)C,C[N+]#[C-],CCCCCC=O,OC(CC(C1)(C2)C[C@@H]3C[C@H]2C[C@H]1C3)=O
1,1,CCCCCC(C(=O)NC)N(CCN(C)C)C(=O)C[C@@]12C[C@@H]3...,NCCN(C)C,C[N+]#[C-],CCCCCC=O,OC(C[C@@](C1)(C2)C[C@]3(O)C[C@H]2C[C@H]1C3)=O
2,2,CCCCCC(C(=O)NC)N(CCN(C)C)C(=O)CCN1CCCCC1,NCCN(C)C,C[N+]#[C-],CCCCCC=O,OC(CCN1CCCCC1)=O
3,3,CCCCCC(C(=O)NC)N(CCN(C)C)C(=O)CN(C)C,NCCN(C)C,C[N+]#[C-],CCCCCC=O,CN(CC(O)=O)C
4,4,CCCCCC(C(=O)NC)N(CCN(C)C)C(=O)CCN(C)C,NCCN(C)C,C[N+]#[C-],CCCCCC=O,CN(CCC(O)=O)C


In [4]:
# Load the reagent mapping table into a DataFrame and preview its first five rows.
mapping = pd.read_csv(filepath_or_buffer=mapping_table)
mapping.head(n=5)

Unnamed: 0,Motor_ID,Row,Column,Frame_ID,Tube_len,DBoard_ID,DBoard_cor,Well_cor,Reagent,Reagent Name,Reagent SMILES,Volume
0,0,,,,,0,1,,,,,
1,1,A,0.0,A,60.0,1,1,A1,A1,D5616,NCCN(C)C,800.0
2,2,A,1.0,A,60.0,1,2,A3,A3,A0409,NCCCN1CCOCC1,800.0
3,3,A,2.0,A,60.0,1,3,A5,A5,D0790,NCCCN(C)C,800.0
4,4,A,3.0,A,60.0,1,4,A7,A7,A2046,NC1CCN(CC1)C,800.0


In [5]:
# Add reagent id and name columns to the library.
# For each component X in A-D, "X_id" receives the mapping table's "Reagent"
# and "X_name" its "Reagent Name" from the row whose "Reagent SMILES" equals
# the library's "X_smiles". Library rows with no match get NaN in both columns.
#
# Build the lookup once, outside the loop:
#   * rows without a SMILES are dropped, because pandas matches NaN merge keys
#     to each other and such rows carry no usable reagent information;
#   * each SMILES is kept once (first occurrence), because duplicate keys on
#     the right side of a merge would multiply the matching library rows.
reagent_lookup = (
    mapping[["Reagent", "Reagent Name", "Reagent SMILES"]]
    .dropna(subset=["Reagent SMILES"])
    .drop_duplicates(subset="Reagent SMILES")
)
for reagent in ["A", "B", "C", "D"]:
    library = (
        library.merge(
            reagent_lookup,
            left_on=f"{reagent}_smiles",
            right_on="Reagent SMILES",
            how="left",
            # Guard: the merge must never change the number of library rows.
            validate="many_to_one",
        )
        .rename(columns={"Reagent": f"{reagent}_id", "Reagent Name": f"{reagent}_name"})
        .drop(columns="Reagent SMILES")
    )

In [6]:
# Build "component_str" by joining A_id, B_id, C_id and D_id, in that order,
# with plain left-to-right "+" concatenation of the columns.
id_columns = [f"{letter}_id" for letter in "ABCD"]
component_str = library[id_columns[0]]
for id_column in id_columns[1:]:
    component_str = component_str + library[id_column]
library["component_str"] = component_str
library

Unnamed: 0,id,combined_mol_SMILES,A_smiles,B_smiles,C_smiles,D_smiles,A_id,A_name,B_id,B_name,C_id,C_name,D_id,D_name,component_str
0,0,CCCCCC(C(=O)NC)N(CCN(C)C)C(=O)CC12C[C@H]3C[C@@...,NCCN(C)C,C[N+]#[C-],CCCCCC=O,OC(CC(C1)(C2)C[C@@H]3C[C@H]2C[C@H]1C3)=O,A1,D5616,B1,TRC-I289721,C1,H0133,D1,A0736,A1B1C1D1
1,1,CCCCCC(C(=O)NC)N(CCN(C)C)C(=O)C[C@@]12C[C@@H]3...,NCCN(C)C,C[N+]#[C-],CCCCCC=O,OC(C[C@@](C1)(C2)C[C@]3(O)C[C@H]2C[C@H]1C3)=O,A1,D5616,B1,TRC-I289721,C1,H0133,D2,H1414,A1B1C1D2
2,2,CCCCCC(C(=O)NC)N(CCN(C)C)C(=O)CCN1CCCCC1,NCCN(C)C,C[N+]#[C-],CCCCCC=O,OC(CCN1CCCCC1)=O,A1,D5616,B1,TRC-I289721,C1,H0133,D3,335924,A1B1C1D3
3,3,CCCCCC(C(=O)NC)N(CCN(C)C)C(=O)CN(C)C,NCCN(C)C,C[N+]#[C-],CCCCCC=O,CN(CC(O)=O)C,A1,D5616,B1,TRC-I289721,C1,H0133,D4,D3585,A1B1C1D4
4,4,CCCCCC(C(=O)NC)N(CCN(C)C)C(=O)CCN(C)C,NCCN(C)C,C[N+]#[C-],CCCCCC=O,CN(CCC(O)=O)C,A1,D5616,B1,TRC-I289721,C1,H0133,D5,D2690,A1B1C1D5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
221179,221179,CCCCC/C=C\C/C=C\CCCCCCCC(C(=O)NC12C[C@H]3C[C@@...,NCCCN1CCCCCC1,[C-]#[N+]C1(C[C@@H]2C3)C[C@@H]3C[C@@H](C2)C1,CCCCC/C=C\C/C=C\CCCCCCCC=O,CCCCCCCCC(OC(CCCCC(O)=O)=O)CCCCCCCC,A32,TRC-A809785,B12,I0824,C16,O0225,D32,H1401+A0161,A32B12C16D32
221180,221180,CCCCC/C=C\C/C=C\CCCCCCCC(C(=O)NC12C[C@H]3C[C@@...,NCCCN1CCCCCC1,[C-]#[N+]C1(C[C@@H]2C3)C[C@@H]3C[C@@H](C2)C1,CCCCC/C=C\C/C=C\CCCCCCCC=O,CCC(CCC(OC(CCCCC(O)=O)=O)CC(C)C)CCCC,A32,TRC-A809785,B12,I0824,C16,O0225,D33,E0144+A0161,A32B12C16D33
221181,221181,CCCCC/C=C\C/C=C\CCCCCCCC(C(=O)NC12C[C@H]3C[C@@...,NCCCN1CCCCCC1,[C-]#[N+]C1(C[C@@H]2C3)C[C@@H]3C[C@@H](C2)C1,CCCCC/C=C\C/C=C\CCCCCCCC=O,OC(CCCCC(OCCCCCCCCCC)=O)=O,A32,TRC-A809785,B12,I0824,C16,O0225,D34,D0031+A0161,A32B12C16D34
221182,221182,CCCCC/C=C\C/C=C\CCCCCCCC(C(=O)NC12C[C@H]3C[C@@...,NCCCN1CCCCCC1,[C-]#[N+]C1(C[C@@H]2C3)C[C@@H]3C[C@@H](C2)C1,CCCCC/C=C\C/C=C\CCCCCCCC=O,OC(CCCCC(OCCCCCCCCCCC)=O)=O,A32,TRC-A809785,B12,I0824,C16,O0225,D35,U0005+A0161,A32B12C16D35


In [7]:
# Write the augmented library to a new CSV file, without the DataFrame index.
library.to_csv(path_or_buf="./220k_library_with_meta.csv", index=False)