In [None]:
import torch
import pandas as pd
import numpy as np
import requests
import os
from tqdm import tqdm

In [None]:
# Read the two tab-separated tables under ../glass and report their shapes.
inter = pd.read_csv('../glass/interactions_total.tsv', sep='\t')
lig = pd.read_csv('../glass/ligands.tsv', sep='\t')
print(inter.shape, lig.shape)

# Keep only the interaction rows whose 'Parameter' value is one of the three
# listed labels; the ligand table is not touched here, its shape is printed
# again only for comparison.
kept_parameters = ['Ki', 'IC50', 'EC50']
has_kept_parameter = inter['Parameter'].isin(kept_parameters)
inter = inter.loc[has_kept_parameter]
print(inter.shape, lig.shape)

In [None]:
def get_float(entry: str) -> float:
    """Convert ``entry`` to a float, returning NaN when it cannot be parsed.

    Args:
        entry: Raw value to convert; anything ``float()`` accepts
            (numeric strings, ints, floats).

    Returns:
        The parsed float, or ``np.nan`` when ``float()`` rejects the value
        (non-numeric text, ``None``, or an integer too large for a float).
    """
    try:
        return float(entry)
    except (TypeError, ValueError, OverflowError):
        # Only genuine conversion failures become NaN; any unrelated error is
        # allowed to surface instead of being silently turned into missing data.
        return np.nan

In [None]:
# Give both tables the short column names used from here on, then keep only
# the columns that are needed. Keys that are absent from a table are ignored
# by rename() (its default behaviour), so the ligand mapping can list names
# that may only exist in the interaction table.
inter_renamed = inter.rename(
    columns={'UniProt ID' : 'Target_ID', 'InChI Key': 'Drug_ID', 'Value' : 'Y'}
)
inter = inter_renamed[['Drug_ID','Target_ID', "Y"]]

lig_renamed = lig.rename(
    columns={'UniProt ID' : 'Target_ID', 'InChI Key': 'Drug_ID', 'Value' : 'Y', 'Canonical SMILES' : 'Drug'}
)
lig = lig_renamed[['Drug_ID', 'Drug']]

In [None]:
# Parse every 'Y' entry with get_float; entries it cannot convert become NaN.
parsed_y = inter['Y'].map(get_float)
inter['Y'] = parsed_y

In [None]:
# Discard the rows whose 'Y' is missing (NaN), then report the shapes again.
inter = inter.dropna(subset=['Y'])
print(inter.shape, lig.shape)

In [None]:
# Collapse repeated (Drug_ID, Target_ID) pairs to a single row holding the
# median of their 'Y' values; reset_index() turns the group keys back into
# ordinary columns.
# NOTE(review): the 'Parameter' column was dropped earlier, so this median
# pools Ki, IC50 and EC50 rows of the same pair — confirm that is intended.
pair_groups = inter.groupby(['Drug_ID', 'Target_ID'])
inter = pair_groups.median().reset_index()
print(inter.shape, lig.shape)

In [None]:
# Download one structure file per distinct Target_ID in `inter` and store it as
# resources/structures/<Target_ID>.pdb.
# NOTE(review): the URL pins the "model_v2" file name — confirm this version is
# still the one that should be fetched.
structure_dir = 'resources/structures'
# open() below fails if the directory is missing, so create it up front.
os.makedirs(structure_dir, exist_ok=True)

failed_ids = []
# A single session reuses the HTTP connection across the many small requests.
with requests.Session() as session:
    for i in tqdm(inter['Target_ID'].unique()):
        pdb_path = os.path.join(structure_dir, '{i}.pdb'.format(i=i))
        # Skip targets whose file is already on disk so that re-running the
        # notebook does not download everything again. Delete a file to force
        # a fresh download of that target.
        if os.path.isfile(pdb_path) and os.path.getsize(pdb_path) > 0:
            continue
        try:
            # The timeout keeps one stalled connection from hanging the loop.
            response = session.get(
                'https://alphafold.ebi.ac.uk/files/AF-{i}-F1-model_v2.pdb'.format(i=i),
                timeout=30,
            )
        except requests.RequestException:
            # A transient network error should not abort the whole run;
            # remember the ID and report it once the loop has finished.
            failed_ids.append(i)
            continue
        # Only successful responses are written; an error status (4xx/5xx)
        # leaves no file behind for this target.
        if response.ok:
            # Write the raw bytes so the file is stored exactly as served, with
            # no encoding guess or platform newline translation.
            with open(pdb_path, 'wb') as file:
                file.write(response.content)

if failed_ids:
    print('{n} download(s) failed with a network error: {ids}'.format(n=len(failed_ids), ids=failed_ids))

In [None]:
# Collect the part of every file name in resources/structures that precedes
# its first dot, then keep only the interactions whose Target_ID is among them.
available_structures = []
for file_name in os.listdir('resources/structures'):
    stem, _, _ = file_name.partition('.')
    available_structures.append(stem)

has_structure = inter['Target_ID'].isin(available_structures)
inter = inter.loc[has_structure]
print(inter.shape, lig.shape)

In [None]:
# Keep only the ligand rows whose Drug_ID still occurs in the interaction table.
drug_ids_in_use = inter['Drug_ID'].unique()
is_used = lig['Drug_ID'].isin(drug_ids_in_use)
lig = lig.loc[is_used]
print(lig.shape)

In [None]:
# Write both tables as tab-separated files without the index column.
# to_csv() raises if the target directory is missing, so create it first.
os.makedirs('resources/drugs', exist_ok=True)
inter.to_csv('resources/drugs/inter.tsv', sep='\t', index=False)
lig.to_csv('resources/drugs/lig.tsv', sep='\t', index=False)