## Drug Repurposing: A Hybrid Graph Convolutional Network for Predicting Cancer Drug Response

<center><img src="./images/model.png" style="width:70%;"></center>

<center><img src="./images/result_table.png" style="width:40%; padding: 0px 0px 10% 0px;"><img src="images/result_plot.png" style="width:40%"></center>

## Katana AI Pipeline for End-to-End Drug Repurposing

<img src="images/pipeline_full.png" style="width: 1500px;"/>

In [None]:
%load_ext autoreload
%autoreload 2
import warnings

from katana import remote
from katana.remote import import_data

# Suppress all Python warnings for the remainder of the notebook.
warnings.filterwarnings("ignore")

# Create a remote client and a 4-partition graph, then import the RDG stored at
# the given bucket path into that graph (presumably the DeepCDR demo dataset).
client = remote.Client()
graph = client.create_graph(num_partitions=4)
import_data.rdg(graph, "gs://hls-dataset-bucket/demo/DeepCDR")

# Report the size of the imported graph.
print(f"{graph.num_nodes()} nodes, {graph.num_edges()} edges")

In [None]:
# Summarize the DRUG <- GDSC -> CELL_LINE pattern: distinct node counts per label,
# plus the number of matched rows (aliased DRUG_CELL_LINE_PAIRS).
stats_query = """MATCH (a:DRUG)<-[:FOR_DRUG]-(g:GDSC)-[:HAS_CELL_LINE]->(c:CELL_LINE)
RETURN COUNT(Distinct a) as DRUG, COUNT(Distinct g) as GDSC, COUNT(Distinct c) as CELL_LINE,
COUNT(a) as DRUG_CELL_LINE_PAIRS"""
graph.query(stats_query).head()

In [None]:
# Fetch the graph schema, then show its view as the cell output.
graph_schema = graph.schema()
graph_schema.view()

## Initialize the Katana AI training pipeline

<img src="images/pipeline_preprocess.png" style="width: 1500px;"/>

In [None]:
from datetime import timedelta
from timeit import default_timer

from src import katana_pipeline

# Wrap the imported graph in the project's RecipePipeline; the remaining cells
# call its feature_generator/train/test/plot/infer methods in order.
rec_pipeline = katana_pipeline.RecipePipeline(graph)

## Load model and training hyperparameters

In [None]:
from config import hyperparams

# Load the hyperparameters and split them into the model part and the training
# part; both are passed to the pipeline in later cells.
loaded_hyperparams = hyperparams.load_hyperparams()
model_hp, training_hp = loaded_hyperparams

# Show both sets as the cell output.
model_hp, training_hp

## Add features for training

In [None]:
# Wall-clock limit for feature generation; the cell fails if it is reached.
feature_generation_limit = timedelta(minutes=4)

# Time the feature-generation step of the pipeline.
start_time = default_timer()
rec_pipeline.feature_generator()
end_time = default_timer()
feature_generation_time = end_time - start_time

print(f"***Took {feature_generation_time} seconds to generate the features.***")
assert timedelta(seconds=feature_generation_time) < feature_generation_limit

## Initialize model training

<img src="images/pipeline_gnn.png" style="width: 1500px;"/>

In [None]:
# Train with the loaded model/training hyperparameters and report the elapsed
# wall-clock time (no time limit is enforced for training).
start_time = default_timer()
rec_pipeline.train(model_hp, training_hp)
end_time = default_timer()
train_time = end_time - start_time

print(f"***Took {train_time} seconds to train the model.***")

In [None]:
# Wall-clock limit for the test step; the cell fails if it is reached.
test_limit = timedelta(minutes=1)

# Run the pipeline's test step and time it.
start_time = default_timer()
test_res = rec_pipeline.test(training_hp)
end_time = default_timer()
test_time = end_time - start_time

print(f"***Took {test_time} seconds to test the model.***")
assert timedelta(seconds=test_time) < test_limit

# Show the test result as the cell output.
test_res

In [None]:
# Time the plotting step and fail the cell if it takes a minute or longer.
# The duration is kept in its own variable (`plot_time`) so it does not
# overwrite `test_time`, which holds the duration of the test cell.
start_time = default_timer()
rec_pipeline.plot(training_hp)
plot_time = default_timer() - start_time
print(f"***Took {plot_time} seconds to plot figures.***")
assert timedelta(seconds=plot_time) < timedelta(minutes=1, seconds=0)

## Run trained model for inference

<img src="images/pipeline_inference.png" style="width: 1500px;"/>

In [None]:
# Predict the response for one drug / cell-line pair and time the call.
# The result and duration get their own names (`predicted_ic50`,
# `inference_time`) so they do not overwrite `test_res` / `test_time`
# produced by the test cell.
start_time = default_timer()
# Paclitaxel / OVKATE_OVARY cancer
# Molecule string for paclitaxel (looks like SMILES notation; confirm the
# format expected by `infer`).
paclitaxel = (
    "CC1=C2C(C(=O)C3(C(CC4C(C3C(C(C2(C)C)(CC1OC(=O)C(C(C5=CC=CC=C5)"
    "NC(=O)C6=CC=CC=C6)O)O)OC(=O)C7=CC=CC=C7)(CO4)OC(=O)C)O)C)OC(=O)C"
)
predicted_ic50 = rec_pipeline.infer(training_hp, paclitaxel, "OVKATE_OVARY")
inference_time = default_timer() - start_time
print(f"***Took {inference_time} seconds for inference.***")
print("IC50 predicted: ", predicted_ic50)
# Inference must finish in under 30 seconds.
assert timedelta(seconds=inference_time) < timedelta(seconds=30)

## Run trained model to save node embeddings

In [None]:
# Run the trained model to save node embeddings and time the call.
# The duration is stored in `embedding_time` rather than reusing
# `feature_generation_time`, which belongs to the feature-generation cell.
start_time = default_timer()
rec_pipeline.infer_embeddings(model_hp)
embedding_time = default_timer() - start_time
print(f"***Took {embedding_time} seconds to save node embeddings.***")
# Saving embeddings must finish in under one minute.
assert timedelta(seconds=embedding_time) < timedelta(minutes=1, seconds=0)