<a href="https://colab.research.google.com/github/swansonk14/chemprop/blob/broad/broad/Broad_t_SNE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
import os
from typing import List, Union

import pandas as pd
from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import Figure, Layout, Scatter

In [0]:
def configure_plotly_browser_state():
  import IPython
  display(IPython.core.display.HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
            },
          });
        </script>
        '''))

In [0]:
red_colorscale = [[0.0, 'rgb(102,0,0)'], [2/3, 'rgb(255,0,0)'], [1.0, 'rgb(255,204,204)']]
blue_colorscale = [[0.0, 'rgb(0,0,102)'], [2/3, 'rgb(0,0,255)'], [1.0, 'rgb(204,204,255)']]

def scatter(data_name: str, colorscale: List[List[Union[float, str]]]) -> Scatter:
    with open(f'/content/gdrive/My Drive/t-SNE/data/{data_name}') as f:
        data = pd.read_csv(f)
    
    scatter = Scatter(
        x = data['x'],
        y = data['y'],
        mode = 'markers',
        marker = dict(
            color=data['color'],
            colorscale=colorscale
        ),
        text = [f'value={value:.4f}, {smiles}' for value, smiles in zip(data['value'], data['smiles'])]
    )
    
    return scatter

def tsne(data_name_1: str, data_name_2: str = None):
    configure_plotly_browser_state()
    init_notebook_mode(connected=False)
    
    data = [scatter(data_name_1, red_colorscale)]
    
    if data_name_2 is not None:
      data.append(scatter(data_name_2, blue_colorscale))

    layout = Layout(
        hovermode = 'closest',
        xaxis = dict(
            autorange=True,
            showgrid=False,
            zeroline=False,
            showline=False,
            ticks='',
            showticklabels=False
        ),
        yaxis = dict(
            autorange=True,
            showgrid=False,
            zeroline=False,
            showline=False,
            ticks='',
            showticklabels=False
        )
    )

    fig = Figure(data=data, layout=layout)
    iplot(fig)

In [29]:
print('Available t-SNE data files\n')
for name in os.listdir('/content/gdrive/My Drive/t-SNE/data/'):
  print(name)

Available t-SNE data files

10k_inhibition_smiles_p30.csv
2600_inhibition_smiles_p500.csv
2600_bicarbinhibition_smiles_p30.csv
10k_bicarbinhibition_smiles_p5.csv
2600_bicarbinhibition_smiles_p5.csv
10k_bicarbinhibition_smiles_p50.csv
10k_bicarbinhibition_smiles_p30.csv
10k_inhibition_smiles_p50.csv
2600_bicarbinhibition_smiles_p50.csv
10k_bicarbinhibition_smiles_p100.csv
2600_inhibition_smiles_p100.csv
2600_bicarbinhibition_smiles_p2.csv
2600_inhibition_smiles_p5.csv
2600_inhibition_smiles_p50.csv
2600_inhibition_smiles_p2.csv
2600_bicarbinhibition_smiles_p500.csv
10k_inhibition_smiles_p5.csv
10k_bicarbinhibition_smiles_p500.csv
10k_inhibition_smiles_p2.csv
10k_bicarbinhibition_smiles_p2.csv
2600_inhibition_smiles_p30.csv
2600_bicarbinhibition_smiles_p100.csv
10k_inhibition_smiles_p100.csv
10k_inhibition_smiles_p500.csv


In [31]:
# Replace the two files names below with any of the files listed above
tsne('2600_inhibition_smiles_p500.csv', '10k_inhibition_smiles_p500.csv')