In [1]:
%%HTML
<!-- Presentation only: overrides the width of the notebook page, menu bar and toolbar containers. -->
<style>
   div#notebook-container    { width: 95%; }
   div#menubar-container     { width: 65%; }
   div#maintoolbar-container { width: 99%; }
</style>

In [6]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm

import numpy as np
import pandas as pd
import random

import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
from sympy.geometry import Point3D

In [4]:
# Directory that holds the input CSV files, given relative to the notebook's working directory.
file_folder = '../../data/input'
# Bare last expression: the directory listing becomes the cell output.
os.listdir(file_folder)

['sample_submission.csv',
 'magnetic_shielding_tensors.csv',
 'potential_energy.csv',
 'scalar_coupling_contributions.csv',
 'dipole_moments.csv',
 'mulliken_charges.csv',
 'train.csv',
 'test.csv',
 'structures.csv',
 'structures']

In [5]:
# Load every CSV table from `file_folder` into its own DataFrame.
# Only `structures` is used by the plotting helpers visible in this part of the notebook.
train = pd.read_csv(f'{file_folder}/train.csv')
test = pd.read_csv(f'{file_folder}/test.csv')
magnetic_shielding_tensors = pd.read_csv(f'{file_folder}/magnetic_shielding_tensors.csv')
dipole_moments = pd.read_csv(f'{file_folder}/dipole_moments.csv')
mulliken_charges = pd.read_csv(f'{file_folder}/mulliken_charges.csv')
potential_energy = pd.read_csv(f'{file_folder}/potential_energy.csv')
scalar_coupling_contributions = pd.read_csv(f'{file_folder}/scalar_coupling_contributions.csv')
structures = pd.read_csv(f'{file_folder}/structures.csv')

In [11]:


# `structures` must already be loaded by the data-loading cell; collect the distinct molecule names from it.
molecule_names = structures.molecule_name.unique()

# Initialise plotly's notebook mode so figures passed to `iplot` render inline.
init_notebook_mode(connected=True)


def plot_molecule(molecule_name, structures_df):
    """Display an interactive 3D plot of a single molecule.

    Atoms are drawn as coloured markers and inferred bonds as line segments.
    A drop-down menu switches the scene annotations between atom indices,
    bond lengths, both, or none.

    Parameters
    ----------
    molecule_name : str
        Value matched against ``structures_df.molecule_name``.
    structures_df : pandas.DataFrame
        Must provide the columns ``molecule_name``, ``atom``, ``x``, ``y``
        and ``z``. Every ``atom`` value must be one of C, F, H, N, O.

    Returns
    -------
    None
        The figure is shown through ``iplot`` as a side effect.

    Raises
    ------
    KeyError
        If an atom symbol is missing from the radius / colour tables.
    """
    # Per-element radii that feed the bond heuristic in get_bonds()
    # (presumably covalent radii in angstroms -- confirm against the data source).
    atomic_radii = dict(C=0.77, F=0.71, H=0.38, N=0.75, O=0.73)
    cpk_colors = dict(C='black', F='green', H='white', N='blue', O='red')

    molecule = structures_df[structures_df.molecule_name == molecule_name]
    coordinates = molecule[['x', 'y', 'z']].values
    x_coordinates = coordinates[:, 0]
    y_coordinates = coordinates[:, 1]
    z_coordinates = coordinates[:, 2]
    elements = molecule.atom.tolist()
    # Explicit array so the element-wise radius sums below do not depend on
    # list + ndarray coercion.
    radii = np.array([atomic_radii[element] for element in elements], dtype=float)

    def get_bonds():
        """Infer bonds from the cartesian coordinates.

        Two atoms count as bonded when their distance is above 0.1 and below
        1.3 times the sum of their radii. Returns a dict that maps
        ``frozenset({i, j})`` (row positions within the molecule) to the
        distance rounded to two decimals.
        """
        ids = np.arange(coordinates.shape[0])
        bonds = dict()
        coordinates_compare, radii_compare, ids_compare = coordinates, radii, ids

        # Each pass compares atom i with atom (i + shift) mod n. Cyclic shifts
        # 1..n//2 already reach every unordered pair: larger shifts only revisit
        # pairs seen earlier and shift n compares each atom with itself.
        for _ in range(len(ids) // 2):
            coordinates_compare = np.roll(coordinates_compare, -1, axis=0)
            radii_compare = np.roll(radii_compare, -1, axis=0)
            ids_compare = np.roll(ids_compare, -1, axis=0)
            distances = np.linalg.norm(coordinates - coordinates_compare, axis=1)
            bond_distances = (radii + radii_compare) * 1.3
            mask = np.logical_and(distances > 0.1, distances < bond_distances)
            distances = distances.round(2)
            new_bonds = {frozenset([i, j]): dist
                         for i, j, dist in zip(ids[mask], ids_compare[mask], distances[mask])}
            bonds.update(new_bonds)
        return bonds

    def atom_trace():
        """Creates an atom trace for the plot"""
        colors = [cpk_colors[element] for element in elements]
        markers = dict(color=colors, line=dict(color='lightgray', width=2), size=7, symbol='circle', opacity=0.8)
        trace = go.Scatter3d(x=x_coordinates, y=y_coordinates, z=z_coordinates, mode='markers', marker=markers,
                             text=elements, name='')
        return trace

    def bond_trace():
        """Creates a bond trace for the plot"""
        # Collect all segments first and build the trace once, instead of
        # re-assigning the trace's x/y/z for every bond. A None entry ends
        # each two-point segment.
        xs, ys, zs = [], [], []
        for i, j in bonds.keys():
            xs.extend((x_coordinates[i], x_coordinates[j], None))
            ys.extend((y_coordinates[i], y_coordinates[j], None))
            zs.extend((z_coordinates[i], z_coordinates[j], None))
        # NOTE(review): marker styling probably has no visible effect on a
        # lines-only trace; `line=dict(color='grey')` may have been intended -- confirm.
        trace = go.Scatter3d(x=xs, y=ys, z=zs, hoverinfo='none', mode='lines',
                             marker=dict(color='grey', size=7, opacity=1))
        return trace

    bonds = get_bonds()

    # One annotation per atom, showing its row position within the molecule.
    zipped = zip(range(len(elements)), x_coordinates, y_coordinates, z_coordinates)
    annotations_id = [dict(text=num, x=x, y=y, z=z, showarrow=False, yshift=15, font=dict(color="blue"))
                      for num, x, y, z in zipped]

    # One annotation per bond, placed at the midpoint of the two atoms.
    annotations_length = []
    for (i, j), dist in bonds.items():
        mid_x, mid_y, mid_z = (coordinates[i] + coordinates[j]) / 2
        annotation = dict(text=dist, x=float(mid_x), y=float(mid_y), z=float(mid_z), showarrow=False, yshift=15)
        annotations_length.append(annotation)

    # Drop-down that swaps which annotation set is shown in the scene.
    updatemenus = [
        dict(buttons=[
                 dict(label='Atom indices',
                      method='relayout',
                      args=[{'scene.annotations': annotations_id}]),
                 dict(label='Bond lengths',
                      method='relayout',
                      args=[{'scene.annotations': annotations_length}]),
                 dict(label='Atom indices & Bond lengths',
                      method='relayout',
                      args=[{'scene.annotations': annotations_id + annotations_length}]),
                 dict(label='Hide all',
                      method='relayout',
                      args=[{'scene.annotations': []}])
                 ],
             direction='down',
             xanchor='left',
             yanchor='top'
             ),
    ]

    data = [atom_trace(), bond_trace()]
    # NOTE(review): `titlefont` is reported as deprecated in newer plotly releases
    # in favour of `title.font` -- confirm against the installed version.
    axis_params = dict(showgrid=False, showbackground=False, showticklabels=False, zeroline=False,
                       titlefont=dict(color='white'))
    layout = dict(scene=dict(xaxis=axis_params, yaxis=axis_params, zaxis=axis_params, annotations=annotations_id),
                  margin=dict(r=0, l=0, b=0, t=0), showlegend=False, updatemenus=updatemenus)

    fig = go.Figure(data=data, layout=layout)
    iplot(fig)

In [8]:
# Pick one molecule at random and plot it.
# NOTE(review): no random seed is set in the visible cells, so the chosen molecule changes from run to run.
molecule_name = random.choice(molecule_names)
plot_molecule(molecule_name, structures)

In [10]:


# Same setup as the plotting cell above, repeated so this cell can be run on its own:
# `structures` must already be loaded; collect the distinct molecule names from it.
molecule_names = structures.molecule_name.unique()

# Initialise plotly's notebook mode (already done above; repeated here).
init_notebook_mode(connected=True)


def molecule_property(molecule_name, structures_df):
    """Compute the bonds and plotly traces of a single molecule without plotting it.

    Parameters
    ----------
    molecule_name : str
        Value matched against ``structures_df.molecule_name``.
    structures_df : pandas.DataFrame
        Must provide the columns ``molecule_name``, ``atom``, ``x``, ``y``
        and ``z``. Every ``atom`` value must be one of C, F, H, N, O.

    Returns
    -------
    tuple
        ``(bonds, atom_trace, bond_trace)`` where ``bonds`` maps
        ``frozenset({i, j})`` (row positions within the molecule) to the
        distance rounded to two decimals, ``atom_trace`` is a
        ``go.Scatter3d`` of atom markers and ``bond_trace`` is a
        ``go.Scatter3d`` of bond line segments.

    Raises
    ------
    KeyError
        If an atom symbol is missing from the radius / colour tables.
    """
    # Per-element radii that feed the bond heuristic in get_bonds()
    # (presumably covalent radii in angstroms -- confirm against the data source).
    atomic_radii = dict(C=0.77, F=0.71, H=0.38, N=0.75, O=0.73)
    cpk_colors = dict(C='black', F='green', H='white', N='blue', O='red')

    molecule = structures_df[structures_df.molecule_name == molecule_name]
    coordinates = molecule[['x', 'y', 'z']].values
    x_coordinates = coordinates[:, 0]
    y_coordinates = coordinates[:, 1]
    z_coordinates = coordinates[:, 2]
    elements = molecule.atom.tolist()
    # Explicit array so the element-wise radius sums below do not depend on
    # list + ndarray coercion.
    radii = np.array([atomic_radii[element] for element in elements], dtype=float)

    def get_bonds():
        """Infer bonds from the cartesian coordinates.

        Two atoms count as bonded when their distance is above 0.1 and below
        1.3 times the sum of their radii.
        """
        ids = np.arange(coordinates.shape[0])
        bonds = dict()
        coordinates_compare, radii_compare, ids_compare = coordinates, radii, ids

        # Each pass compares atom i with atom (i + shift) mod n. Cyclic shifts
        # 1..n//2 already reach every unordered pair: larger shifts only revisit
        # pairs seen earlier and shift n compares each atom with itself.
        for _ in range(len(ids) // 2):
            coordinates_compare = np.roll(coordinates_compare, -1, axis=0)
            radii_compare = np.roll(radii_compare, -1, axis=0)
            ids_compare = np.roll(ids_compare, -1, axis=0)
            distances = np.linalg.norm(coordinates - coordinates_compare, axis=1)
            bond_distances = (radii + radii_compare) * 1.3
            mask = np.logical_and(distances > 0.1, distances < bond_distances)
            distances = distances.round(2)
            new_bonds = {frozenset([i, j]): dist
                         for i, j, dist in zip(ids[mask], ids_compare[mask], distances[mask])}
            bonds.update(new_bonds)
        return bonds

    def atom_trace():
        """Creates an atom trace for the plot"""
        colors = [cpk_colors[element] for element in elements]
        markers = dict(color=colors, line=dict(color='lightgray', width=2), size=7, symbol='circle', opacity=0.8)
        trace = go.Scatter3d(x=x_coordinates, y=y_coordinates, z=z_coordinates, mode='markers', marker=markers,
                             text=elements, name='')
        return trace

    def bond_trace():
        """Creates a bond trace for the plot"""
        # Collect all segments first and build the trace once, instead of
        # re-assigning the trace's x/y/z for every bond. A None entry ends
        # each two-point segment.
        xs, ys, zs = [], [], []
        for i, j in bonds.keys():
            xs.extend((x_coordinates[i], x_coordinates[j], None))
            ys.extend((y_coordinates[i], y_coordinates[j], None))
            zs.extend((z_coordinates[i], z_coordinates[j], None))
        # NOTE(review): marker styling probably has no visible effect on a
        # lines-only trace; `line=dict(color='grey')` may have been intended -- confirm.
        trace = go.Scatter3d(x=xs, y=ys, z=zs, hoverinfo='none', mode='lines',
                             marker=dict(color='grey', size=7, opacity=1))
        return trace

    bonds = get_bonds()
    return bonds, atom_trace(), bond_trace()

In [12]:
# Pick one molecule at random, print its name, and compute its bonds and traces.
# NOTE(review): no random seed is set in the visible cells, so the printed molecule changes from run to run.
molecule_name = random.choice(molecule_names)
print(molecule_name)
bonds, atom_trace, bond_trace = molecule_property(molecule_name, structures)

dsgdb9nsd_099055


In [13]:
# Show the bond dict: frozenset of two atom row positions -> distance rounded to two decimals.
bonds

{frozenset({0, 1}): 1.53,
 frozenset({1, 2}): 1.53,
 frozenset({2, 3}): 1.22,
 frozenset({4, 5}): 1.46,
 frozenset({5, 6}): 1.54,
 frozenset({7, 8}): 1.2,
 frozenset({2, 4}): 1.37,
 frozenset({5, 7}): 1.46,
 frozenset({1, 13}): 1.1,
 frozenset({6, 18}): 1.09,
 frozenset({0, 9}): 1.09,
 frozenset({0, 11}): 1.09,
 frozenset({1, 12}): 1.1,
 frozenset({6, 17}): 1.09,
 frozenset({8, 19}): 1.06,
 frozenset({0, 10}): 1.09,
 frozenset({4, 14}): 1.01,
 frozenset({5, 15}): 1.1,
 frozenset({6, 16}): 1.09}

In [14]:
# Show the atom marker trace returned by molecule_property.
atom_trace

{'type': 'scatter3d',
 'x': array([ 0.15417602, -0.04113136, -1.50630591, -2.42967525, -1.70951099,
        -3.04821834, -3.64353928, -3.01413678, -2.97257323,  1.18708963,
        -0.07788805, -0.51407933,  0.57568086,  0.285293  , -0.92219846,
        -3.66464968, -4.65504343, -3.6852583 , -3.03075496, -2.94992532]),
 'y': array([ 1.43536596, -0.00888262, -0.43698322,  0.34873741, -1.7755512 ,
        -2.36400169, -2.52402578, -3.63624128, -4.69795794,  1.75717991,
         1.54390771,  2.09838002, -0.69960211, -0.1160697 , -2.39822785,
        -1.64467986, -2.93510147, -1.54603605, -3.19855122, -5.62753367]),
 'z': array([-0.391573  ,  0.07055499,  0.01996148,  0.15071237, -0.15887744,
        -0.12284311, -1.53439479,  0.59849374,  1.16007514, -0.23294577,
        -1.45526931,  0.1612653 , -0.51631168,  1.11372614, -0.23792076,
         0.42759885, -1.47610251, -2.01980212, -2.13908595,  1.67327203]),
 'mode': 'markers',
 'marker': {'color': ['black',
   'black',
   'black',
   're

In [15]:
# Show the bond line trace; each bond is two points followed by None as a segment break.
bond_trace

{'type': 'scatter3d',
 'x': [0.1541760192,
  -0.041131357400000006,
  None,
  -0.041131357400000006,
  -1.506305906,
  None,
  -1.506305906,
  -2.429675255,
  None,
  -1.709510989,
  -3.048218345,
  None,
  -3.048218345,
  -3.643539275,
  None,
  -2.972573225,
  -3.014136777,
  None,
  -1.506305906,
  -1.709510989,
  None,
  -3.048218345,
  -3.014136777,
  None,
  -0.041131357400000006,
  0.2852929984,
  None,
  -3.030754957,
  -3.643539275,
  None,
  0.1541760192,
  1.1870896290000001,
  None,
  0.1541760192,
  -0.5140793334,
  None,
  -0.041131357400000006,
  0.5756808563,
  None,
  -3.685258302,
  -3.643539275,
  None,
  -2.972573225,
  -2.949925318,
  None,
  0.1541760192,
  -0.0778880524,
  None,
  -1.709510989,
  -0.9221984595999999,
  None,
  -3.048218345,
  -3.6646496789999996,
  None,
  -4.655043433,
  -3.643539275,
  None],
 'y': [1.4353659619999999,
  -0.0088826206,
  None,
  -0.0088826206,
  -0.4369832199,
  None,
  -0.4369832199,
  0.3487374137,
  None,
  -1.775551204,
  -