# Introduction
- 分子を `acs`を使って可視化
- URL: https://www.kaggle.com/kernels/scriptcontent/15173624/download

# Import everything I nead :)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from ase import Atoms
import ase.visualize
import networkx as nx

# Data Preparation

In [2]:
path = './data/champs-scalar-coupling/structures.csv'
df_struct = pd.read_csv(path)

path = './data/champs-scalar-coupling/train.csv'
df_train = pd.read_csv(path)

# data visualization

In [3]:
df_struct.head(10)

Unnamed: 0,molecule_name,atom_index,atom,x,y,z
0,dsgdb9nsd_000001,0,C,-0.012698,1.085804,0.008001
1,dsgdb9nsd_000001,1,H,0.00215,-0.006031,0.001976
2,dsgdb9nsd_000001,2,H,1.011731,1.463751,0.000277
3,dsgdb9nsd_000001,3,H,-0.540815,1.447527,-0.876644
4,dsgdb9nsd_000001,4,H,-0.523814,1.437933,0.906397
5,dsgdb9nsd_000002,0,N,-0.040426,1.024108,0.062564
6,dsgdb9nsd_000002,1,H,0.017257,0.012545,-0.027377
7,dsgdb9nsd_000002,2,H,0.915789,1.358745,-0.028758
8,dsgdb9nsd_000002,3,H,-0.520278,1.343532,-0.775543
9,dsgdb9nsd_000003,0,O,-0.03436,0.97754,0.007602


In [22]:
# Select a molecule
random_molecule = random.choice(df_struct['molecule_name'].unique())
molecule = df_struct[df_struct['molecule_name'] == random_molecule]
display(molecule)

Unnamed: 0,molecule_name,atom_index,atom,x,y,z
245832,dsgdb9nsd_015473,0,O,-0.083386,0.065901,0.320475
245833,dsgdb9nsd_015473,1,C,-0.105194,1.258943,0.215189
245834,dsgdb9nsd_015473,2,C,-1.350355,2.150734,0.075749
245835,dsgdb9nsd_015473,3,N,-0.884363,3.513918,-0.183782
245836,dsgdb9nsd_015473,4,C,0.452747,3.59886,0.421798
245837,dsgdb9nsd_015473,5,C,1.12325,2.216198,0.194573
245838,dsgdb9nsd_015473,6,C,2.182414,1.854511,1.111668
245839,dsgdb9nsd_015473,7,C,3.050462,1.578439,1.895533
245840,dsgdb9nsd_015473,8,H,-2.022944,1.77836,-0.700485
245841,dsgdb9nsd_015473,9,H,-1.897787,2.104605,1.028324


In [23]:
# Get atomic coordinates
atoms = molecule.iloc[:, 3:].values
print(atoms)

[[-0.08338637  0.0659007   0.32047505]
 [-0.10519409  1.25894312  0.21518912]
 [-1.35035467  2.15073436  0.07574886]
 [-0.8843634   3.51391805 -0.18378233]
 [ 0.4527466   3.59885976  0.42179793]
 [ 1.12324966  2.21619768  0.19457346]
 [ 2.18241391  1.85451082  1.11166795]
 [ 3.05046236  1.57843935  1.89553322]
 [-2.0229438   1.77835952 -0.70048534]
 [-1.89778737  2.10460533  1.02832427]
 [-0.80260117  3.65324485 -1.18580968]
 [ 1.02196509  4.42472261 -0.00734897]
 [ 0.35438482  3.78009899  1.49823042]
 [ 1.49809554  2.18019491 -0.84024725]
 [ 3.81941307  1.32310076  2.5818109 ]]


In [24]:
# Get atomic symbols
symbols = molecule.iloc[:, 2].values
print(symbols)

['O' 'C' 'C' 'N' 'C' 'C' 'C' 'C' 'H' 'H' 'H' 'H' 'H' 'H' 'H']


In [25]:
system = Atoms(positions=atoms, symbols=symbols)

ase.visualize.view(system, viewer="x3d")

# Summary

In [6]:
def view(molecule):
    # Select a molecule
    mol = df_struct[df_struct['molecule_name'] == molecule]
    
    # Get atomic coordinates
    xcart = mol.iloc[:, 3:].values
    
    # Get atomic symbols
    symbols = mol.iloc[:, 2].values
    
    # Display molecule
    system = Atoms(positions=xcart, symbols=symbols)
    print('Molecule Name: %s.' %molecule)
    return ase.visualize.view(system, viewer="x3d")

random_molecule = random.choice(df_struct['molecule_name'].unique())
view(random_molecule)

Molecule Name: dsgdb9nsd_101759.


---
**check coupling constant**

In [49]:
selected_idx =  df_train['molecule_name']==random_molecule
print(random_molecule)
print(len(selected_idx))
select_df_train = df_train[selected_idx]
select_df_train

dsgdb9nsd_055240
4658147


Unnamed: 0,id,molecule_name,atom_index_0,atom_index_1,type,scalar_coupling_constant
1697679,1697679,dsgdb9nsd_055240,9,0,1JHN,58.1314
1697680,1697680,dsgdb9nsd_055240,9,1,2JHC,-2.63722
1697681,1697681,dsgdb9nsd_055240,9,3,3JHC,4.50238
1697682,1697682,dsgdb9nsd_055240,9,10,2JHH,6.08805
1697683,1697683,dsgdb9nsd_055240,10,0,1JHN,58.9621
1697684,1697684,dsgdb9nsd_055240,10,1,2JHC,3.91606
1697685,1697685,dsgdb9nsd_055240,10,3,3JHC,0.37553
1697686,1697686,dsgdb9nsd_055240,11,0,3JHN,-0.032229
1697687,1697687,dsgdb9nsd_055240,11,1,2JHC,-1.95108
1697688,1697688,dsgdb9nsd_055240,11,3,1JHC,84.8963


In [50]:
print(select_df_train['atom_index_0'].unique())
print(select_df_train['atom_index_1'].unique())

[ 9 10 11 12 13 14 15]
[ 0  1  3 10  4  5  6 12 13 14 15]


---
**structure**

In [61]:
selected_idx = df_struct['molecule_name'] == random_molecule
    
# Get atomic symbols
symbols = df_struct[selected_idx]
symbols

Unnamed: 0,molecule_name,atom_index,atom,x,y,z
896197,dsgdb9nsd_055240,0,N,0.647517,-0.319263,0.49137
896198,dsgdb9nsd_055240,1,C,-0.217007,0.206028,-0.409664
896199,dsgdb9nsd_055240,2,O,0.067337,1.061718,-1.225247
896200,dsgdb9nsd_055240,3,C,-1.652756,-0.343492,-0.329518
896201,dsgdb9nsd_055240,4,C,-2.196456,-0.773423,-1.704721
896202,dsgdb9nsd_055240,5,C,-1.904889,-2.276444,-1.715053
896203,dsgdb9nsd_055240,6,C,-1.840747,-2.6552,-0.240899
896204,dsgdb9nsd_055240,7,O,-1.860131,-3.736319,0.264612
896205,dsgdb9nsd_055240,8,O,-1.73106,-1.509796,0.510214
896206,dsgdb9nsd_055240,9,H,1.588331,0.036807,0.524665


---

**相互作用をする原子の数を調べる**

In [77]:
atom_num = 10                                          # この数字がなんの原子にあたるかは、上のstructureを見よ 
idx = select_df_train['atom_index_0'] == atom_num
select_df_train['atom_index_1'][idx]

1697683    0
1697684    1
1697685    3
Name: atom_index_1, dtype: int64

---> この数は、上の3D図と見比べればわかるが、触れている数に相当する  
---> となると、

---

---> 上記2つのdataframeについて
---> 上のデータフレームは、カップリングコンスタントの組み合わせが書かれている  
---> 下のデータフレームは構造について。  
---> 上の表の、atom_index0は、Hのみを示している。  
---> atom_index_0, 1 双方にO(酸素)がないのは、$\gamma$が小さいからである。  
---> 相互作用の数は、H,N,O間の数だけあるはず