In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
%matplotlib widget
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.pyplot as plt
import chemiscope
from widget_code_input import WidgetCodeInput
from ipywidgets import Layout, Output, Textarea
from scwidgets import (AnswerRegistry, TextareaAnswer, CodeDemo,
                       ParametersBox, PyplotOutput, ClearedOutput,
                       AnimationOutput,CheckRegistry,Answer)
import ase
from ase.io import read, write

In [None]:
#### AVOID folding of output cell 

In [None]:
%%html
<style>
/* Rule for the jp-* (JupyterLab-style) class names: let a scrolled output
   area grow with its content instead of being folded. */
.jp-CodeCell.jp-mod-outputsScrolled .jp-Cell-outputArea  {  height:auto !important;
    max-height: 5000px; overflow-y: hidden }
</style>
<style>
/* Same intent for the output_* class names. */
.output_wrapper, .output {
    height:auto !important;
    max-height:4000px;  /* your desired max-height here */
}
.output_scroll {
    box-shadow:none !important;
    /* vendor-prefixed properties start with a dash; without it the
       declaration is not valid CSS and is ignored */
    -webkit-box-shadow:none !important;
}
</style>

Please enter your name as `SurnameName` to initialize the answer file. 

In [None]:
# Module-wide registries used by all the exercise cells below: one receives
# the answer widgets, the other receives the automated checks.
answer_registry = AnswerRegistry(prefix="module_01")
check_registry = CheckRegistry()

# Show the answer-registry widget (this is where the name is entered).
display(answer_registry)

You can write here general comments you may have on this module. 

In [None]:
# Full-width free-text box for overall feedback, registered under the
# "module-summary" key and then shown.
module_summary = TextareaAnswer(
    "General comments on this module",
    layout=Layout(width="100%"),
)
answer_registry.register_answer_widget("module-summary", module_summary)
display(module_summary)

# Atomic structures on a computer

In an atomistic description of matter, a configuration of a structure is entirely determined by

* The chemical nature of the atoms, $a_i$

* Their positions, $\mathbf{r}_i$, corresponding to a list of $(x,y,z)$ Cartesian coordinates

* Possibly, three unit-cell vectors $\mathbf{h}_{i=1,2,3}$ that describe the periodicity of a lattice

There is a [babel of formats](http://openbabel.org/), often poorly standardized, that have been developed to store atomic configurations. Notable examples are `pdb` files, used for biological structures (e.g. in the [protein data bank](https://www.rcsb.org/)), `cif` files that are often used to store crystallographic data (the main format for the [Cambridge structural database](https://www.ccdc.cam.ac.uk/solutions/csd-core/components/csd/)), and the `xyz` format, one of the simplest (and most abused) formats, in which atomic positions are stored according to the schema

```
N_ATOMS
comment line
Element X Y Z
...
```

The comment line is often abused to add further information, e.g. the lattice parameters following the format 

```
Lattice='h1x h1y h1z h2x h2y h2z h3x h3y h3z'
```

Multiple blocks corresponding to different structures can be simply concatenated -- although many programs assume all structures in a single file to have the same number and type of atoms. 

An example of the content of an `xyz` file (the QM7b dataset, from [DOI: 10.1088/1367-2630/15/9/095003](https://doi.org/10.1088/1367-2630/15/9/095003) ):

```
!head -n 17 data/qm7b-ase.xyz
```

In [None]:
# Print the first 17 lines of the dataset file (shell escape; requires `head`).
!head -n 17 data/qm7b-ase.xyz

<span style="color:blue">**01** What is the chemical formula of these two structures? Can you also guess what actual molecules they correspond to? If you can't figure it out by looking at the coordinates, we'll look at this file later on, so you can "cheat" if you can't see the structure based on the distance!</span>

In [None]:
# Free-text answer box for question 01, registered under "ex01-answer".
ex01_txt = TextareaAnswer(
    "Enter your answer",
    layout=Layout(width="100%"),
)
answer_registry.register_answer_widget("ex01-answer", ex01_txt)
display(ex01_txt)

# Loading and defining atomic structures with ASE

In this course we will use the [Atomic Simulation Environment](https://wiki.fysik.dtu.dk/ase/) to load and manipulate structures. ASE stores structures in an `Atoms` class, which contains `positions`, `symbols` and `cell` members. Atomic positions are typically interpreted to be expressed in Ångstrom ($10^{-10}$m).

Structures can be loaded from disk using the `read` command (from `ase.io`)

```
# the second argument determines the slice of the file that will be read (e.g. 0 to load the first frame)
# it can be either a python slice() or a string representation with the usual start[:end][:stride] format
qm7 = read("data/qm7b-ase.xyz", ":")  
```

or created manually

```
methane = ase.Atoms(symbols="CH4", positions=[ 
    [1.00, -0.00, -0.00], 
    [2.09, 0.00,  0.00], 
    [0.63,  1.03,  0.00], 
    [0.63, -0.53,  0.88],
    [0.64, -0.51, -0.91]]
    )
```

The atomic positions, labels, or the unit cell can also be modified as common arrays

```
methane.symbols[1] = "Cl"   # turn the molecule into chloromethane
```

*NB:*
1. the frame indices are 0-based
2. atoms indices are 0-based
3. symbols, positions and cell can be manipulated as arrays, but implement some syntactic sugar, e.g. you can set symbols in compact, string form, e.g. `methane.symbols = "CH4"`
See the [documentation for the `ase.Atoms` object](https://wiki.fysik.dtu.dk/ase/ase/atoms.html) for more details. 

<span style="color:blue"> **02** Write a function that loads the structure with index 1 from the `data/qm7b-ase.xyz` file. What is it? Modify the structure so it corresponds to methylammonium, $\mathrm{CH_3NH_3^+}$, one of the organic cations used in [hybrid perovskite solar cells](https://en.wikipedia.org/wiki/Methylammonium_lead_halide).
Get a nice snapshot of the structure! </span> <br>

You can also play around with the function to load the full file (use `index=":"` as an option) and visualize the 7000-something molecules in the QM7 dataset. Experiment with the visualization options of the widget.

In [None]:
def fingerprint_ase_atoms(atoms):
    """Collapse a structure into a single number used to compare it to a reference.

    :param atoms: object exposing ``get_all_distances()`` and ``numbers``
        (an ``ase.Atoms`` in this notebook)
    :return: sum over all entries of the distance matrix multiplied
        (with numpy broadcasting) by ``atoms.numbers``
    """
    pair_distances = atoms.get_all_distances()
    weighted_distances = pair_distances * atoms.numbers
    return np.sum(weighted_distances)

def chemiscope_update_visualizers_structure(code_input, visualizers):
    """Run the student's function and show its result in a chemiscope viewer.

    :param code_input: code-input widget; ``get_function_object()`` must
        return a zero-argument callable that produces either a single
        ``ase.Atoms`` object or a sequence of them
    :param visualizers: list of output widgets; only the first one is used,
        as the context in which the viewer is displayed
    """
    structure = code_input.get_function_object()()
    if len(structure) == 0:  # silently reject a void code_input
        return

    # chemiscope.show expects a list of frames: wrap a single structure, but
    # pass a sequence of structures (e.g. a file read with index=":") as it is
    # instead of nesting it inside another list.
    if isinstance(structure, ase.Atoms):
        frames = [structure]
    else:
        frames = list(structure)

    with visualizers[0]:
        chemiscope_widget = chemiscope.show(frames = frames, properties=chemiscope.extract_properties(frames), mode="structure")
        display(chemiscope_widget)

def fingerprint_ex02(output):
    """Validate the structure returned for exercise 02 and fingerprint it.

    :param output: object returned by the student's function
    :return: ``fingerprint_ase_atoms(output)``
    :raises AssertionError: if ``output`` is not an ``ase.Atoms`` holding
        8 atoms, exactly one of which is nitrogen and six of which are hydrogen
    """
    assert isinstance(output, ase.Atoms), f"TypeAssert failed: Expected type ase.Atoms but got {type(output)}."

    expected_natoms = 8
    natoms = len(output)
    assert natoms == expected_natoms, f"LenAssert failed: Expected length {expected_natoms} but got {natoms}."

    # count atoms by atomic number: 7 is nitrogen, 1 is hydrogen
    n_nitrogen = np.sum(output.numbers == 7)
    assert n_nitrogen == 1, f"One nitrogen is expected in the molecule. Found {n_nitrogen} nitrogen(s)."
    n_hydrogen = np.sum(output.numbers == 1)
    assert n_hydrogen == 6, f"Six hydrogens are expected in the molecule. Found {n_hydrogen} hydrogen(s)."

    return fingerprint_ase_atoms(output)

# Code-input widget for exercise 02. The docstring and body strings are the
# template presented to the student for the `methylammonium` function.
ex02_wci = WidgetCodeInput(
    function_name="methylammonium",
    function_parameters="",
    docstring="""
Loads the structure #1 from the data/qm7b-ase.xyz file, 
and modifies the composition so that it corresponds to CH3NH3+. 

:return: an ASE atoms object that describes the molecular structure
""",
    function_body="""
# Write your solution, then click on the button below to update the plotter 
# and check against the reference value

import ase
from ase.io import read

structure = []  # load here

# modify the structure ...

return structure
""",
)

In [None]:
def ex02_updater(code_input,visualizers):
    """Run the student's function and display the result with chemiscope.

    Accepts either a single ``ase.Atoms`` object or a sequence of them.

    :param code_input: code-input widget; ``get_function_object()`` must
        return a zero-argument callable
    :param visualizers: list of output widgets; only the first one is used
    """
    structure = code_input.get_function_object()()
    if len(structure) == 0: # silently reject a void code_input
        return

    # chemiscope.show expects a list; isinstance (rather than an exact type
    # comparison) also wraps instances of Atoms subclasses correctly
    if isinstance(structure, ase.Atoms):
        structure = [structure]

    with visualizers[0]:
        cs_widget = chemiscope.show(frames = structure, properties=chemiscope.extract_properties(structure), mode="structure")
        display(cs_widget)

# Interactive demo for exercise 02: the student's code is run, displayed with
# chemiscope, and checked against a reference fingerprint.
ex02_code_demo = CodeDemo(code_input= ex02_wci,
                          check_registry=check_registry,
                          visualizers = [ClearedOutput()], 
                          update_visualizers = chemiscope_update_visualizers_structure
                          )

# The reference output is a floating-point fingerprint, so it is compared
# with an explicit tolerance, in the same way as the exercise-05 check.
check_registry.add_check(ex02_code_demo,
                         inputs_parameters=[{}],
                         reference_outputs=[237.7334114111937],
                         fingerprint=fingerprint_ex02,
                         equal=np.allclose
                         ) 
answer_registry.register_answer_widget("ex02-function", ex02_code_demo)

ex02_code_demo.run_and_display_demo()



# Unit cell and periodic structures

The ASE format and the structure viewer also allow you to manipulate periodic structures corresponding to bulk materials. To do so, you need to set the `cell` member of an `Atoms` structure to contain the (row-major) cell matrix. ASE considers separately the information on whether the unit cell should be considered as indicating just a finite volume that contains the atoms, or as a periodic repeat unit: this is controlled by the `pbc` parameter - standing for Periodic Boundary Conditions.

Polonium is the only element that crystallizes in a simple-cubic structure. It has a density of 9.196 g/cm<sup>3</sup>. The isotope of polonium that can be isolated from uranium ores is $\mathrm{^{210} Po}$, that has a molar mass of 210 g/mol. Consider that one mole contains `6.02214076e23` atoms.

<span style="color:blue">**03** What is the lattice parameter of simple-cubic Po? Write a function that returns an `Atoms` object that describes a single unit cell of Po, with one atom at the origin, and take a snapshot! </span>

Take this opportunity to experiment with the visualization options for crystalline structures: by clicking on the ☰ icon, you can choose to visualize the unit cell, and replicate the cell multiple times along the three axes. 

In [None]:
# Code-input widget for exercise 03. The docstring and body strings are the
# template presented to the student for the `polonium` function.
ex03_wci = WidgetCodeInput(
    function_name="polonium",
    function_parameters="",
    docstring="""
Build a unit cell of simple-cubic, alpha-Po. 

:return: an ASE atoms object that describes the unit cell structure
""",
    function_body="""
# Write your solution, then click on the button below to update the plotter 
# and check against the reference value

import ase
from ase.io import read

a0 = 0. # lattice parameter
# complete the call, substituting placeholders with actual values
structure = ase.Atoms(symbols="...", positions= ... , 
                      cell= [ [...], ...] , 
                      pbc=True)

return structure
""",
)

In [None]:
def ex03_updater(code_input,visualizers):
    """Run the student's function and display the returned structure.

    :param code_input: code-input widget; ``get_function_object()`` must
        return a zero-argument callable producing the structure
    :param visualizers: list of output widgets; only the first one is used
    """
    structure = code_input.get_function_object()()
    if len(structure) != 0: # silently reject a void code_input
        clear_output = visualizers[0]
        with clear_output:
            # chemiscope.show expects a list of frames, so the single
            # structure is wrapped (as the other updaters in this notebook do);
            # the 'dummy' property carries one value for that one frame
            cs_widget = chemiscope.show(frames = [structure], mode="structure", 
                                properties={'dummy' : {"target":"structure", 'values': [0]}} )
            display(cs_widget)
def ex03_fingerprint(output):
    """Validate the structure returned for exercise 03 and fingerprint it.

    :param output: object returned by the student's function
    :return: ``fingerprint_ase_atoms(output)``
    :raises AssertionError: if ``output`` is not an ``ase.Atoms`` with a
        single atom of atomic number 84 and the expected cell matrix
    """
    assert isinstance(output, ase.Atoms), f"TypeAssert failed: Expected type ase.Atoms but got {type(output)}."

    natoms = len(output)
    assert natoms == 1, f"LenAssert failed: Expected length 1 but got {natoms}."

    n_polonium = np.sum(output.numbers == 84)
    assert n_polonium == 1, f"One Polonium is expected in the molecule. Found {n_polonium} Polonium atom(s)."

    # presumably the lattice parameter that follows from the density and molar
    # mass quoted in the exercise text -- TODO confirm
    reference_cell = 3.3596173*np.eye(3)
    assert np.allclose(output.cell, reference_cell), "Unit cell has wrong dimensions."

    return fingerprint_ase_atoms(output)
 
    
# Interactive demo for exercise 03: the student's code is run, displayed with
# chemiscope, and checked against the reference fingerprint.
ex03_code_demo = CodeDemo(
    code_input=ex03_wci,
    check_registry=check_registry,
    visualizers=[ClearedOutput()],
    update_visualizers=chemiscope_update_visualizers_structure,
)

check_registry.add_check(
    ex03_code_demo,
    inputs_parameters=[{}],
    reference_outputs=[0.0],
    fingerprint=ex03_fingerprint,
)

ex03_code_demo.run_and_display_demo()

answer_registry.register_answer_widget("ex03-function", ex03_code_demo)

In [None]:
# Free-text answer box for question 03, registered under "ex03-answer".
ex03_txt = TextareaAnswer(
    "Enter your answer here",
    layout=Layout(width="100%"),
)
answer_registry.register_answer_widget("ex03-answer", ex03_txt)
display(ex03_txt)

# Periodic-boundary conditions, supercells, wrapping structures

Periodic boundary conditions are not only used to model perfect crystals. They are also used as a practical way to describe bulk systems, while using a finite number of atomic degrees of freedom: the size of the cell and the coordinates of the atoms in a single repeat unit. Compare these two systems:

a) a finite-sized droplet with 10 water molecules
<img src="figures/pbc-1.png" width="400"/>
b) a periodic system with a repeat unit of 10 water molecules
<img src="figures/pbc-2.png" width="400"/>

<span style="color:blue">**04** Discuss in which ways the two scenarios differ from bulk water: these are usually referred to as _finite-size effects._ You can think of the impact of having just a finite number of water molecules in terms of the atomic environment "seen" by each water molecule, or discuss in more macroscopic terms based on bulk and interfaces. Which of the two cases would you expect to be closer to the limit of a large number of water molecules? </span>

In [None]:
# Free-text answer box for question 04, registered under "ex04-answer".
ex04_txt = TextareaAnswer(
    "Enter your answer here",
    layout=Layout(width="100%"),
)
answer_registry.register_answer_widget("ex04-answer", ex04_txt)
display(ex04_txt)

We will see later how we can define and compute interactions in a periodic system such as this. For the moment, let's look at a snapshot from a real simulation of liquid water, with a supercell containing 32 water molecules. You'll need to load the file `data/h2o-32-snapshot.xyz`, and return it as an `Atoms` object. Switch on the unit cell visualization and look at the position of the water molecules relative to it. Does this look at all like liquid water?

<span style="color:blue">**05** After you have looked at the original structure, write code to "fold" the coordinates of the atoms so that they are within the unit cell. Some molecules will be "broken" across cell boundaries - you can see what happens when you visualize multiple periodic replicas, using the `supercell` options in the visualizer. Does the structure still describe the same infinite periodic bulk material? </span>

In [None]:
# Code-input widget for exercise 05. The docstring and body strings are the
# template presented to the student for the `water_pbc` function.
ex05_wci = WidgetCodeInput(
    function_name="water_pbc",
    function_parameters="",
    docstring="""
Loads data/h2o-32-snapshot.xyz as an ase.Atoms object, folds the atomic
coordinates into the supercell, and returns it so it can be visualized 

:return: an ASE atoms object containing the atomic coordinates "folded" 
     into the unit cell.
""",
    function_body="""
# Write your solution, then click on the button below to update the plotter 
# and check against the reference value

import ase
from ase.io import read

# complete the call, substituting placeholders with actual values
structure = read( ... )

# add here code to wrap the structure. you'll need to use the cell parameters and the 
# atomic positions. 
# NB1: ASE has a wrap() method 
# structure.wrap()
# you can use it to get an idea of what should happen, but you'll have to implement
# wrapping yourself
# NB2: the positions of each atom should be modified, shifting them by an integer number
#      of lattice parameters so they are between 0 and the lattice parameter.
# NB3: you can (and should) exploit the fact that the supercell is cubic, but if you
#     want extra brownie points you can try a general version. 

# structure.positions = ....

return structure
""",
)


In [None]:
def ex05_updater(code_input,visualizers):
    """Run the student's function and display the returned structure.

    :param code_input: code-input widget; ``get_function_object()`` must
        return a zero-argument callable producing the structure
    :param visualizers: list of output widgets; only the first one is used
    """
    student_function = code_input.get_function_object()
    structure = student_function()

    if len(structure) == 0: # silently reject a void code_input
        return

    with visualizers[0]:
        # single frame, accompanied by one placeholder per-structure property
        placeholder_properties = {'dummy' : {"target":"structure", 'values': [0]}}
        cs_widget = chemiscope.show(
            frames=[structure],
            mode="structure",
            properties=placeholder_properties,
        )
        display(cs_widget)
    
def ex05_fingerprint(output):
    """Validate the structure returned for exercise 05 and fingerprint it.

    :param output: object returned by the student's function
    :return: ``fingerprint_ase_atoms(output)``
    :raises AssertionError: if ``output`` is not an ``ase.Atoms`` holding
        96 atoms
    """
    # type and length checks are spelled out explicitly before fingerprinting
    assert isinstance(output, ase.Atoms), f"TypeAssert failed: Expected type ase.Atoms but got {type(output)}."
    natoms = len(output)
    assert natoms == 96, f"LenAssert failed: Expected length 96 but got {natoms}."
    return fingerprint_ase_atoms(output)

# Interactive demo for exercise 05: the student's code is run, displayed with
# chemiscope, and checked against the reference fingerprint.
ex05_code_demo = CodeDemo(
    code_input=ex05_wci,
    check_registry=check_registry,
    visualizers=[ClearedOutput()],
    update_visualizers=chemiscope_update_visualizers_structure,
)

# floating-point fingerprint, compared with np.allclose
check_registry.add_check(
    ex05_code_demo,
    inputs_parameters=[{}],
    reference_outputs=[199303.2380212066],
    fingerprint=ex05_fingerprint,
    equal=np.allclose,
)

ex05_code_demo.run_and_display_demo()
answer_registry.register_answer_widget("ex05-function", ex05_code_demo)

In [None]:
# Free-text answer box for question 05, registered under "ex05-answer".
ex05_txt = TextareaAnswer(
    "Enter your answer here",
    layout=Layout(width="100%"),
)
answer_registry.register_answer_widget("ex05-answer", ex05_txt)
display(ex05_txt)