# QUBO models from GULP

- [Read the structures](#structures)
- [Geometry analysis](#geom_analysis)
- [Build the QUBO matrix](#build_qubo)
    - [Ewald](#ewald)
    - [Buckingham](#buckingham)
    

In [1]:
import numpy as np
import pandas as pd

from pymatgen.core.structure import Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.core.periodic_table import Element
from pymatgen.io.cif import *

from ase.visualize import view


from pymatgen.io.ase import AseAtomsAdaptor
import sys

import re
import shutil as sh
import pickle


import copy
from sklearn.metrics import mean_squared_error 

#import dataframe_image as dfi

from scipy import constants

import matplotlib.pyplot as plt

import itertools
from itertools import chain

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error as mse


k_b = constants.physical_constants['Boltzmann constant in eV/K'][0]
# print(k_b)
def vview(structure):
    view(AseAtomsAdaptor().get_atoms(structure))

np.seterr(divide='ignore')
plt.style.use('tableau-colorblind10')

import seaborn as sns
from QG_functions import *

# <a id='structures'>Read the structures</a>

In [33]:
fully_lithiated_structure_init = Structure.from_file('data/fully_lithiated_tmp.cif')
delithiated_structure_init = Structure.from_file('data/delithiated_tmp.cif')


In [3]:
n_sites = fully_lithiated_structure_init.num_sites
fully_lithiated_structure_init.translate_sites(np.arange(n_sites),[1,1,1],to_unit_cell=True)

In [4]:
vview(fully_lithiated_structure_init)

## From database

data.keys() are int

In [6]:
with open('data/database/config_size_1.pkl', 'rb') as file:
    data = pickle.load(file)

# Print the loaded data to verify
data.keys()


### Oxygen and Mn deviation from lattice sites
- reorder sites using initial geom -> ordering vector


In [50]:
reference_coords_O = np.array([
    [0.02537, 0.099949, 0.250004],
    [0.432321, 0.349946, 0.083331],
    [0.525372, 0.150048, 0.250004],
    [0.932323, 0.400046, 0.083331],
    [0.251791, 0.155948, 0.083331],
    [0.205901, 0.405946, 0.250004],
    [0.751792, 0.094049, 0.083331],
    [0.705902, 0.344046, 0.250004],
    [0.02537, 0.599944, 0.250004],
    [0.432321, 0.849941, 0.083331],
    [0.525372, 0.650043, 0.250004],
    [0.932323, 0.90004, 0.083331],
    [0.251791, 0.655943, 0.083331],
    [0.205901, 0.90594, 0.250004],
    [0.751792, 0.594044, 0.083331],
    [0.705902, 0.844041, 0.250004],
    [0.02537, 0.099949, 0.583339],
    [0.432321, 0.349946, 0.416677],
    [0.525372, 0.150048, 0.583339],
    [0.932323, 0.400046, 0.416677],
    [0.251791, 0.155948, 0.416677],
    [0.205901, 0.405946, 0.583339],
    [0.751792, 0.094049, 0.416677],
    [0.705902, 0.344046, 0.583339],
    [0.02537, 0.599944, 0.583339],
    [0.432321, 0.849941, 0.416677],
    [0.525372, 0.650043, 0.583339],
    [0.932323, 0.90004, 0.416677],
    [0.251791, 0.655943, 0.416677],
    [0.205901, 0.90594, 0.583339],
    [0.751792, 0.594044, 0.416677],
    [0.705902, 0.844041, 0.583339],
    [0.02537, 0.099949, 0.916685],
    [0.432321, 0.349946, 0.750012],
    [0.525372, 0.150048, 0.916685],
    [0.932323, 0.400046, 0.750012],
    [0.251791, 0.155948, 0.750012],
    [0.205901, 0.405946, 0.916685],
    [0.751792, 0.094049, 0.750012],
    [0.705902, 0.344046, 0.916685],
    [0.02537, 0.599944, 0.916685],
    [0.432321, 0.849941, 0.750012],
    [0.525372, 0.650043, 0.916685],
    [0.932323, 0.90004, 0.750012],
    [0.251791, 0.655943, 0.750012],
    [0.205901, 0.90594, 0.916685],
    [0.751792, 0.594044, 0.750012],
    [0.705902, 0.844041, 0.916685]
])

In [128]:
oxygen_deviation = np.zeros((48,3))

num_samples = 100
structure_n = np.array(list(data.keys())[0:num_samples])

for i in structure_n:
    coords_tmp = np.array(data[i]['final']['config'][-48:])[:,2:]
    coords_tmp = np.array(coords_tmp,dtype='float')
#     print(np.std(coords_tmp-reference_coords_O,axis=1))
    
    oxygen_deviation += np.sqrt((coords_tmp-reference_coords_O)**2)
    num_samples += 1

oxygen_deviation /= num_samples
    
oxygen_deviation @ lattice

array([[0.03203483, 0.18634159, 0.02308549],
       [0.0348693 , 0.18389824, 0.02241988],
       [0.03200502, 0.16849573, 0.02100282],
       [0.0332788 , 0.1739045 , 0.02014861],
       [0.02040328, 0.02455105, 0.02635442],
       [0.02141431, 0.02870567, 0.02917853],
       [0.0172626 , 0.02069725, 0.02044549],
       [0.02119747, 0.02695922, 0.0298339 ],
       [0.02964018, 0.18150134, 0.02435589],
       [0.04460232, 0.1777624 , 0.01958426],
       [0.02884913, 0.17637615, 0.02178791],
       [0.03615568, 0.17285772, 0.01740068],
       [0.02246068, 0.02525063, 0.02160449],
       [0.02190663, 0.02176465, 0.0274155 ],
       [0.01745667, 0.02262662, 0.02706286],
       [0.01563787, 0.01836797, 0.02623249],
       [0.03512037, 0.18208821, 0.0234953 ],
       [0.03579535, 0.17841676, 0.02649308],
       [0.02985084, 0.17151691, 0.02333473],
       [0.04273307, 0.17054686, 0.02415248],
       [0.02149886, 0.02551386, 0.02571346],
       [0.01764809, 0.02571906, 0.03072196],
       [0.

# <a id='geom_analysis'>Geometry analysis</a>
- deviation around perfect lattice site per Li concentration (Mn and O)
- find grid of points where Li are more likely to sit (3D probability map) 
    - need to define a very dense grid
    - use symmetry?

## Read the structures

In [63]:
# Save the DataFrame in chunks

db_file = 'data/database/config_size_1.pkl'

chunksize = 1000
with pd.HDFStore(db_file) as store:
    for i, chunk in enumerate(pd.read_csv('large_data.csv', chunksize=chunksize)):
        store.put(f'data_chunk_{i}', chunk)

# Load and process each chunk
with pd.HDFStore(db_file) as store:
    for key in store.keys():
        chunk = store[key]
        # Process each chunk


# <a id='build_qubo'>Build the QUBO matrix</a>

# <a id='ewald'>Ewald</a>

### Ewald Summation Equations

#### Real Space Summation
The real space part of the Ewald summation for the interaction between particles $i$ and $j$ is given by:

$$
d_{ij}^{\text{real}} = \sum_{\mathbf{n} \neq 0} \frac{\text{erfc}(\alpha |\mathbf{r}_{ij} + \mathbf{n}|)}{|\mathbf{r}_{ij} + \mathbf{n}|} + \frac{\text{erfc}(\alpha |\mathbf{r}_{ij}|)}{|\mathbf{r}_{ij}|}
$$

#### Self-Interaction Correction
The self-interaction correction term for particle $i$ is:

$$
d_{ii}^{\text{self}} = -\frac{\alpha}{\sqrt{\pi}}
$$

#### Reciprocal Space Summation
The reciprocal space part of the Ewald summation for the interaction between particles $i$ and $j$ is given by:

$$
d_{ij}^{\text{reciprocal}} = \sum_{\mathbf{k} \neq 0} \frac{4 \pi}{V |\mathbf{k}|^2} \exp\left(-\frac{|\mathbf{k}|^2}{4 \alpha^2}\right) \cos(\mathbf{k} \cdot \mathbf{r}_{ij})
$$

#### Total Ewald Summation Matrix
The total Ewald summation matrix element $d_{ij}$ is the sum of the real space, reciprocal space, and self-interaction terms:

$$
d_{ij} = d_{ij}^{\text{real}} + d_{ij}^{\text{reciprocal}}
$$

$$
d_{ii} = d_{ii}^{\text{real}} + d_{ii}^{\text{self}}
$$

#### Potential Energy Calculation
The potential energy of the system is calculated as:

$$
E = \frac{1}{2} \sum_{i=1}^{N} \sum_{j=1}^{N} q_i q_j d_{ij}
$$


## From GULP

\documentclass{article}
\usepackage{amsmath}

\begin{document}

\begin{equation}
E_{\text{recip}} = \left(\frac{1}{2}\right) \frac{4\pi}{V} \sum_{\mathbf{G}} \frac{\exp\left(-\frac{G^2}{4\eta}\right)}{G^2} 
\times \sum_i \sum_j q_i q_j \exp\left(-i \mathbf{G} \cdot \mathbf{r}_{ij}\right)
\end{equation}

\begin{equation}
E_{\text{real}} = \frac{1}{2} \sum_i \sum_j q_i q_j \frac{\text{erfc}\left(\eta^{1/2} r_{ij}\right)}{r_{ij}}
\end{equation}

\end{document}


In [104]:
import numpy as np
import math

def compute_ewald_matrix(positions, lattice_vectors, alpha=None, real_depth=5, reciprocal_depth=5):
    """
    Compute the Ewald summation matrix for a system of particles.

    Parameters:
    positions (ndarray): Relative positions of particles (Nx3).
    lattice_vectors (ndarray): Lattice vectors of the unit cell (3x3).
    alpha (float): Ewald parameter controlling the split between real and reciprocal sums. If None, it's calculated.
    real_depth (int): Depth of the real space summation.
    reciprocal_depth (int): Depth of the reciprocal space summation.

    Returns:
    ndarray: Ewald summation matrix (NxN).
    """
    N = len(positions)
    
    # Calculate alpha if not provided
    if alpha is None:
        alpha = 2 / (np.linalg.det(lattice_vectors) ** (1.0 / 3))
    
    # Convert relative positions to absolute positions using lattice vectors
    positions = positions @ lattice_vectors
    
    # Initialize the Ewald summation matrix
    ewald_matrix = np.zeros((N, N))
    
    # Generate real space shifts for neighboring cells
    real_shifts = np.array([np.dot(np.array(shift) - real_depth, lattice_vectors)
                            for shift in np.ndindex(2 * real_depth + 1, 2 * real_depth + 1, 2 * real_depth + 1)
                            if shift != (real_depth, real_depth, real_depth)])
    
    # Real space summation
    for i in range(N):
        for j in range(i, N):
            if i != j:
                r_ij = positions[i] - positions[j]
                d_ij = np.linalg.norm(r_ij)
                ewald_matrix[i, j] += math.erfc(alpha * d_ij) / d_ij
                
                # Include contributions from neighboring cells
                for shift in real_shifts:
                    r_shifted = r_ij + shift
                    d_shifted = np.linalg.norm(r_shifted)
                    ewald_matrix[i, j] += math.erfc(alpha * d_shifted) / d_shifted
    
    # Self-interaction term correction
    for i in range(N):
        ewald_matrix[i, i] -= alpha / math.sqrt(math.pi)
    
    # Generate reciprocal space shifts for the Fourier transform contributions
    reciprocal_vectors = 2 * np.pi * np.linalg.inv(lattice_vectors).T
    reciprocal_shifts = np.array([np.dot(np.array(shift) - reciprocal_depth, reciprocal_vectors)
                                  for shift in np.ndindex(2 * reciprocal_depth + 1, 2 * reciprocal_depth + 1, 2 * reciprocal_depth + 1)
                                  if shift != (reciprocal_depth, reciprocal_depth, reciprocal_depth)])
    
    # Reciprocal space summation
    for i in range(N):
        for j in range(i, N):
            for k in reciprocal_shifts:
                k_norm = np.linalg.norm(k)
                if k_norm > 0:
                    k_dot_r = np.dot(k, positions[j] - positions[i])
                    term = (4 * math.pi / (np.linalg.det(lattice_vectors) * k_norm**2)) * \
                           math.exp(-k_norm**2 / (4 * alpha**2)) * math.cos(k_dot_r)
                    ewald_matrix[i, j] += term
    
    # Convert to electrostatic potential energy (unit conversion)
    ewald_matrix *= 14.399645351950543  # Convert to appropriate units, e.g., eV for electrostatic potential
    
    # Symmetry completion: Ensure the matrix is symmetric
    for i in range(N):
        for j in range(i):
            ewald_matrix[i, j] = ewald_matrix[j, i]
    
    return ewald_matrix

def calculate_potential_energy(ewald_matrix, charges):
    """
    Calculate the potential energy of the system given the Ewald summation matrix and charges.

    Parameters:
    ewald_matrix (ndarray): Ewald summation matrix (NxN).
    charges (ndarray): Charges of the particles (N).

    Returns:
    float: Total potential energy of the system.
    """
#     return 0.5 * np.sum(charges[:, np.newaxis] * charges[np.newaxis, :] * ewald_matrix)
    return  charges[:, np.newaxis] * charges[np.newaxis, :] * ewald_matrix

# # Example usage:
# positions = np.array([[0.0, 0.0, 0.0], [0.5, 0.5, 0.5]])  # Example positions in fractional coordinates
# lattice_vectors = np.eye(3)  # Example lattice vectors (unit cube)

# # Compute the Ewald matrix once
# ewald_matrix = compute_ewald_matrix(positions, lattice_vectors)

# # Different sets of charges
# charges_list = [
#     np.array([1.0, -1.0]),
#     np.array([1.0, 1.0]),
#     np.array([-1.0, -1.0])
# ]

# # Calculate potential energy for each set of charges
# for charges in charges_list:
#     potential_energy = calculate_potential_energy(ewald_matrix, charges)
#     print(f"Charges: {charges},\n Potential Energy:\n {potential_energy}")


# <a id='buckingham'>Buckingham</a>

WIP

- Use the distance matrix
- All atom QUBO:
    - Qii = 0
    - Qij = Li-other BP or (Mn3+-other - Mn4+-other) or just take Mn4+-other as the 0 energy

# Build initial QUBO

### Fully lithiated

In [110]:
n_sites = fully_lithiated_structure_init.num_sites
cart_coords = fully_lithiated_structure_init.cart_coords
lattice = fully_lithiated_structure_init.lattice.matrix
atomic_numbers = np.array(fully_lithiated_structure_init.atomic_numbers)

charges_dict = {3:+1,
                43:+3,
                8:-2
}


charges = np.zeros(n_sites)
for atom_type in charges_dict.keys():
    index = np.where(atomic_numbers == atom_type)[0]
    charges[index] = charges_dict[atom_type]

    
Q = np.zeros((n_sites,n_sites))

# Ewald
ewald_matrix_tmp_fl = compute_ewald_matrix(cart_coords, lattice, alpha=0.5)
ewald_matrix_fl = calculate_potential_energy(ewald_matrix_tmp_fl,charges)

In [111]:
ewald_matrix_tmp_fl[0]

array([ 7.60181078e-03, -6.67599596e-01, -4.50304657e-01, -6.67599596e-01,
        7.58015497e+00,  3.83208558e+00, -6.67608494e-01, -5.89959420e-01,
       -6.63120310e-01,  3.78765879e+00, -6.63120310e-01,  3.96782961e+00,
        3.10044217e+00, -4.58125507e-01, -5.95731853e-01,  6.71856331e+01,
       -6.67608494e-01, -5.95720416e-01,  7.43152198e+00,  3.70938154e+00,
       -4.58141023e-01,  1.81006077e+01,  2.28688771e+00,  1.71045671e+01,
        5.42832555e-01, -9.66097849e-01, -1.01052056e+00,  8.89845445e-01,
        1.77387192e+00,  1.85738298e+00, -9.87291675e-01, -9.06313681e-01,
       -9.66093139e-01,  6.52070271e-01,  1.40274093e+00, -1.01259045e+00,
       -9.03531416e-01, -9.63723055e-01, -9.06308163e-01,  1.87440080e+00,
       -1.01259457e+00, -9.87296100e-01,  8.98474979e-01,  7.42246133e-01,
        1.75783574e+00,  6.44978851e-01,  1.34126521e+00, -9.85065537e-01,
       -7.73947531e-01, -1.36745585e-02, -2.00217895e-01, -4.83899241e-01,
        2.23079839e-01, -

In [103]:
np.sum(ewald_matrix)*0.5

39472.19246172601

In [131]:
n_sites = delithiated_structure_init.num_sites
cart_coords = delithiated_structure_init.cart_coords
lattice = delithiated_structure_init.lattice.matrix
atomic_numbers = np.array(delithiated_structure_init.atomic_numbers)

charges_dict = {3:+1,
                25:+4,
                8:-2
}


charges = np.zeros(n_sites)
for atom_type in charges_dict.keys():
    index = np.where(atomic_numbers == atom_type)[0]
    charges[index] = charges_dict[atom_type]

    
Q = np.zeros((n_sites,n_sites))

# Ewald
ewald_matrix_tmp_dl = compute_ewald_matrix(cart_coords, lattice, alpha=0.5)
ewald_matrix_dl = calculate_potential_energy(ewald_matrix_tmp_dl,charges)

In [132]:
ewald_matrix_dl

array([[ -3.94207046,  -8.5137043 ,  -4.18637235, ...,   4.34807324,
          7.20002023, -14.90705521],
       [ -8.5137043 ,  -3.94207046,  -9.29900818, ...,  -4.958168  ,
          1.97630081,   5.73421612],
       [ -4.18637235,  -9.29900818,  -3.94207046, ...,  -0.50728862,
          1.93880285,   1.65808927],
       ...,
       [  4.34807324,  -4.958168  ,  -0.50728862, ...,  -0.98551762,
          0.52923035,  -2.90092982],
       [  7.20002023,   1.97630081,   1.93880285, ...,   0.52923035,
         -0.98551762,  -3.66885928],
       [-14.90705521,   5.73421612,   1.65808927, ...,  -2.90092982,
         -3.66885928,  -0.98551762]])

## Extra functions

In [136]:
import numpy as np
from scipy.special import erfc
import math

def compute_ewald_matrix(positions, lattice_vectors, alpha=None, real_depth=5, reciprocal_depth=5):
    """
    Compute the Ewald summation matrix for a system of particles.

    Parameters:
    positions (ndarray): Relative positions of particles (Nx3).
    lattice_vectors (ndarray): Lattice vectors of the unit cell (3x3).
    alpha (float): Ewald parameter controlling the split between real and reciprocal sums. If None, it's calculated.
    real_depth (int): Depth of the real space summation.
    reciprocal_depth (int): Depth of the reciprocal space summation.

    Returns:
    ndarray: Ewald summation matrix (NxN).
    """
    N = len(positions)
    
    # Calculate alpha if not provided
    if alpha is None:
        alpha = 2 / (np.linalg.det(lattice_vectors) ** (1.0 / 3))
    
    # Convert relative positions to absolute positions using lattice vectors
    positions = positions @ lattice_vectors
    
    # Initialize the Ewald summation matrix
    ewald_matrix = np.zeros((N, N))
    
    # Real space summation
    for i in range(N):
        for j in range(i + 1, N):
            r_ij = positions[i] - positions[j]
            d_ij = np.linalg.norm(r_ij)
            ewald_matrix[i, j] = erfc(alpha * d_ij) / d_ij
            
            # Generate real space shifts for neighboring cells
            for shift in np.ndindex(2 * real_depth + 1, 2 * real_depth + 1, 2 * real_depth + 1):
                if shift != (real_depth, real_depth, real_depth):
                    shift_vector = (np.array(shift) - real_depth) @ lattice_vectors
                    r_shifted = r_ij + shift_vector
                    d_shifted = np.linalg.norm(r_shifted)
                    ewald_matrix[i, j] += erfc(alpha * d_shifted) / d_shifted

    # Self-interaction term correction
    for i in range(N):
        ewald_matrix[i, i] -= alpha / math.sqrt(math.pi)

    # Reciprocal space summation
    reciprocal_vectors = 2 * np.pi * np.linalg.inv(lattice_vectors).T
    for i in range(N):
        for j in range(i, N):
            for shift in np.ndindex(2 * reciprocal_depth + 1, 2 * reciprocal_depth + 1, 2 * reciprocal_depth + 1):
                if shift != (reciprocal_depth, reciprocal_depth, reciprocal_depth):
                    k = (np.array(shift) - reciprocal_depth) @ reciprocal_vectors
                    k_norm = np.linalg.norm(k)
                    if k_norm > 0:
                        k_dot_r = np.dot(k, positions[j] - positions[i])
                        term = (4 * math.pi / (np.linalg.det(lattice_vectors) * k_norm**2)) * \
                               math.exp(-k_norm**2 / (4 * alpha**2)) * math.cos(k_dot_r)
                        ewald_matrix[i, j] += term
    
    # Convert to electrostatic potential energy (unit conversion)
    ewald_matrix *= 14.399645351950543  # Convert to appropriate units, e.g., eV for electrostatic potential
    
    # Symmetry completion: Ensure the matrix is symmetric
    for i in range(N):
        for j in range(i):
            ewald_matrix[i, j] = ewald_matrix[j, i]
    
    return ewald_matrix

def calculate_potential_energy(ewald_matrix, charges):
    """
    Calculate the potential energy of the system given the Ewald summation matrix and charges.

    Parameters:
    ewald_matrix (ndarray): Ewald summation matrix (NxN).
    charges (ndarray): Charges of the particles (N).

    Returns:
    float: Total potential energy of the system.
    """
    return charges[:, np.newaxis] * charges[np.newaxis, :] * ewald_matrix


n_sites = delithiated_structure_init.num_sites
cart_coords = delithiated_structure_init.cart_coords
lattice = delithiated_structure_init.lattice.matrix
atomic_numbers = np.array(delithiated_structure_init.atomic_numbers)

charges_dict = {3:+1,
                25:+4,
                8:-2
}


charges = np.zeros(n_sites)
for atom_type in charges_dict.keys():
    index = np.where(atomic_numbers == atom_type)[0]
    charges[index] = charges_dict[atom_type]

    
Q = np.zeros((n_sites,n_sites))

# Ewald
# ewald_matrix_tmp_dl = compute_ewald_matrix(cart_coords, lattice, alpha=0.5)
ewald_matrix_dl = calculate_potential_energy(ewald_matrix_tmp_dl,charges)

SyntaxError: invalid character '÷' (U+00F7) (1416449725.py, line 108)

In [135]:
ewald_matrix_dl

1649.773429867818

In [120]:
import numpy as np

# Sample arrays
array1 = np.array([[3, 1, 2], [6, 5, 4], [9, 8, 7]])
array2 = np.array([[9, 8, 7], [3, 1, 2], [6, 5, 4]])

def find_reordering_index(arr1, arr2):
    # Ensure that each row can be uniquely identified by sorting each row lexicographically
    arr1_sorted_indices = np.lexsort(arr1.T)
    arr2_sorted_indices = np.lexsort(arr2.T)
    
    # Get the sorted arrays
    arr1_sorted = arr1[arr1_sorted_indices]
    arr2_sorted = arr2[arr2_sorted_indices]
    
    # Create a mapping from sorted arr2 to sorted arr1 indices
    mapping = np.argsort(arr2_sorted_indices)
    
    # Reorder the indices to match the original arr2
    reordering_indices = arr1_sorted_indices[mapping]
    
    return reordering_indices

# Get the reordering indices
reordering_indices = find_reordering_index(array1, array2)

# Reorder array1 to match array2
reordered_array1 = array1[reordering_indices]

print("Reordered array1 to match array2:")
print(reordering_indices)


Reordered array1 to match array2:
[2 0 1]
