In [95]:
import pandas as pd
import numpy as np
import mysql.connector
import logging
import re




In [46]:
# упаковки:
three_layer = [
    [0, 0, 0],
    [0, 0.5, 0.5],
    [0.5, 0, 0.5],
    [0.5, 0.5, 0]
]

oct_void = [
    [0.5, 0.5, 0.5],
    [0.5, 0, 0],
    [0, 0.5, 0],
    [0, 0, 0.5]
]

tetr_void = [
    [0.25, 0.25, 0.25],
    [0.25, 0.75, 0.75],
    [0.75, 0.25, 0.75],
    [0.75, 0.75, 0.25],
    [0.75, 0.75, 0.75],
    [0.75, 0.25, 0.25],
    [0.25, 0.75, 0.25],
    [0.25, 0.25, 0.75]
]

In [77]:
def get_system_from_cif(cif_file):
    #поиск блока с общей пст
    block_pattern = re.compile(r'loop_\s*_symmetry_equiv_pos_site_id\s*_symmetry_equiv_pos_as_xyz\s*(.*?)\n(?=loop_|$)', re.DOTALL)
    block_match = block_pattern.search(cif_file)
    
    if block_match:
        # Извлекаю найденный блок
        block_content = block_match.group(1)
        
        # Разделяю блок на строки
        lines = block_content.strip().split('\n')
        
        # извлекаю координаты
        coordinates = []
        for line in lines:
            # удаляю лишнее
            cleaned_line = re.sub(r'\d+\s', '', line).replace("'", "")
            # Добавляю координаты в список
            if cleaned_line.strip(): 
                coordinates.append(cleaned_line.strip())
        
        return coordinates
    else:
        return None

In [108]:

#Размножение координаты по ПСТ
def expand_positions(system_points, pos):
    x, y, z = pos
    new_coords = []
    for pos in system_points:
        new_pos = pos.replace('x', str(x)).replace('y', str(y)).replace('z', str(z))
        if '--' in new_pos: 
            new_pos = new_pos.replace('--','-') 
        parts = new_pos.split(',')
        new_part = []
        for part in parts:
            if '/' in part:
                denom = 2.0
                num = 1.0
                if '4' in part:
                    denom = 4.0
                if '3' in part:
                    num = 3.0
                if '+' in part:
                    sub_parts = part.split('+')
                    value = (num / denom) + float(sub_parts[1])
                elif '-' in part:
                    sub_parts = part.split('-')
                    value = (num / denom) - float(sub_parts[1])
            else:
                value = float(part)
            new_part.append(value)
        new_coords.append(new_part)
    return new_coords

In [49]:
def unique_positions(arr):
    unique_rows = np.unique(arr, axis=0)
    return unique_rows

In [50]:
def get_delta(orbit, pack):
    delta = [0.0, 0.0, 0.0]
    for o in orbit:
        for p in pack:
            delta[0] += abs(o[0] - p[0]) ** 2
            delta[1] += abs(o[1] - p[1]) ** 2
            delta[2] += abs(o[2] - p[2]) ** 2
    return delta

In [110]:
data = pd.read_csv('cubic_groups.csv')
logging.basicConfig(filename='PACK.log', level=logging.INFO)
#подключение к бд
conn = mysql.connector.connect(host='localhost', user='user', password='12345', database='NNCDB')
cursor = conn.cursor()
for i in range(100):
    item = data.iloc[i]
    logging.info(f'Молекула: {item["REFCODE"]}({item["Formula"]})')
    id = item['CCDC_ID']
    #запрашиваю cif-файл, чтобы взять общую ПСТ
    query = 'SELECT CIF_FILE FROM CIFS WHERE ID = %s'
    cursor.execute(query, (int(id),))
    resp_res = cursor.fetchall()   

    cif = resp_res[0][0]

    #получаю общую ПСТ
    psys = get_system_from_cif(cif)

    center_of_mass = [item['C_x'], item['C_y'], item['C_z']]

    #размножаю ЦМ по общей ПСТ
    orbit = expand_positions(psys, center_of_mass)

    #удаляю совпадения
    uniq_orbit = unique_positions(orbit)
    logging.info(f'Позиции ЦМ: \n{uniq_orbit}')

cursor.close()
conn.close()

INFO:root:Молекула: HEWMOL(C12 H24 O6 Tl1)
INFO:root:Позиции ЦМ: 
[[-0.624722 -0.624722  0.375279]
 [-0.624722 -0.375279  0.624722]
 [-0.624722 -0.374722  1.875279]
 [-0.624722  0.375279 -0.624722]
 [-0.624722  0.624722 -0.375279]
 [-0.624722  0.874722  1.124721]
 [-0.624722  1.124721  0.874722]
 [-0.624722  1.875279 -0.374722]
 [-0.375279 -0.624722  0.624722]
 [-0.375279 -0.374722  0.874722]
 [-0.375279  0.624722 -0.624722]
 [-0.375279  0.874722 -0.374722]
 [-0.374722 -0.624722  1.875279]
 [-0.374722 -0.375279  0.874722]
 [-0.374722 -0.374722  0.375279]
 [-0.374722  0.375279 -0.374722]
 [-0.374722  0.624722  1.124721]
 [-0.374722  0.874722 -0.375279]
 [-0.374722  1.124721  0.624722]
 [-0.374722  1.875279 -0.624722]
 [ 0.375279 -0.624722 -0.624722]
 [ 0.375279 -0.374722 -0.374722]
 [ 0.375279  0.624722  0.624722]
 [ 0.375279  0.874722  0.874722]
 [ 0.624722 -0.624722 -0.375279]
 [ 0.624722 -0.375279 -0.624722]
 [ 0.624722 -0.374722  1.124721]
 [ 0.624722  0.375279  0.624722]
 [ 0.62472