# Tools test

In [1]:
import numpy as np
import pandas as pd
import math
import json
import re
import os
from pymatgen.core.structure import Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from dotenv import load_dotenv
from pymatgen.core.composition import Composition

In [2]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Chat model used by the cells below; other options tried:
OPENAI_MODEL="gpt-4o" # "gpt-3.5-turbo", "gpt-4o-mini", "gpt-4o"
# see the list of available models https://platform.openai.com/docs/models
# load_dotenv() presumably populates os.environ from a local .env file
# (python-dotenv) - confirm a .env with OPENAI_API_KEY exists next to the notebook.
load_dotenv()
# NOTE(review): openai_api_key is not referenced again in the visible cells.
openai_api_key = os.environ.get('OPENAI_API_KEY')

In [75]:
from typing import Annotated, TypedDict
from langchain.agents import tool

@tool
def dir_name(pseudo_potentials: str,
             functional: str,
             mode: str) -> str:
    '''
    Function to determine the path to the folder with the correct pseudopotentials.
    The first subfolder whose lower-cased name contains both "<functional>_"
    and the mode is selected.
    Args:
         pseudo_potentials: str, name of the parent folder with pseudopotentials
         functional: str, name of the DFT functional
         mode: str, mode for pseudopotential, list of possible values: ["efficiency", "precision"]
    Returns:
         Path of the matching subfolder ending with "/", or None when no
         subfolder matches.
    '''
    functional_tag = functional.lower() + "_"
    mode_tag = mode.lower()
    for subfolder in os.listdir(pseudo_potentials):
        name = subfolder.lower()
        # Literal substring tests: the arguments are names, not regex patterns,
        # so characters such as "+" or "(" must not be interpreted by `re`.
        if functional_tag in name and mode_tag in name:
            # os.path.join also works when the parent folder is passed
            # without a trailing "/" (the LLM does not always include it).
            return os.path.join(pseudo_potentials, subfolder) + "/"
    # No matching subfolder: keep the historical "None" result, but explicitly.
    return None

@tool
def list_of_pseudos(pseudo_potentials: str,
                    functional: str,
                    mode: str,
                    compound: str) -> list:
    '''
    Function to determine the list of names of files with pseudopotentials for the compound.
    Files are taken from the first subfolder whose lower-cased name contains
    both "<functional>_" and the mode.
    Args:
        pseudo_potentials: str, name of the parent folder with pseudopotentials
        functional: str, name of the DFT functional
        mode: str, mode for pseudopotential, list of possible values: ["efficiency", "precision"]
        compound: str, composition of the compound
    Returns:
        List of file names that start with the symbol of one of the elements
        of the compound; empty when no subfolder or no file matches.
    '''
    functional_tag = functional.lower() + "_"
    mode_tag = mode.lower()

    # Start from an empty listing so a missing subfolder yields [] instead of
    # an UnboundLocalError further down.
    list_of_files = []
    for subfolder in os.listdir(pseudo_potentials):
        name = subfolder.lower()
        if functional_tag in name and mode_tag in name:
            # os.path.join tolerates a parent folder without a trailing "/"
            list_of_files = os.listdir(os.path.join(pseudo_potentials, subfolder))
            # Stop at the first match, the same folder dir_name() would return
            break

    # Parse the composition once instead of once per file
    symbols = [str(element).lower() for element in Composition(compound).elements]

    list_of_element_files = []
    for file in list_of_files:
        for symbol in symbols:
            prefix = file[:len(symbol)].lower()
            next_char = file[len(symbol):len(symbol) + 1].lower()
            # The character after the symbol must not be a letter, otherwise
            # "C" would also pick up the files for "Cl", "Ca", ...
            if prefix == symbol and not next_char.isalpha():
                list_of_element_files.append(file)

    return list_of_element_files

@tool
def cutoff_limits(pseudo_potentials_cutoffs: str,
                  functional: str,
                  mode: str,
                  compound: str) -> dict:
    '''
    Function to determine the maximum energy cutoff and density cutoff possible based on cutoff values specified for pseudopotentials
    Args:
        pseudo_potentials_cutoffs: str, the main folder with pseudopotential cutoffs
        functional: str, name of the DFT functional
        mode: str, mode for pseudopotential, list of possible values: ["efficiency", "precision"]
        compound: str, composition of the compound
    Output:
        Dictionary with keys 'max_ecutwfc' and 'max_ecutrho' and float values.
        Both values are NaN when no readable cutoff file is found.
    Raises:
        KeyError: if the cutoff file has no entry for an element of the compound.
    '''
    functional_tag = functional.lower() + "_"
    mode_tag = mode.lower()

    # Default to "no data" so a folder without a matching file gives NaN
    # instead of an UnboundLocalError.
    cutoffs = {}
    for file in os.listdir(pseudo_potentials_cutoffs):
        name = file.lower()
        if not (functional_tag in name and mode_tag in name):
            continue
        try:
            # os.path.join works whether or not the folder ends with "/"
            with open(os.path.join(pseudo_potentials_cutoffs, file), "r") as f:
                cutoffs = json.load(f)
        except (OSError, ValueError):
            # Unreadable or non-JSON file: best effort, try the next candidate
            cutoffs = {}
            continue
        break

    elements = [str(el) for el in Composition(compound).elements]
    if not cutoffs:
        return {'max_ecutwfc': np.nan, 'max_ecutrho': np.nan}

    # The largest per-element value is reported, as the 'max_*' keys promise
    # (the previous code took min()).
    max_ecutoff = max(cutoffs[el]['cutoff_wfc'] for el in elements)
    max_rhocutoff = max(cutoffs[el]['cutoff_rho'] for el in elements)
    return {'max_ecutwfc': max_ecutoff, 'max_ecutrho': max_rhocutoff}

def generate_kpoints_grid(lattice, k_density):
    '''
    Build a k-point grid specification from the cell lengths.
    Args:
        lattice: object exposing the three cell lengths as `lattice.abc`
        k_density: number that is divided by each cell length to obtain the
            number of subdivisions along that axis
    Returns:
        List of six ints: ceil(k_density / length) for each axis followed by
        three zeros (presumably the grid offsets - confirm with the caller).
    '''
    kpoints = [math.ceil(k_density / length) for length in lattice.abc]
    kpoints.extend([0, 0, 0])
    # Return the list built above (the previous `return points` was a NameError)
    return kpoints

def get_structure(cif_file, primitive=False):
    '''
    Load a structure file and return its text form together with its formula.
    Args:
        cif_file: path passed to Structure.from_file
        primitive: when True, the structure is first converted with
            SpacegroupAnalyzer.get_primitive_standard_structure()
    Returns:
        Tuple (str(structure), structure.formula) for the loaded or the
        converted structure.
    '''
    # Parse the file once; the formula is read from the same object
    struct = Structure.from_file(cif_file)
    if primitive:
        # Use a separate step instead of rebinding the boolean parameter
        struct = SpacegroupAnalyzer(struct).get_primitive_standard_structure()
    return str(struct), struct.formula


In [76]:
# Chat model selected by OPENAI_MODEL
llm = ChatOpenAI(model=OPENAI_MODEL)
# Make the three @tool functions available to the model for tool calling;
# the cells below only inspect the proposed calls, they do not execute them.
llm_with_tools = llm.bind_tools([dir_name,list_of_pseudos,cutoff_limits])

In [77]:
# Run configuration. Folder names end with "/" because paths are built by
# plain string concatenation (see cif_file below).
output_dir="generated_files/"  # NOTE(review): not used in the visible cells
structure_dir="structure_files/"
pseudo_potentials_folder = "pseudo/"
pseudo_potentials_cutoffs_folder = "pseudo_cutoffs/"
# Fallback composition; the next cell replaces it with the formula read from
# the CIF file when that file can be loaded.
compound = "NaCl"
cif_file=structure_dir+"ICSD_CollCode51182.cif"
functional = "PBE" # the available values are "PBE" and "PBESol"
mode = "efficiency" # the available values "efficiency" and "precision"

In [None]:
# Prefer the formula stored in the CIF file; fall back to the manually
# configured compound when the file cannot be loaded.
try:
    compound = Structure.from_file(cif_file).formula
except Exception as err:
    # Still best effort, but report the reason instead of hiding it
    # (and no longer swallow KeyboardInterrupt/SystemExit).
    print(f"Could not read {cif_file} ({err}); keeping compound={compound!r}")

In [78]:
from langchain_core.prompts import ChatPromptTemplate
# Ask the model for the pseudopotential directory. The compound is a template
# variable (filled from the invoke() inputs) rather than a hard-coded "NaCl",
# so the question follows the configured / CIF-derived composition.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "human", "Can you determine the directory with pseudopotentials for {compound} for {functional} functional and {mode} mode? \
            The main directory name is {pseudo_potentials_folder}"
        ),
    ]
)

In [79]:
# Pipe the formatted prompt into the tool-enabled model
chain = prompt | llm_with_tools

In [80]:
# Run the chain with the configured values as the prompt inputs
tool_call = chain.invoke(
    { 
        "compound": compound,
        "pseudo_potentials_folder": pseudo_potentials_folder,
        "mode": mode,
        "functional": functional, 
    }
)

In [81]:
# Show the tool call(s) proposed by the model (name and JSON arguments)
tool_call.additional_kwargs['tool_calls']

[{'id': 'call_pvwX4OPCCeK9YWSP4QYEC2Ih',
  'function': {'arguments': '{"pseudo_potentials":"pseudo/","functional":"PBE","mode":"efficiency"}',
   'name': 'dir_name'},
  'type': 'function'}]

In [82]:
from langchain_core.prompts import ChatPromptTemplate
# Ask the model for the cutoff limits. The compound is a template variable
# (filled from the inputs below) rather than a hard-coded "NaCl", so the
# question follows the configured / CIF-derived composition.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "human", "Can you determine the maximum energy and density cutoffs for {compound} for {functional} functional and {mode} mode? \
            The directory with cutoffs is {pseudo_potentials_cutoffs}."
        ),
    ]
)
chain = prompt | llm_with_tools
tool_call = chain.invoke(
    {
        "compound": compound,
        "pseudo_potentials_cutoffs": pseudo_potentials_cutoffs_folder,
        "mode": mode,
        "functional": functional,
    }
)
# Show the tool call(s) proposed by the model (name and JSON arguments)
tool_call.additional_kwargs['tool_calls']

[{'id': 'call_oL4VuprgEcFIXK0L6vbsOXiR',
  'function': {'arguments': '{"pseudo_potentials_cutoffs":"pseudo_cutoffs","functional":"PBE","mode":"efficiency","compound":"NaCl"}',
   'name': 'cutoff_limits'},
  'type': 'function'}]