#### This script finds, for each valid structure, the minimum distances between the metal atoms and the most electronegative atom of the guest molecule. It requires the CONTCAR and OUTCAR files of each configuration and a .dat file listing the numbers of the valid structures.

In [1]:
from ase import Atoms, Atom
from ase.visualize import view
from ase.io import read, write
from ase.build import mx2, add_adsorbate
from ase.constraints import FixAtoms
from ase.build import surface
from ase.data.colors import jmol_colors
from IPython import display
from sklearn.metrics.pairwise import pairwise_distances_argmin, pairwise_distances_argmin_min
from sklearn.ensemble import IsolationForest
from sklearn.manifold import MDS
import numpy as np

import os
from ase.collections import g2
from ase.build import molecule

###scikitlearn
from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.model_selection import train_test_split


##plot
import pandas as pd
import matplotlib.pyplot as plt 


def read_data(file):
    """Return every whitespace-separated integer found in a text file.

    Parameters
    ----------
    file : str
        Path of the text file to read.

    Returns
    -------
    list of int
        All integers of the file, in reading order (line by line, left to
        right). Blank lines contribute nothing; an empty file yields ``[]``.

    Raises
    ------
    ValueError
        If a token cannot be converted with ``int``.
    """
    with open(file, 'r') as handle:
        return [int(token) for row in handle for token in row.split()]
    


    


### Determining the minimum distance between the metal and the most electronegative atom of the guest molecule

In [None]:
import shutil  # only needed by this cell: copies the selected conf_* folders

main = os.getcwd()

# Folder (created inside every system directory) that receives a copy of each
# selected conf_* directory.
selected_dirname = 'distancia_mascorta'
# Number of metal sites for which a distance column is computed.
n_metal_sites = 4
# Position inside indice_mol of the guest atom used for the distances:
#   0  -> sulfur is the most electronegative atom of the guest
#   -1 -> oxygen is the most electronegative atom of the guest
# Check the atom ordering of your own system before trusting this value.
guest_position = 0

#directories = ['alcohol_bencilico', 'CO', 'metanol']
directories = ['H2S']
for system in directories:
    # Every path is built explicitly instead of calling os.chdir, so the
    # working directory is never left in the wrong place if a step fails.
    system_dir = os.path.join(main, system)
    print(system_dir)
    output_dir = os.path.join(system_dir, 'output')  # holds the conf_* folders

    # Create the destination up front so that every selected conf_* folder is
    # copied *into* it; exist_ok allows the cell to be re-run.
    selected_dir = os.path.join(system_dir, selected_dirname)
    os.makedirs(selected_dir, exist_ok=True)

    # File with the list of valid structures (integer structure numbers).
    good_str = read_data(os.path.join(system_dir, 'myfile.dat'))

    # One structure is enough to look up atom indices.
    # NOTE(review): this assumes conf_2 exists and that all configurations
    # share the same atom ordering -- confirm for your system.
    atoms = read(os.path.join(output_dir, 'conf_2', 'CONTCAR'))

    # Candidate electronegative atoms (S or O) and the metal atoms (In).
    indice_mol = [atom.index for atom in atoms if atom.symbol in ('S', 'O')]
    indice_metals = [atom.index for atom in atoms if atom.symbol == 'In']
    print(indice_mol, indice_metals)

    # Fail early with a clear message instead of an IndexError in the loop.
    if not indice_mol:
        raise ValueError('No S or O atom found in {}'.format(system_dir))
    if len(indice_metals) < n_metal_sites:
        raise ValueError('Expected at least {} In atoms, found {}'.format(
            n_metal_sites, len(indice_metals)))

    guest_index = indice_mol[guest_position]
    distance_columns = ['d_O-Sc{}'.format(k) for k in range(n_metal_sites)]

    # Collect one dict per structure and build the DataFrame once;
    # DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    for n in good_str:
        conf_dir = os.path.join(output_dir, 'conf_{}'.format(n))
        # index=-1 selects the last image stored in the OUTCAR.
        final_image = read(os.path.join(conf_dir, 'OUTCAR'),
                           format='vasp-out', index=-1)
        complejo = read(os.path.join(conf_dir, 'CONTCAR'))

        row = {'No. Structure': n,
               'Energy': final_image.get_potential_energy()}
        # zip stops after n_metal_sites columns, i.e. the first metal atoms.
        # NOTE(review): get_distance is called with its default arguments,
        # exactly as before; for a periodic framework consider whether
        # mic=True (minimum-image convention) is what you want.
        for column, metal_index in zip(distance_columns, indice_metals):
            row[column] = complejo.get_distance(guest_index, metal_index)
        rows.append(row)

    df_descriptors = pd.DataFrame(
        rows, columns=['No. Structure', 'Energy'] + distance_columns)
    # CSV with the guest-metal distances of every valid structure.
    df_descriptors.to_csv(os.path.join(output_dir, 'descriptors.csv'))

    if df_descriptors.empty:
        # Nothing to rank: idxmin would raise on an empty column.
        print('No valid structures listed for {}'.format(system))
        continue

    # For each metal site, pick the structure with the shortest distance to
    # that site and copy its conf_* folder to the destination directory.
    df_mindist = []
    for column in distance_columns:
        # idxmin returns an index label, hence .loc rather than .iloc.
        minimos = df_descriptors.loc[df_descriptors[column].idxmin()]
        conf_name = 'conf_{}'.format(int(minimos['No. Structure']))
        # dirs_exist_ok: the same structure may be the minimum for several
        # metal sites, or may remain from a previous run.
        shutil.copytree(os.path.join(output_dir, conf_name),
                        os.path.join(selected_dir, conf_name),
                        dirs_exist_ok=True)
        df_mindist.append(minimos)

    # DataFrame with one row per metal site (its closest structure).
    df_resultados = pd.DataFrame(df_mindist)
    df_resultados.to_csv(
        os.path.join(output_dir, 'estructuras_min_distancia.csv'), index=False)
    print(df_resultados)
    #print('______________________________________________')
    



