In [None]:
import os
from os import listdir

import numpy as np
import pandas as pd

In [None]:
#-----SETTINGS-----
# User-editable inputs read by the cells below.

# Folder whose entries are listed and handed to the CIF parser. The file name
# is appended to this value to build each path, so keep the trailing "/".
directory = "./Directory/Of/CIFs/"
# Header name of the CIF column from which the per-atom charges are read.
charge_column = "_atom_site_charge"
# Path the resulting table is written to as CSV.
output_csv = "./example.csv"

In [None]:
def flux(Q,p):
    """
    Take pressure (p) and charge (Q),
    return the estimated charge-flux effect, summed over all elements.

    The estimate is a fitted polynomial: 7th order in Q, 3rd order in p, plus
    a constant term. The function was estimated by fitting simulations of a
    non-physical point charge in RASPA. Wherever the fit evaluates below 0
    (due to the way the data was fit), that element contributes 0 instead.

    Parameters
    ----------
    Q : float or array-like of float
        Charge(s).
    p : float or array-like of float
        Pressure(s); a scalar or anything that broadcasts against Q.

    Returns
    -------
    numpy.float64
        Sum of the per-element, zero-clamped estimates (0.0 for empty input).
    """

    # Fitted coefficients, in order: Q^1 .. Q^7, p^1 .. p^3, constant term.
    c = np.array([
        -4.01479419e-01, 9.58680949e-01, 2.02455722e-01, -3.25777707e-02,
        -1.44514943e-02, 5.01944807e-04, 3.12802362e-04,
        1.12292432e-03, -2.00354705e-07, 1.16669086e-11,
        -6.28647382e-01,
    ])

    # Promote both inputs to float arrays of one common shape, so scalars and
    # broadcastable inputs are accepted as well as equal-length arrays.
    Q, p = np.broadcast_arrays(
        np.atleast_1d(np.asarray(Q, dtype=float)),
        np.atleast_1d(np.asarray(p, dtype=float)),
    )

    # One row per polynomial term, in the same order as the coefficients.
    var_mat = np.stack(
        [Q**k for k in range(1, 8)] + [p, p**2, p**3, np.ones(Q.shape)]
    )
    # Contract over the term axis; the result has the same shape as Q.
    calc = np.tensordot(c, var_mat, axes=1)

    # Keep strictly positive estimates, everything else counts as 0.
    fluxes = np.where(calc > 0, calc, 0.0)
    return np.sum(fluxes)

def fluxFromChargeList(clist, pressure):
    """
    Evaluate `flux` for a list of charges at a single pressure.

    The pressure is expanded to an array with one entry per charge before
    both arrays are handed to `flux`; the result of `flux` is returned as is.
    """
    charge_array = np.array(clist)
    pressure_array = pressure * np.ones(len(charge_array))
    return flux(charge_array, pressure_array)

def replaceTabs(text, spaces = "   "):
    """
    Return `text` with every tab character replaced by `spaces`.

    Parameters
    ----------
    text : str
        Text that may contain tab characters.
    spaces : str, optional
        Replacement for each tab (three spaces by default). An empty string
        simply removes the tabs.

    Returns
    -------
    str
        The text with all tabs substituted.
    """
    # str.replace does the substitution in one pass and needs no trailing
    # separator to be trimmed, so an empty `spaces` is handled correctly.
    return text.replace('\t', spaces)

def getChargeListFromCif(cif_address, column=None):
    """
    Read the values of the charge column from a loop in a CIF file.

    The header line naming the column is located first. Every further header
    line (starting with "_") that follows it means the charge sits one more
    position from the end of each data row. Data rows are then read until the
    loop ends.

    Parameters
    ----------
    cif_address : str
        Path of the CIF file to read.
    column : str, optional
        Header name of the column holding the charges. Defaults to the
        notebook-level `charge_column` setting.

    Returns
    -------
    list of float
        One charge per parsed data row, in file order. Empty if the column
        header does not occur in the file.

    Raises
    ------
    ValueError
        If a value in the charge column cannot be converted to float.
    IndexError
        If a data row has fewer fields than the column position requires.
    """
    if column is None:
        column = charge_column

    # The context manager closes the file even if reading raises.
    with open(cif_address, 'r') as cif_file:
        lines = cif_file.read().split('\n')

    # Compare stripped lines so surrounding whitespace (including tabs) around
    # the header does not hide the column.
    header_index = next(
        (i for i, line in enumerate(lines) if line.strip() == column),
        None,
    )
    if header_index is None:
        return []

    atom_charges = []
    cols_from_end = 1
    for line in lines[header_index + 1:]:
        # split() treats spaces and tabs alike, so no tab conversion is needed.
        tokens = line.split()
        if not tokens:
            continue

        first = tokens[0]
        # A new loop or data block ends the loop that holds the charges.
        if first.lower() == 'loop_' or first.lower().startswith('data_'):
            break

        if first[0] == '_':
            # A tag after data rows belongs to the next item, not this loop.
            if atom_charges:
                break
            # Another column declared after the charge column.
            cols_from_end += 1
        elif len(tokens) >= 4:
            # Rows with fewer than four fields are not treated as atom rows.
            atom_charges.append(float(tokens[-cols_from_end]))

    return atom_charges

In [None]:
# Maps each CIF file name to the list of charges parsed from that file.
# Starts empty; entries are added by the loading loop.
charges = {}

In [None]:
# Sorted so that the processing order is reproducible; os.listdir returns
# directory entries in arbitrary order.
cifs = sorted(listdir(directory))

errored = []        # names of the files that could not be parsed
error_details = {}  # file name -> repr of the exception, kept for diagnosis

for cif in cifs:
    try:
        # os.path.join works whether or not `directory` ends with a separator.
        charges[cif] = getChargeListFromCif(os.path.join(directory, cif))
    except Exception as exc:
        # Best effort: one unreadable or malformed file must not abort the
        # batch. Catching Exception (not a bare except) still lets
        # KeyboardInterrupt and SystemExit stop the run.
        errored.append(cif)
        error_details[cif] = repr(exc)
print(len(errored))


In [None]:
# Output column name -> pressure at which the flux descriptor is evaluated.
flux_columns = {'Flux40': 40, 'Flux1000': 1000, 'Flux4000': 4000}

# Column-oriented table: one list per output column, one entry per CIF.
flux_descriptors = {'FILE': []}
flux_descriptors.update({column: [] for column in flux_columns})

for cif, charge_list in charges.items():
    # Evaluate every pressure before appending anything, so all columns keep
    # the same length even if one evaluation raises.
    row = {
        column: fluxFromChargeList(charge_list, pressure)
        for column, pressure in flux_columns.items()
    }
    flux_descriptors['FILE'].append(cif)
    for column, value in row.items():
        flux_descriptors[column].append(value)


In [None]:
# Build a table from the column lists in flux_descriptors: one row per CIF,
# holding the file name and one flux value per pressure.
df = pd.DataFrame(flux_descriptors)

In [None]:
# Write the table to the path configured in output_csv. No `index` argument is
# given, so pandas' default applies and the row index is written as well.
df.to_csv(output_csv)