In [120]:
import pandas as pd
import regex as re

In [121]:
periodicTableData = pd.read_csv('./data/periodicTableDataFinal.csv')
periodicTableData

Unnamed: 0,Element,Hydrogen,Helium,Lithium,Beryllium,Boron,Carbon,Nitrogen,Oxygen,Fluorine,...,Meitnerium,Darmstadtium,Roentgenium,Copernicium,Nihonium,Flerovium,Moscovium,Livermorium,Tennessine,Oganesson
0,atomicNumber,1,2,3,4,5,6,7,8,9,...,109,110,111,112,113,114,115,116,117,118
1,symbol,H,He,Li,Be,B,C,N,O,F,...,Mt,Ds,Rg,Cn,Nh,Fl,Mc,Lv,Ts,Og
2,name,Hydrogen,Helium,Lithium,Beryllium,Boron,Carbon,Nitrogen,Oxygen,Fluorine,...,Meitnerium,Darmstadtium,Roentgenium,Copernicium,Nihonium,Flerovium,Moscovium,Livermorium,Tennessine,Oganesson
3,atomicMass,1.00794(4),4.002602(2),6.941(2),9.012182(3),10.811(7),12.0107(8),14.0067(2),15.9994(3),18.9984032(5),...,[276],[281],[280],[285],[284],[289],[288],[293],[294],[294]
4,cpkHexColor,FFFFFF,D9FFFF,CC80FF,C2FF00,FFB5B5,909090,3050F8,FF0D0D,90E050,...,EB0026,,,,,,,,,
5,electronConfiguration,1s1,1s2,[He]2s1,[He]2s2,[He]2s22p1,[He]2s22p2,[He]2s22p3,[He]2s22p4,[He]2s22p5,...,[Rn]5f146d77s2,[Rn]5f146d97s1,[Rn]5f146d107s1,[Rn]5f146d107s2,[Rn]5f146d107s27p1,[Rn]5f146d107s27p2,[Rn]5f146d107s27p3,[Rn]5f146d107s27p4,[Rn]5f146d107s27p5,[Rn]5f146d107s27p6
6,electronegativity,2.2,,0.98,1.57,2.04,2.55,3.04,3.44,3.98,...,,,,,,,,,,
7,atomicRadius,37.0,32.0,134.0,90.0,82.0,77.0,75.0,73.0,71.0,...,,,,,,,,,,
8,ionRadius,,,76(+1),45(+2),27(+3),16(+4),146(-3),140(-2),133(-1),...,,,,,,,,,,
9,vanDerWaalsRadius,120.0,140.0,182.0,,,170.0,155.0,152.0,147.0,...,,,,,,,,,,


In [122]:
sodium = pd.Series(periodicTableData.loc[:, 'Sodium'])
sodium.index = pd.Series(periodicTableData.loc[:, 'Element'])
sodium

Element
atomicNumber                         11
symbol                               Na
name                             Sodium
atomicMass               22.98976928(2)
cpkHexColor                      AB5CF2
electronConfiguration           [Ne]3s1
electronegativity                  0.93
atomicRadius                      154.0
ionRadius                       102(+1)
vanDerWaalsRadius                 227.0
ionizationEnergy                  496.0
electronAffinity                  -53.0
oxidationStates                    -1,1
standardState                     solid
bondingType                    metallic
meltingPoint                      371.0
boilingPoint                     1156.0
density                           0.968
groupBlock                  alkalimetal
yearDiscovered                     1807
Name: Sodium, dtype: object

In [123]:
class AppError(Exception):
    def __init__(self, *errorTypes):
        self.errorTypes = errorTypes
    
    def generateAppError(self):
        return f'{self.errorTypes}. Please review the query and retry'

In [124]:
class Element: 
    #* Constructor
    def __init__(self, name):
        self.name = name
        self.cache = {}
        self.getElementMetadata()
    
    #* Generate element metadata based on the pandas dataframe --> Called in a lot of methods below this one
    def getElementMetadata(self):
        try:
            element = pd.Series(periodicTableData.loc[:, self.name])
        except (KeyError, AttributeError):
            return AppError(KeyError.__doc__, AttributeError.__doc__)
        element.index = pd.Series(periodicTableData.loc[:, 'Element'])
        self.cache['element'] = element
        return element;

    #* Get the element's group
    def getElementGroup(self):
        element = self.cache['element']
        if(type(element) == AppError):
            return element.generateAppError()
        return element.groupBlock

    #* Generate a floating point array of values for an elements oxidation states
    def getElementOxidationStates(self):
        element = self.cache['element']
        if(type(element) == AppError):
            return element.generateAppError()
        oxidationStates = element.oxidationStates.split(',')
        for i in range(0, len(oxidationStates)):
            oxidationStates[i] = float(oxidationStates[i])
        self.cache['oxidationStates'] = oxidationStates
        return oxidationStates

In [125]:
sodium = Element('Sodium')
chlorine = Element('Chlorine')

In [126]:
class Reaction: 
    def __init__(self, *entities):
        self.entities = entities
        self.componentElements = []
        self.reactionType = ''
        self.componentMolecules = []
        self.cache = {}
        self.getComponentElements()
        self.defineBondType()

    def getComponentElements(self):
        for i in range(0, len(self.entities)):
            self.componentElements.append(self.entities[i].cache['element'])
        self.cache['elements'] = self.componentElements
        return self.componentElements

    def defineBondType(self):
        elementGroupTypes = [str(element.groupBlock) for element in self.componentElements]
        if((('alkalimetal' or 'alkaliearthmetal') and 'halogen') in elementGroupTypes):
            self.cache['bondType'] = 'ionic'
        else:
            self.cache['bondtype'] = None
        self.cache['bondType'] = 'ionic'
        return self.cache['bondType']
    
    def createReactionSubclass(self):
        return SynthesisReaction(cache = self.cache)

    def clearCache(self):
        store = []
        cache = {}
        for key, val in self.cache.items():
            if key not in store:
                store.append(key)
                cache[key] = val
        return cache
    

In [127]:
class SynthesisReaction(Reaction):
    def __init__(self, cache, reactionConditions = 'STP'):
        
        super().__init__()
        self.cache = cache
        self.clearCache()
        self.reactionConditions = reactionConditions
    def createSynthesisReaction(self):
        elementOxidationNumbers = [Element(self.cache['elements'][i].name).getElementOxidationStates() for i in range(0,len(self.cache['elements']))]
        return elementOxidationNumbers
        

    

In [128]:
result = Reaction(sodium, chlorine)
newResult = result.createReactionSubclass()
newResult.createSynthesisReaction()

[[-1.0, 1.0], [-1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]]

In [129]:
newResult.reactionConditions

'STP'

In [130]:
polyatomicIonData = pd.read_csv('./data/finalIonData.csv')
polyatomicIonData

Unnamed: 0,Ion,NH4,C2H3O,BrO,CO3,HCO,ClO,ClO.1,ClO.2,ClO4,...,HPO4,H2PO4,SiO4,SO3,SO4,HSO4,HS,S2O3,C4H4O6,B4O7
0,Number of Atoms,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic,...,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic,Polyatomic
1,Type,Cation,Anion,Anion,Anion,Anion,Anion,Anion,Anion,Anion,...,Anion,Anion,Anion,Anion,Anion,Anion,Anion,Anion,Anion,Anion
2,Charge,1,-1,-1,-2,-1,-1,-1,-1,-1,...,-2,-1,-2,-2,-2,-1,-1,-2,-2,-2
3,Name,Ammonium,Acetate,Bromate,Carbonate,Hydrogen Carbonate (Bicarbonate),Hypochlorite,Chlorite,Chlorate,Perchlorate,...,Hydrogen Phosphate,Dihydrogen Phosphate,Silicate,Sulfite,Sulfate,Hydrogen Sulfate (Bisulfate),Hydrogen Sulfide,Thiosulfate,Tartate,Tetraborate
4,Ion,NH4,C2H3O,BrO,CO3,HCO,ClO,ClO,ClO,ClO4,...,HPO4,H2PO4,SiO4,SO3,SO4,HSO4,HS,S2O3,C4H4O6,B4O7


In [131]:
class PolyatomicIon:
    def __init__(self, ion):
        self.ion = ion
        self.cache = {}
        self.getIonMetadata()

    def getIonMetadata(self):
        try:
            ion = pd.Series(polyatomicIonData.loc[:, self.ion])
        except:
            ionName = self.getIonByName()
            if ionName != None:
                ion = pd.Series(polyatomicIonData.loc[:, ionName])
            else:
                return

        ion.index = pd.Series(polyatomicIonData.loc[:, 'Ion'])
        self.cache['ion'] = ion
        return ion

    def getIonCharge(self):
        charge = float(self.cache['ion'].loc['Charge'])
        return charge

    def getIonByName(self):
        res = polyatomicIonData.iloc[3, :].to_dict()
        final = dict((v,k) for k,v in res.items())
        try:
            ion = final[self.ion]
        except: 
            ion = None
        return ion

    

In [132]:
acetate = PolyatomicIon('Acetate')
acetate.cache['ion'].Type

'Anion'

Let's say Mg(NO3)2 is in the cache
first let's split it into [Mg, (NO3)2]
then lets make a dictionary like this:
{
    Mg: 1,
    NO3: 2,
}

In [133]:
class Expression:
    def __init__(self, expression):
        self.expression = expression
        self.cache = {}

    def splitExpression(self):
        firstPhase = self.expression.split('+')
        for i in range(0, len(firstPhase)):
            self.cache[firstPhase[i]] = firstPhase[i]
        return firstPhase
    
    def parseSubExpressions(self):
        self.splitExpression()
        self.cache['parsedSubExpression'] = []
        pattern = re.compile(r'\(.*?\)\d+')
        for i in range(0, len(self.cache.keys())):
            print(i)
            exp.cache['parsedSubExpression'].append(re.findall(pattern, list(self.cache.keys())[i]))
    


In [134]:
exp = Expression('Mg(NO3)2+NaOH')
exp.parseSubExpressions()
exp.cache

0
1
2


{'Mg(NO3)2': 'Mg(NO3)2',
 'NaOH': 'NaOH',
 'parsedSubExpression': [['(NO3)2'], [], []]}

In [135]:
pattern = re.compile(r'\(.*?\)\d+')

In [139]:
string = 'Mg(NO3)2'
token = re.findall(pattern, string)
strDict = {
    'token1': token[0],
    'token2': string.strip(token[0])
}
strDict

{'token1': '(NO3)2', 'token2': 'Mg'}

In [137]:
token[0]
string.replace(token[0], '')

'Mg'

In [138]:
token[0]

'(NO3)22'

In [149]:
string = 'Mg(NO3)2'
token = re.findall(pattern, string)
strDict = {
    'token1': token[0],
    'token2': string.strip(token[0])
}
strDict

{'token1': '(NO3)2', 'token2': 'Mg'}

In [155]:
multiTokenNumber = float(token[0][-1])
multiTokenNumber

2.0