<a href="https://colab.research.google.com/github/profteachkids/StemUnleashed/blob/main/EqBalancing_Xpath.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from lxml import html
import re
import pandas as pd
import numpy as np

In [2]:
# Download the Wikipedia "Chemical element" page and parse it into an lxml tree.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
response = requests.get("https://en.wikipedia.org/wiki/Chemical_element", timeout=30)
response.raise_for_status()
content = response.content
tree = html.fromstring(content)

In [74]:
# XPath queries against the parsed page. The title strings are later split on
# ',' (the first piece becomes the lookup key) and the text nodes are later
# converted with float(); the two result lists are paired positionally.
title_xpath = r'//table//td//a/span[@title]/@title'
mw_xpath = r'//table//td//a//span[@data-sort-value]/../text()'

symbol_names = tree.xpath(title_xpath)
mws = tree.xpath(mw_xpath)

In [75]:
# Key: the text before the first comma of each title, stripped of whitespace.
# Value: the matching text node converted to float.
# zip() pairs the two lists by position and stops at the shorter one.
mws_dict = {
    title.split(',')[0].strip(): float(weight_text)
    for title, weight_text in zip(symbol_names, mws)
}

In [76]:
# atom_pat: one uppercase letter followed by any number of lowercase letters
# (group 1), then an optional run of digits (group 2, '' when absent).
atom_pat=re.compile(r'([A-Z][a-z]*)([0-9]*)')
# stoic_pat: anchored at the start of the string. Captures a leading run of
# lowercase letters OR a leading run of digits (never a mix), and only matches
# when that prefix is immediately followed by an uppercase letter, an optional
# lowercase letter and optional digits (that trailing part is not captured).
stoic_pat = re.compile(r'^([a-z]+?|[0-9]+?)(?:[A-Z][a-z]?[0-9]*)')

In [96]:
# Example formula to analyse. Each match contributes one (symbol, count) tuple;
# count is '' when no digits follow the symbol.
molecule = 'C63H88CoN14O14P'
atoms = [match.groups() for match in atom_pat.finditer(molecule)]

In [99]:
# Walk the (symbol, count) pairs and accumulate weight * count; an empty count
# string stands for a single atom.
tot_MW = 0.
for atom, num in atoms:
    count = float(num) if num != '' else 1.
    tot_MW += mws_dict[atom] * count
tot_MW

1355.388

In [21]:
# Reaction to balance. A lowercase prefix on a species (a, b, c, ...) is an
# unknown coefficient, a leading integer is a known coefficient, and a species
# with no prefix gets the sign passed to create_rxn_list as its coefficient.
# Alternative example:
# rxn = 'aKNO3 + 5C12H22O11 -> bN2 + cCO2 + dH2O + eK2CO3'
rxn = 'CH4 + C2H6 + C3H8 + aO2 -> bCO2 + cH2O + 2CO'

In [86]:
# Remove every space, then cut the equation at the arrow into its two sides.
compact_rxn = rxn.replace(' ', '')
left, right = compact_rxn.split('->')

In [87]:
def create_rxn_list(side,sign=-1):
    """Turn one side of a reaction into a flat list of record dicts.

    Relies on the module-level patterns ``stoic_pat`` (leading coefficient)
    and ``atom_pat`` (element symbol + optional count).

    Args:
        side: One side of the equation with spaces removed, species joined
            by '+', e.g. ``'CH4+aO2'``.
        sign: Multiplier applied to known integer coefficients and used as
            the coefficient of species that have no prefix.

    Returns:
        A list of dicts with keys ``molecule``, ``atom`` and ``n``. For each
        species there is one record with ``atom='stoic'`` whose ``n`` is
        ``sign`` (no prefix), ``int(prefix) * sign`` (numeric prefix) or the
        prefix string itself (symbolic prefix), followed by one record per
        distinct element holding its total count in the formula.
    """
    rxn_list=[]
    for molecule in side.split('+'):
        if not molecule:
            # An empty fragment (empty side, stray '+') is not a species.
            continue

        stoic = stoic_pat.findall(molecule)
        if stoic:
            coeff = stoic[0]
            # Strip the whole coefficient text, not just one character:
            # len(stoic) is the number of matches (always 1 here), which
            # mangled names for prefixes such as '12' or 'ab'.
            name = molecule[len(coeff):]
            n = int(coeff)*sign if coeff.isnumeric() else coeff
        else:
            name = molecule
            n = sign
        rxn_list.append(dict(molecule=name,atom='stoic',n=n))

        # Sum repeated symbols (e.g. CH3COOH has C, H and O twice) so each
        # (molecule, atom) pair appears once; duplicates would make a later
        # pivot on these records fail.
        counts = {}
        for atom, num in atom_pat.findall(name):
            counts[atom] = counts.get(atom, 0) + (1 if num=='' else int(num))
        for atom, val in counts.items():
            rxn_list.append(dict(molecule=name,atom=atom,n=val))
    return rxn_list

In [88]:
# Left-hand species carry sign -1, right-hand species +1; both record lists are
# stacked into a single long-format frame.
left_rows = create_rxn_list(left, sign=-1)
right_rows = create_rxn_list(right, sign=1)
rxn_list = left_rows + right_rows
df = pd.DataFrame(rxn_list)

In [89]:
# Reshape to one row per atom label and one column per molecule, drop the outer
# column level that pivot adds for the value column, and put 0 wherever a
# molecule has no entry for an atom.
df2 = (
    df.pivot(index='atom', columns='molecule')
      .droplevel(0, axis=1)
      .fillna(0)
)
df2

molecule,C2H6,C3H8,CH4,CO,CO2,H2O,O2
atom,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
C,2,3,1,1,1,0,0
H,6,8,4,0,0,2,0
O,0,0,0,1,2,1,2
stoic,-1,-1,-1,2,b,c,a


In [90]:
# Split the molecule columns by their 'stoic' entry: entries that parse as a
# number are known coefficients, everything else (coerced to NaN) is an unknown.
stoic_numeric = pd.to_numeric(df2.loc['stoic'], errors='coerce')
fixed_col = stoic_numeric.notna()
var_col = ~fixed_col

# Every row except 'stoic' is one atom-balance equation.
nonstoic = df2.index != 'stoic'

# Contribution of the known species to each atom balance.
fixed_sum = (df2.loc[nonstoic, fixed_col] * pd.to_numeric(df2.loc['stoic', fixed_col])).sum(axis=1)

# Solve A @ x = -fixed_sum for the unknown coefficients. Both operands are cast
# to float64 explicitly: df2 is built from an object-dtype pivot, so without the
# cast the right-hand side is only numeric if pandas happened to downcast the
# columns during fillna.
coeff_matrix = df2.loc[nonstoic, var_col].to_numpy().astype(np.float64)
rhs = -(fixed_sum.to_numpy().astype(np.float64))
res = np.linalg.solve(coeff_matrix, rhs)

In [91]:
# Work on a copy so df2 keeps its symbolic coefficient labels, then overwrite
# the unknown entries of the 'stoic' row with the solved values (res is in the
# same column order as var_col selects).
df3=df2.copy()
df3.loc['stoic',var_col]=res
    

In [92]:
# Display the table with the solved coefficients filled into the 'stoic' row.
df3

molecule,C2H6,C3H8,CH4,CO,CO2,H2O,O2
atom,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
C,2,3,1,1,1.0,0.0,0.0
H,6,8,4,0,0.0,2.0,0.0
O,0,0,0,1,2.0,1.0,2.0
stoic,-1,-1,-1,2,4.0,9.0,-9.5


In [93]:
# Rebuild the balanced equation as text from the 'stoic' row of df3.
# The term lists and the loop variable use their own names so the `left`/`right`
# strings produced when the reaction was split (and the example `molecule`) are
# not overwritten; re-running the earlier cells after this one keeps working.
ROUND_DECIMALS = 9  # solver round-off beyond this many decimals is ignored

left_terms = []
right_terms = []
for species, stoic_num in zip(df3.columns, df3.loc['stoic']):
    # The solved coefficients are floats, so a whole number can come back as
    # e.g. 3.9999999999999996; snap such values to the integer before printing.
    snapped = round(float(stoic_num), ROUND_DECIMALS)
    stoic_num = int(snapped) if snapped.is_integer() else stoic_num

    # A coefficient of magnitude 1 is left unwritten.
    stoic_str = '' if np.abs(stoic_num) == 1 else f'{np.abs(stoic_num)}'

    # Negative coefficients go on the left of the arrow, the rest on the right.
    if stoic_num < 0:
        left_terms.append(stoic_str + species)
    else:
        right_terms.append(stoic_str + species)

print(' + '.join(left_terms), ' -> ', ' + '.join(right_terms))

C2H6 + C3H8 + CH4 + 9.5O2  ->  2CO + 4CO2 + 9H2O
