<a href="https://colab.research.google.com/github/chetools/StemUnleashed/blob/main/EquationBalancingQR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [257]:
import re
from collections import Counter
import numpy as np
import pandas as pd
import scipy as sp
# Print NumPy arrays with 3 digits of precision and allow lines up to 200 characters wide.
np.set_printoptions(precision=3, linewidth=200)

In [150]:
# One element token: group 1 is the symbol (an uppercase letter optionally followed by
# one lowercase letter), group 2 is the run of digits after it (possibly empty).
element_pattern = re.compile(r"([A-Z][a-z]?)(\d*)")

def str2int(s):
    """Convert an optional subscript/multiplier string to an integer.

    Returns 1 when ``s`` is ``None`` or the empty string (an omitted count),
    otherwise ``int(s)``.
    """
    if s is None or s == '':
        return 1
    return int(s)

def get_molecule_counts(s):
    """Count the atoms of each element in a parenthesis-free formula fragment.

    Every ``element_pattern`` match in ``s`` contributes its symbol with the
    count given by ``str2int`` of the trailing digits. Repeated symbols are
    summed. Returns a ``Counter`` mapping symbol -> count.
    """
    totals = Counter()
    for match in element_pattern.finditer(s):
        symbol, digits = match.groups()
        amount = str2int(digits)
        # Counter addition never keeps non-positive entries, so an explicit
        # zero subscript must not create a key here either.
        if amount > 0:
            totals[symbol] += amount
    return totals

def get_span_counts(s):
    """Split a formula into its parenthesis-free fragments and count each one.

    A fragment starts at an uppercase letter and runs up to (not including)
    the next ``(`` or ``)`` or the end of the string.

    Parameters
    ----------
    s : str
        Formula text, e.g. ``"Cu(NO3)2"``.

    Returns
    -------
    list of (int, Counter)
        One tuple per fragment: its start offset in ``s`` and the element
        counts returned by ``get_molecule_counts`` for that fragment.
    """
    # `*` rather than `+` after the first letter: a fragment may be a single
    # one-letter element ("C", "S", "K", or the leading "C" in "C(CH3)4").
    # With `+` such fragments needed a second character and were dropped.
    return [
        (fragment.start(), get_molecule_counts(fragment.group(0)))
        for fragment in re.finditer(r"[A-Z][^\(\)]*", s)
    ]

def get_formula(s):
    """Return the total element counts of a formula with parenthesised groups.

    The formula is broken into two kinds of positioned tokens: parentheses
    (each with the optional integer that follows it, via ``str2int``) and
    parenthesis-free fragments from ``get_span_counts``. The tokens are
    processed left to right with a stack, so the counts inside ``( ... )n``
    are multiplied by ``n`` before being merged into the enclosing level.

    Parameters
    ----------
    s : str
        Formula text, e.g. ``"Cu(NO3)2"``.

    Returns
    -------
    Counter
        Mapping element symbol -> number of atoms.

    Raises
    ------
    ValueError
        If the parentheses in ``s`` are not balanced.
    """
    span_counts = get_span_counts(s)
    # (offset, '(' or ')', multiplier); a missing multiplier counts as 1.
    multipliers = [
        (m.start(), m.group(1), str2int(m.group(2)))
        for m in re.finditer(r"([\(\)])(\d+)?", s)
    ]
    # Merge both token kinds into document order.
    exp_stack = sorted(multipliers + span_counts, key=lambda tup: tup[0])
    stack = []
    counts = Counter()
    for exp in exp_stack:
        if exp[1] == '(':
            # Park the enclosing level and start a fresh one for the group.
            stack.append(counts)
            counts = Counter()
        elif exp[1] == ')':
            if not stack:
                raise ValueError(
                    "unbalanced ')' at position %d in formula %r" % (exp[0], s)
                )
            # Scale the finished group and fold it into the enclosing level.
            counts = stack.pop() + Counter({k: v*exp[2] for k, v in counts.items()})
        else:
            counts += exp[1]
    if stack:
        # Returning here would silently discard everything outside the open group.
        raise ValueError("unclosed '(' in formula %r" % (s,))
    return counts

In [270]:
def get_coeffs(rxn):
    """Return an orthonormal basis for the balancing coefficients of a reaction.

    The reaction string is split on ``->`` into two sides and each side on
    ``+`` into species. A composition matrix is built with one row per
    species and one column per element; entries for species on the right-hand
    side are negated. A coefficient vector balances the reaction when it lies
    in the left null space of that matrix.

    Parameters
    ----------
    rxn : str
        Reaction such as ``"Cu + HNO3 -> Cu(NO3)2 + NO + H2O"``. Whitespace
        is ignored.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_species, k)`` whose ``k`` orthonormal columns span
        the left null space. Rows follow the species order in ``rxn``. Each
        column is oriented so that its entries sum to a non-negative value.
        ``k`` is 0 when no non-trivial balance exists.
    """
    rxn = re.sub(r'\s+', '', rxn)
    lhs, rhs = rxn.split('->')
    reactants = [dict(get_formula(s)) for s in lhs.split("+")]
    products = [{k: -v for k, v in get_formula(s).items()} for s in rhs.split("+")]
    composition = pd.DataFrame(reactants + products).fillna(0).values

    # Column-pivoted QR is rank-revealing: |diag(R)| is non-increasing, so the
    # numerical rank is the number of diagonal entries above the tolerance.
    # Without pivoting, a dependent element column early in the matrix (e.g.
    # N and O that only ever occur together as NO3) hides the rank deficiency
    # from the last column of R.
    q, r, _ = sp.linalg.qr(composition, pivoting=True)
    diag = np.abs(np.diag(r))
    tol = 1e-10 * diag.max() if diag.size else 0.0
    rank = int(np.sum(diag > tol))

    # Columns of Q beyond the rank are orthogonal to every column of the
    # composition matrix. Slicing with `rank:` (rather than `-k:`) also gives
    # the correct empty result when k == 0.
    null_basis = q[:, rank:]

    # QR determines each basis vector only up to sign; pick the orientation
    # with a non-negative sum so a single balancing vector comes out positive.
    signs = np.where(null_basis.sum(axis=0) < 0, -1.0, 1.0)
    return null_basis * signs

In [271]:
# Example reactions to balance; only rxn3 is passed to get_coeffs below.
rxn1 = "CuS + Cu2S + HNO3 -> Cu(NO3)2 + CuSO4 + NO2 + H2O"
rxn2 = "KNO3 + C12H22O11 -> N2 + CO2 + H2O + K2CO3"
rxn3 = "Cu + HNO3 -> Cu(NO3)2 + NO + H2O"
# q holds the array returned by get_coeffs for rxn3.
q=get_coeffs(rxn3)

In [272]:
# Display the array returned by get_coeffs(rxn3).
q

array([[0.297],
       [0.792],
       [0.297],
       [0.198],
       [0.396]])

In [276]:
# Rescale q so that its first entry (the first species in rxn3) equals 3.
# q[:1] is the first row of q; np.linalg.solve requires a square matrix, so this
# only works when q has exactly one column.
r=q @ np.linalg.solve(q[:1],np.array([3.]))
r

array([3., 8., 3., 2., 4.])