<a href="https://colab.research.google.com/github/hakim-cyber/Comp-Science-for-physics-and-chemistry/blob/main/CSPC_PW7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:

from google.colab import files
# Ask for the input files. Later cells index `uploaded` by file name and call
# .decode() on each value; the recorded run supplied Formula.csv,
# BalanceEquation1.csv and BalanceEquation2.csv.
uploaded = files.upload()  # choose file

Saving Formula.csv to Formula (4).csv
Saving BalanceEquation1.csv to BalanceEquation1 (4).csv
Saving BalanceEquation2.csv to BalanceEquation2 (4).csv


In [12]:

import csv, re, io

FORMULA_WHOLE_RE = re.compile(r'^(?:[A-Z][a-z]?\d*)+$')

# Same idea as FORMULA_WHOLE_RE, but additionally accepts one level of
# parenthesised groups with an optional multiplier, e.g. "Ca(OH)2" or
# "Al2(SO4)3". Used with fullmatch(), so no anchors are needed.
_FORMULA_WITH_GROUPS_RE = re.compile(
    r'(?:[A-Z][a-z]?\d*|\((?:[A-Z][a-z]?\d*)+\)\d*)+'
)


def extract_formula(cell: str) -> str:
    """Pick the chemical formula out of a "name;formula" style cell.

    The cell is split on ';' and ','. Segments are examined from last to
    first, and the first one that consists solely of element symbols with
    optional counts (optionally grouped in parentheses) is returned.

    Args:
        cell: Raw cell text, e.g. "Acetic acid;CH3COOH".

    Returns:
        The matching segment, or the stripped cell unchanged when no segment
        looks like a formula.
    """
    segments = [part.strip() for part in re.split(r'[;,]', cell)]
    # The formula normally follows the name, so scan from the right.
    for seg in reversed(segments):
        if seg and _FORMULA_WITH_GROUPS_RE.fullmatch(seg):
            return seg
    return cell.strip()


def read_col_csv(file_name: str) -> list[str]:
    """Read an uploaded CSV and return one string per non-empty row.

    The bytes stored under ``uploaded[file_name]`` are decoded as UTF-8
    (undecodable bytes are dropped), parsed with ``csv.reader``, and each
    row's fields are re-joined with ',' and stripped. Rows that end up empty
    are skipped.
    """
    text = uploaded[file_name].decode('utf-8', errors='ignore')
    joined_rows = (
        ",".join(fields).strip() for fields in csv.reader(io.StringIO(text))
    )
    return [entry for entry in joined_rows if entry]


TOKEN_RE = re.compile(r"([A-Z][a-z]?)(\d*)")
COEF_RE  = re.compile(r"^\s*(\d+)?\s*([A-Za-z].*?)\s*$")

# One scanning step of a formula: an element with optional count, an opening
# parenthesis, or a closing parenthesis with an optional group multiplier.
_FORMULA_PART_RE = re.compile(r"([A-Z][a-z]?)(\d*)|(\()|\)(\d*)")


def _merge_counts(target: dict, group: dict, factor: int) -> None:
    """Add every count of *group*, scaled by *factor*, into *target*."""
    for atom, count in group.items():
        target[atom] = target.get(atom, 0) + count * factor


def parse_formula(formula: str) -> dict:
    """Count the atoms of each element in a chemical formula.

    Element symbols are an uppercase letter optionally followed by one
    lowercase letter; a missing count means 1. Parenthesised groups
    (including nested ones) are multiplied by the number that follows the
    closing parenthesis, so "Ca(OH)2" yields {"Ca": 1, "O": 2, "H": 2}.

    Characters that are not part of a symbol, count or parenthesis are
    ignored, and unbalanced parentheses are tolerated: a stray ')' is
    skipped and an unclosed '(' is treated as a group with multiplier 1.

    Args:
        formula: Formula text such as "H2SO4" or "Al2(SO4)3".

    Returns:
        A dict mapping element symbol to its total count.
    """
    # stack[0] is the whole formula; each '(' pushes a fresh group on top.
    stack = [{}]
    for part in _FORMULA_PART_RE.finditer(formula):
        atom, num, opening, multiplier = part.groups()
        if atom is not None:
            current = stack[-1]
            current[atom] = current.get(atom, 0) + (int(num) if num else 1)
        elif opening is not None:
            stack.append({})
        elif len(stack) > 1:
            group = stack.pop()
            _merge_counts(stack[-1], group, int(multiplier) if multiplier else 1)
        # else: a ')' with no matching '(' -- ignore it.

    # Fold any groups left open at the end of the string into their parent.
    while len(stack) > 1:
        group = stack.pop()
        _merge_counts(stack[-1], group, 1)
    return stack[0]

def parse_compound(token: str):
    """Split one equation term into ``(coefficient, formula)``.

    The term is matched against ``COEF_RE``: optional leading digits give the
    coefficient (1 when absent) and the remainder, starting at the first
    letter, is the formula text.

    Raises:
        ValueError: If the term does not match ``COEF_RE``.
    """
    matched = COEF_RE.match(token)
    if matched is None:
        raise ValueError(f"Invalid  token: {token!r}")
    digits = matched.group(1)
    body = matched.group(2)
    multiplier = int(digits) if digits else 1
    return multiplier, body

def parse_equation(eq: str):
    """Split an equation into its reactant and product terms.

    The text is split at the first "->"; each side is then split on "+" and
    every term is handed to ``parse_compound``.

    Args:
        eq: Equation text such as "N2+3H2->2NH3".

    Returns:
        A ``(reactants, products)`` pair, each a list of whatever
        ``parse_compound`` returns for the corresponding term.

    Raises:
        ValueError: If *eq* has no "->" separator, or if ``parse_compound``
            rejects one of the terms.
    """
    if "->" not in eq:
        raise ValueError(f"Equation must contain '->': {eq!r}")
    left, right = eq.split("->", 1)
    reactants = [parse_compound(tok.strip()) for tok in left.split("+")]
    products = [parse_compound(tok.strip()) for tok in right.split("+")]
    return reactants, products

def atoms_on_side(side):
    """Total the atoms of every element on one side of an equation.

    *side* is an iterable of ``(coefficient, formula)`` pairs. Each formula
    is expanded with ``parse_formula`` and its counts are scaled by the
    coefficient before being accumulated.

    Returns:
        A dict mapping element symbol to its summed count.
    """
    totals = {}
    for multiplier, formula in side:
        for element, count in parse_formula(formula).items():
            previous = totals.get(element, 0)
            totals[element] = previous + multiplier * count
    return totals

def is_balanced(eq: str) -> bool:
    """Report whether both sides of *eq* contain the same atoms.

    The equation is parsed with ``parse_equation`` and the per-element totals
    from ``atoms_on_side`` are compared for the two sides.

    Args:
        eq: Equation text such as "N2+3H2->2NH3".

    Returns:
        True when the totals match exactly; False when they differ or when
        the equation cannot be processed.
    """
    try:
        reactants, products = parse_equation(eq)
        return atoms_on_side(reactants) == atoms_on_side(products)
    except Exception:
        # Deliberate best effort: an equation that cannot be parsed counts as
        # "not balanced" instead of aborting the scan over the whole file.
        return False

# Locate each uploaded file by the lowercased prefix of its name; the first
# match wins and a missing file yields None.
FORMULA_FILE, BAL1_FILE, BAL2_FILE = (
    next((name for name in uploaded if name.lower().startswith(prefix)), None)
    for prefix in ("formula", "balanceequation1", "balanceequation2")
)

def read_onecol_csv(file_name: str):
    """Return the non-blank lines of an uploaded file.

    The bytes stored under ``uploaded[file_name]`` are decoded as UTF-8
    (undecodable bytes are dropped) and split into lines. Each line is
    stripped of surrounding whitespace, and lines that are empty after
    stripping are left out.
    """
    text = uploaded[file_name].decode('utf-8', errors='ignore')
    stripped_lines = (raw_line.strip() for raw_line in text.splitlines())
    return [entry for entry in stripped_lines if entry]
# Load the stripped, non-blank lines of each detected input file.
formulas_raw = read_onecol_csv(file_name=FORMULA_FILE)
eqs1_raw = read_onecol_csv(file_name=BAL1_FILE)
eqs2_raw = read_onecol_csv(file_name=BAL2_FILE)



Formulas: ['Acetic acid;CH3COOH', 'Hydrochloric acid;HCl', 'Sulfuric acid;H2SO4', 'Ammonia;NH3', 'Nitric acid;HNO3']
Eq1: ['CH4+4O2->CO2+4H2O', 'N2+2H2->2NH3', '3Mg+3Cl2->3MgCl2']
Eq2: ['C3H8+4O2->3CO2+3H2O', 'H3PO4+2KOH->K3PO4+H20', 'C2H6+4O2->2CO2+4H2O']


In [15]:

from google.colab import files
import io, csv, re


def has_exact_C2_H5(cell: str) -> bool:
    """Return True when the cell's formula has exactly 2 C and 5 H atoms.

    The formula is taken from the cell with ``extract_formula`` and counted
    with ``parse_formula``; other elements in the formula are not considered.
    """
    counts = parse_formula(extract_formula(cell))
    carbon = counts.get("C", 0)
    hydrogen = counts.get("H", 0)
    return (carbon, hydrogen) == (2, 5)

# Keep every formula cell whose formula has exactly two C and five H atoms.
c2h5_matches = list(filter(has_exact_C2_H5, formulas_raw))

print("exact c2h5")
for matching_cell in c2h5_matches:
    print(matching_cell)
print("Total :", len(c2h5_matches))
print()

# Save to CSV: a header row followed by one original cell per row.
with open("C2H5_matches.csv", "w", encoding="utf-8", newline="") as out_file:
    writer = csv.writer(out_file)
    writer.writerow(["original_cell"])
    writer.writerows([matching_cell] for matching_cell in c2h5_matches)
print("Saved: C2H5_matches.csv")

# 1-based positions within eqs1_raw of every equation that balances.
balanced_lines_1 = [
    position
    for position, equation in enumerate(eqs1_raw, start=1)
    if equation.strip() and is_balanced(equation)
]

print("balanced line numbers 1")
print(balanced_lines_1)
print("Total balanced:", len(balanced_lines_1))

# Write a header row followed by one line number per row.
with open("BalancedLines_Bal1.csv", "w", encoding="utf-8", newline="") as out_file:
    writer = csv.writer(out_file)
    writer.writerow(["balanced_line_number_1_based"])
    writer.writerows([number] for number in balanced_lines_1)
print("Saved: BalancedLines_Bal1.csv")

# Count the non-blank equations in the second file that balance.
balanced_count_2 = len(
    [equation for equation in eqs2_raw if equation.strip() and is_balanced(equation)]
)
print(" count of balanced equations 2")
print(balanced_count_2)

# The text file holds just the number, with no trailing newline.
with open("BalancedCount_Bal2.txt", "w", encoding="utf-8") as out_file:
    out_file.write(f"{balanced_count_2}")
print("Saved: BalancedCount_Bal2.txt")



exact c2h5
Acetamide;C2H5NO
Total : 1

Saved: C2H5_matches.csv
balanced line numbers 1
[3, 4, 5, 7, 9, 10]
Total balanced: 6
Saved: BalancedLines_Bal1.csv
 count of balanced equations 2
39
Saved: BalancedCount_Bal2.txt
