# Match - Minor random name differences

In [1]:
import pandas as pd
from pathlib import Path
from notebook_utils import *

Get paths of input and output directories

In [2]:
# Both directories are located relative to the parent of the current working
# directory, under "Mapping"; resolve() turns them into absolute paths.
input_data_dir = Path.cwd().parent.joinpath("Mapping", "Input", "Flowlists").resolve()
existing_matches_dir = Path.cwd().parent.joinpath("Mapping", "Output", "Mapped_files").resolve()

Read input dataframes

In [3]:
# SimaPro flow list (file name indicates v9.4), read with pandas' default CSV settings.
sp = pd.read_csv(input_data_dir.joinpath("SimaProv9.4.csv"))

In [4]:
# ecoinvent flow list (file name indicates EF v3.7), read with pandas' default CSV settings.
ei = pd.read_csv(input_data_dir.joinpath("ecoinventEFv3.7.csv"))

The name mappings below are adapted from [jhuo2021/Importing_Agri-footprint_to_BW](https://github.com/jhuo2021/Importing_Agri-footprint_to_BW/blob/main/Data/migrations/agrifootprint-6-economic.json) (`Data/migrations/agrifootprint-6-economic.json`).

In [5]:
# Hand-curated (SimaPro flowable name, ecoinvent flowable name) pairs.
# `good_data` holds the pairs considered reliable.
good_data = [
    ('2,4-D, dimethylamine salt', 'Dimethylamine'),
    ('Acids, unspecified', 'Acidity, unspecified'),
    ('Argon-40/kg', 'Argon-40'),
    ('Arsenic', 'Arsenic V'),
    ('Benzo(b,j,k)fluoranthene', 'Benzo(b)fluoranthene'),
    ('BOD5 (Biological Oxygen Demand)', 'BOD5, Biological Oxygen Demand'),
    ('BOD5 (Biological Oxygen Demand), NO', 'BOD5, Biological Oxygen Demand'),
    ('Chlorotoluron', 'Chlortoluron'),
    ('Chromium III', 'Chromium'),
    ('Coal, 26.4 MJ per kg', 'Coal, hard, unspecified, in ground'),
    ('Coal, 29.3 MJ per kg', 'Coal, hard, unspecified, in ground'),
    ('COD (Chemical Oxygen Demand)', 'COD, Chemical Oxygen Demand'),
    ('COD (Chemical Oxygen Demand), NO', 'COD, Chemical Oxygen Demand'),
    ('Dioxin, 2,3,7,8 Tetrachlorodibenzo-p-', 'Dioxins, measured as 2,3,7,8-tetrachlorodibenzo-p-dioxin'),
    ('Discarded fish, demersal', 'Discarded fish, demersal, to ocean'),
    ('Discarded fish, pelagic', 'Discarded fish, pelagic, to ocean'),
    ('Flurochloridone', 'Fluorochloridone'),
    ('Kaolin ore', 'Kaolinite, 24% in crude ore, in ground'),
    ('Lead-210/kg', 'Lead-210'),
    ('Metaldehyde (tetramer)', 'Metaldehyde'),
    ('Metolachlor, (S)', 'Metolachlor'),
    ('Nitrate compounds', 'Nitrate'),
    ('p-Xylene', 'Xylene'),
    ('Phenols, unspecified', 'Phenol'),
    ('Radioactive species, unspecified', 'Radioactive species, other beta emitters'),
    ('Radium-226/kg', 'Radium-226'),
    ('Radium-228/kg', 'Radium-228'),
    ('Sand, gravel and stone, extracted for use', 'Gravel, in ground'),
    ('Tri-allate', 'Triallate'),
]

# Pairs the author flagged as uncertain ("not sure that these are perfect").
# They are still included in `data`; review them before relying on the result.
unsure_data = [
    ('1-Butene', 'Butene'),
    ('2-Butene', 'Butene'),
    ('BTEX (Benzene, Toluene, Ethylbenzene, and Xylene), unspecified ratio', 'Benzene'),
    ('Calcium carbonate', 'Calcium, in ground'),
    ('Calcium chloride', 'Calcium, in ground'),
    ('Ethane, 1,1,2-trifluoro-, HFC-143', 'Ethane, 1,1,1-trifluoro-, HFC-143a'),
    ('Magnesium chloride', 'Magnesium, in ground'),
    ('Phosphate, NO', 'Phosphate'),
    ('Sulfur monoxide', 'Sulfur oxides'),
]

# Full mapping: reliable pairs first, uncertain pairs after.
data = good_data + unsure_data

# Invariant: every SimaPro name (first tuple element) appears at most once.
# The mapping is joined onto the SimaPro flow list by that name, so a repeated
# key would silently duplicate rows in the merged output.
_source_names = [source for source, _ in data]
_duplicated = sorted({name for name in _source_names if _source_names.count(name) > 1})
assert not _duplicated, f"Duplicate SimaPro flowables in mapping: {_duplicated}"

In [6]:
# Tabular form of the name pairs: one row per pair, first element in
# "Flowable", second element in "EcoinventFlowable".
ecoinvent_names = pd.DataFrame(data=data, columns=["Flowable", "EcoinventFlowable"])

In [7]:
# Keep only the SimaPro rows whose "Flowable" has a hand-mapped ecoinvent name,
# and attach that name as the "EcoinventFlowable" column (inner join).
#
# - Any pre-existing "EcoinventFlowable" column is dropped first so the cell is
#   safe to re-run: without this, a second execution would merge against the
#   already-merged frame and pandas would emit "EcoinventFlowable_x"/"_y"
#   instead, breaking the downstream join on "EcoinventFlowable".
# - validate="many_to_one" makes pandas raise a MergeError if a SimaPro name is
#   listed more than once in the mapping, rather than silently duplicating rows.
sp = sp.drop(columns=["EcoinventFlowable"], errors="ignore").merge(
    ecoinvent_names,
    how="inner",
    on="Flowable",
    validate="many_to_one",
)

In [8]:
# Run the same matching once per compartment.
# NOTE(review): expand_simapro_context, add_ecoinvent_context_column and
# finish_notebook come from notebook_utils (star import); their exact behaviour
# is not visible here. Presumably they restrict/expand the SimaPro rows for the
# given compartment, add the matching ecoinvent context as "EcoinventContext",
# and write out the result -- confirm against notebook_utils.
for compartment in ("air", "water", "soil", "resources"):
    sp_in_context = expand_simapro_context(sp, compartment)
    sp_with_ei_context = add_ecoinvent_context_column(
        sp_in_context,
        label="EcoinventContext",
        kind=compartment,
    )

    # Inner join: keep only SimaPro rows whose mapped name and context both
    # exist in the ecoinvent flow list.
    matched = sp_with_ei_context.merge(
        ei,
        left_on=["EcoinventFlowable", "EcoinventContext"],
        right_on=["Flowable", "Context"],
        how="inner",
    )

    finish_notebook(
        df=matched,
        author="Chris Mutel",
        notebook_name="Match - Minor random name differences",
        filename=f"minor-name-differences-in-{compartment}",
    )