# Match - Natural resources

If the name is exactly the same, then we can be pretty confident there is a 1-1 correspondence.

In [1]:
import pandas as pd
from pathlib import Path
from datetime import datetime, timezone
from notebook_utils import finish_notebook

Get paths of input and output directories

In [2]:
# Both directories sit under the "Mapping" folder next to the current working
# directory; resolve() turns them into absolute paths.
mapping_dir = Path.cwd().parent / "Mapping"
input_data_dir = (mapping_dir / "Input" / "Flowlists").resolve()
existing_matches_dir = (mapping_dir / "Output" / "Mapped_files").resolve()

Read input dataframes

In [3]:
# SimaPro flow list, read from the input flow-list directory.
sp_flowlist_path = input_data_dir / 'SimaProv9.4.csv'
sp = pd.read_csv(sp_flowlist_path)

In [4]:
# ecoinvent flow list, read from the input flow-list directory.
ei_flowlist_path = input_data_dir / 'ecoinventEFv3.7.csv'
ei = pd.read_csv(ei_flowlist_path)

## Merge based on ecoinvent `Context`

The SimaPro input file has the context `Resources` (the code below selects these rows with `Context == 'Raw materials'`) - we need to use the ecoinvent context to match to subcontexts.

We will use this mapping, and iterate one by one over the subcontexts:

| ecoinvent context | SimaPro context | Match condition (SimaPro to ecoinvent) |
| ----------------- | --------------- | -------------------------------------- |
| natural resource/land | Resources/land | = |
| natural resource/unspecified | Resources/(unspecified) | = |
| natural resource/biotic | Resources/biotic | = |
| natural resource/in ground | Resources/in ground | = |
| natural resource/in air | Resources/in air | = |
| natural resource/in water | Resources/in water | = |
| natural resource/in water | Resources/fossil well | ~ |

In [5]:
# Each SimaPro subcontext maps to exactly one ecoinvent context; note that two
# SimaPro subcontexts ("in water" and "fossil well") share the same ecoinvent one.
ei_context_by_sp_context = {
    "Resources/land": "natural resource/land",
    "Resources/(unspecified)": "natural resource/unspecified",
    "Resources/biotic": "natural resource/biotic",
    "Resources/in ground": "natural resource/in ground",
    "Resources/in air": "natural resource/in air",
    "Resources/in water": "natural resource/in water",
    "Resources/fossil well": "natural resource/in water",
}

# List of (ecoinvent context, SimaPro context) pairs, in the order given above.
contexts = [
    (ei_name, sp_name) for sp_name, ei_name in ei_context_by_sp_context.items()
]

In [6]:
# Match SimaPro flows to ecoinvent flows that have exactly the same `Flowable`
# name, one (ecoinvent context, SimaPro context) pair at a time.

# The SimaPro selection does not depend on the pair being processed, so it is
# computed once here instead of on every loop iteration.
sp_filtered = sp[sp.Context == 'Raw materials']

result = []

for ei_context, sp_context in contexts:
    ei_filtered = ei[ei.Context == ei_context]
    df_subcontext = sp_filtered.merge(ei_filtered, how="inner", on="Flowable")
    # Both inputs carry a `Context` column, so the merge suffixes them and
    # `Context_x` is the SimaPro side; overwrite it with the SimaPro subcontext
    # of this pair. Item assignment is used because attribute assignment would
    # silently set a plain Python attribute if the column were ever missing.
    df_subcontext["Context_x"] = sp_context
    result.append(df_subcontext)

In [7]:
# Stack the per-subcontext matches into a single frame.
df = pd.concat(result)

# The names matched exactly, so source and target flow names are both the
# shared `Flowable` value.
df['SourceFlowName'] = df['Flowable']
df['TargetFlowName'] = df['Flowable']

In [8]:
# Metadata passed along with the matched flows, kept separate from the call.
notebook_metadata = {
    "author": "Chris Mutel",
    "notebook_name": "Match - Resources",
    "filename": "identical-names-in-resources",
}

# NOTE(review): finish_notebook comes from notebook_utils and is not visible
# here - presumably it writes the mapping output; confirm in that module.
finish_notebook(df=df, **notebook_metadata)