In [1]:
import os
from math import isnan
from numbers import Number
from pathlib import Path

import pandas as pd
import randonneur as rd
import randonneur_data as rdata

In [2]:
# Shared registry instance; write_for_version() reads this module-level name
# and adds each generated datapackage file to it.
registry = rdata.Registry()

In [10]:
def valid(a):
    """Return ``False`` only when ``a`` is a numeric NaN, ``True`` otherwise.

    Anything that is not a ``numbers.Number`` (strings, ``None``, ...) is
    accepted without further inspection.
    """
    if isinstance(a, Number):
        return not isnan(a)
    return True


def write_for_version(version, base_path, filename, sheet_name, combined):
    """Turn one SimaPro-to-ecoinvent mapping workbook into a registered datapackage.

    Reads ``sheet_name`` from ``base_path / filename``, builds one ``replace``
    entry per row that has a SimaPro process name, serializes the datapackage
    to ``simapro-ecoinvent-{version}-cutoff.json`` (a path relative to the
    current working directory) and adds that file to the module-level
    ``registry``.

    Parameters
    ----------
    version : str
        ecoinvent version label (e.g. ``"3.10"``); interpolated into the
        datapackage name, description, target id and output file name.
    base_path : pathlib.Path
        Directory containing the workbook.
    filename : str
        Workbook file name inside ``base_path``.
    sheet_name : str
        Worksheet to read; passed straight to ``pandas.read_excel``.
    combined : str
        Header of the column whose value, lower-cased and suffixed with
        ``.spold``, becomes the target ``filename``.

    Returns
    -------
    The value returned by ``registry.add_file`` for the written JSON file.

    Raises
    ------
    AssertionError
        If a row with a SimaPro process name has no text value in the
        ``combined`` column or no ``Geography`` value.
    """
    df = pd.read_excel(
        base_path / filename,
        sheet_name,
        # Only empty cells count as missing; pandas' built-in NA markers are
        # switched off (presumably so literal codes such as "NA" survive —
        # TODO confirm against the workbooks).
        na_values=[''],
        keep_default_na=False,
    )

    # Rows without a SimaPro process name carry no mapping and are skipped.
    rows = [
        row
        for row in df.to_dict(orient='records')
        if valid(row['Simapro Process Name'])
    ]

    # Validate up front so that gaps in the spreadsheet are reported with the
    # affected process names instead of surfacing as an opaque
    # "'float' object has no attribute 'lower'" while building the entries.
    missing_uuid = [
        row['Simapro Process Name']
        for row in rows
        if not isinstance(row[combined], str)
    ]
    assert not missing_uuid, (
        f"{len(missing_uuid)} row(s) in {filename!r} have no {combined!r} value, "
        f"e.g. {missing_uuid[:5]}"
    )
    missing_location = [
        row['Simapro Process Name']
        for row in rows
        if not valid(row['Geography'])
    ]
    assert not missing_location, (
        f"{len(missing_location)} row(s) in {filename!r} have no 'Geography' value, "
        f"e.g. {missing_location[:5]}"
    )

    data = [
        {
            'source': {
                'identifier': row['Simapro Process Identifier'],
                'name': row['Simapro Process Name'],
                'platform_id': row['PlatformID'],
            },
            'target': {
                'filename': row[combined].lower() + ".spold",
                'name': row['Activity Name'],
                'location': row['Geography'],
                'reference product': row['Reference Product Name'],
                'unit': row['Unit'],
            },
        }
        for row in rows
    ]

    dp = rd.Datapackage(
        name=f"simapro-ecoinvent-{version}-cutoff",
        description=f"Data migration file from SimaPro 9 to ecoinvent-{version}-cutoff generated by PRé and provided via request at https://support.simapro.com/s/contactsupport",
        contributors=[
            {"title": "PRé", "path": "https://pre-sustainability.com/", "role": "author"},
            {"title": "Chris Mutel", "path": "https://chris.mutel.org/", "role": "wrangler"},
        ],
        mapping_source=rd.MappingConstants.SIMAPRO_CSV,
        mapping_target=rd.MappingConstants.ECOSPOLD2,
        version="2.1.0",
        source_id="SimaPro-9",
        target_id=f"ecoinvent-{version}-cutoff",
    )
    dp.add_data("replace", data)

    # to_json() writes the file; its return value is handed to the registry,
    # replacing any entry that is already registered.
    json_path = dp.to_json(Path(f"simapro-ecoinvent-{version}-cutoff.json"))
    return registry.add_file(json_path, replace=True)

In [11]:
# Folder holding the mapping workbooks that write_for_version() reads.
# Set the SIMAPRO_MAPPING_DIR environment variable to point at your own copy;
# the fallback is the original author's machine-specific location.
base_path = Path(
    os.environ.get(
        "SIMAPRO_MAPPING_DIR",
        '/Users/cmutel/Projects/SimaPro - ecoinvent mapping/',
    )
)

In [12]:
# ecoinvent 3.10 cutoff mapping. The cell's displayed value is whatever
# write_for_version() returns for the registered file.
write_for_version(
    version="3.10", 
    base_path=base_path, 
    filename="ecoinvent 3.10 Cut-off - SimaPro mapping.xlsx", 
    sheet_name="Mapping_Results_2024-04-25", 
    combined='Activity_UUID_Product_UUID',
)

PosixPath('/Users/cmutel/Code/randonneur_data/randonneur_data/data/simapro-ecoinvent-3.10-cutoff.xz')

In [13]:
# ecoinvent 3.8 cutoff mapping. Note that this workbook's combined UUID
# column header uses spaces and an ampersand rather than underscores.
write_for_version(
    version="3.8", 
    base_path=base_path, 
    filename='ecoinvent cut-off 3.8 - SimaPro mapping file.xlsx', 
    sheet_name="ecoinventCut-off38-SimaPro", 
    combined='Activity UUID & Product UUID'
)

PosixPath('/Users/cmutel/Code/randonneur_data/randonneur_data/data/simapro-ecoinvent-3.8-cutoff.xz')

In [14]:
# ecoinvent 3.9.1 cutoff mapping; the combined UUID column header here uses
# underscores.
write_for_version(
    version="3.9.1", 
    base_path=base_path, 
    filename='Ecoinvent_391_SimaPro_Mapping_Results_2023-10-03.xlsx', 
    sheet_name="Mapping_Results_2023-10-03", 
    combined='Activity_UUID_Product_UUID'
)

PosixPath('/Users/cmutel/Code/randonneur_data/randonneur_data/data/simapro-ecoinvent-3.9.1-cutoff.xz')