In [1]:
import numpy as np
import pandas as pd

input_path = '../data/'
combined_factors_path = input_path+'combined/processConversionFactors_w_uncertainties.csv'
icis_types_path = input_path + 'processed/icisFacilityProduction_w_uncertainties.csv'

defined_matches_path = input_path+'extra_inputs/all_icis_to_ihs_manual_input.csv'
output_path = '../data/'

In [4]:
# Create ICIS to IHS links

combined_factors = pd.read_csv(combined_factors_path)
reduced = combined_factors[combined_factors.columns[['sigma' not in i for i in combined_factors.columns]]]

# Association creation for ihs to icis
icis_ihs_matches = pd.DataFrame(columns=['ihs_match', 'PRODUCT', 'ROUTE', 'TECHNOLOGY', 'LICENSOR'])
ihs_types = reduced[['Product','ihs_match']].dropna(subset=['ihs_match'])
name_adjusts = {'PROPYLENE, POLYMER GRADE':'PROPYLENE', 'TEREPHTHALIC ACID':'PTA', 'VINYL ACETATE':'VINYL ACETATE M.'}
ihs_types['Product'] = ihs_types['Product'].str.upper().replace(name_adjusts)
icis_types = pd.read_csv(icis_types_path, index_col=0)[['PRODUCT','ROUTE','TECHNOLOGY','LICENSOR']].drop_duplicates()

def single_value(df, ans1):
    if len(df) == 0:
        match = ans1
    elif len(df) == 1:
        match = df.copy()
    else:
        return False
    return match

for row in icis_types.iloc:
    options = ihs_types[[row['PRODUCT']==product for product in ihs_types['Product']]]
    match = single_value(options, pd.DataFrame(np.array([np.NaN,np.NaN]).reshape(1,2), columns=['ihs_match','Product']))
    if match is False:
        double_match = options[[row['ROUTE'] in option for option in options['ihs_match']]]
        match = single_value(double_match, options)
        if match is False:
            triple_match = double_match[[row['TECHNOLOGY'] in option for option in double_match['ihs_match']]]
            match = single_value(triple_match, double_match)
            if match is False:
                quad_match = triple_match[[row['LICENSOR'] in option for option in triple_match['ihs_match']]]
                match = single_value(quad_match, triple_match)
            if match is False: match = quad_match

    for col in ['PRODUCT', 'ROUTE', 'TECHNOLOGY', 'LICENSOR']:
        match[col] = row[col]
    icis_ihs_matches = pd.concat((icis_ihs_matches, match), axis=0)
    icis_ihs_matches.drop(columns=['Product'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  match[col] = row[col]


In [6]:
## Add manually input matches for unfound
defined_matches = pd.read_csv(defined_matches_path, index_col=0)
unfound = icis_ihs_matches[icis_ihs_matches['ihs_match'].isnull()][['PRODUCT', 'ROUTE']].drop_duplicates()
unfound_matches = unfound.merge(defined_matches, on=['PRODUCT','ROUTE'], how='left')

In [13]:
## Add manually input matches for unfound
defined_matches = pd.read_csv(defined_matches_path, index_col=0)
unfound = icis_ihs_matches[icis_ihs_matches['ihs_match'].isnull()][['PRODUCT', 'ROUTE']].drop_duplicates()
unfound_matches = unfound.merge(defined_matches, on=['PRODUCT','ROUTE'], how='left')

# Output new unfound matches for continued manual input
unfound_matches.to_csv(defined_matches_path, index=False)

# Incorporate manual matches to full matches
matches_update = icis_ihs_matches[icis_ihs_matches['ihs_match'].isnull()].drop(columns=['ihs_match']).merge(unfound_matches, on=['PRODUCT','ROUTE'], how='left')
icis_ihs_matches_combined = pd.concat((icis_ihs_matches.dropna(subset=['ihs_match']), matches_update), axis=0)

# Write all ICIS to IHS matches
icis_ihs_matches_combined.sort_values(['PRODUCT','ROUTE','TECHNOLOGY','LICENSOR','ihs_match']).drop_duplicates().to_csv(output_path+'extra_inputs/all_icis_to_ihs_matches.csv')

# production_update[['PRODUCT','ROUTE','ihs_match']].sort_values(['PRODUCT','ROUTE']).drop_duplicates().to_csv(output_path+'all_icis_to_ihs_manual_input.csv')