# Evaluation of supplier risk

#### library and data imports

In [None]:
import pandas as pd

# Base names (without extension) of the processed CSV files, listed in the
# same order in which they are unpacked into DataFrames below.
processed_files = [
    'addresses_cleaned',
    'articles_cleaned',
    'indices_cleaned',
    'orders_cleaned',
    'suppliers_cleaned',
]
# Read each processed file; row 0 of every CSV provides the column names.
addresses, articles, indices, orders, suppliers = (
    pd.read_csv(f'../data/processed/{name}.csv', header=0)
    for name in processed_files
)
# Helper table that is merged on 'country' further down to attach country_id
country_mapping = pd.read_csv('../data/helper/country_mapping.csv', header=0)


#### Calculation of the quantitative risk score for suppliers based on the following factors:
1. human_rights_index (LkSG violation risk concerning human rights)
2. environmental_risk (LkSG violation risk concerning environmental regulations)
3. certificates_valid (missing certificates increase risk of liability)
4. average order value (risk of production losses due to overrepresentation of a single supplier)
5. sum of order_value (risk of production losses due to overrepresentation of a single supplier)



In [None]:
def determine_risk_parameters(row):
    """Derive the five individual risk components for one supplier record.

    Args:
        row: Mapping-like record (for example one DataFrame row) that provides
            'human_rights_index' and 'environmental_risk' and, optionally,
            'certificates_valid', 'average order value' and
            'sum of order_value'.

    Returns:
        pd.Series with the entries 'risk_human_rights', 'risk_environment',
        'risk_certificates', 'risk_avg_order' and 'risk_order_sum'. Every
        entry except the certificate flag is rounded to two decimals.
    """

    def inverted_index_risk(index_value):
        # A missing index counts as the maximum risk of 1. Otherwise the index
        # is divided by 100 and inverted, so a low index yields a high risk.
        if pd.isna(index_value):
            return 1
        return 1 - (index_value / 100)

    def saturating_risk(amount, half_point):
        # Grows with the amount and reaches 0.5 when amount == half_point.
        # Anything that is not greater than zero (including NaN) scores 0.
        if amount > 0:
            return amount / (amount + half_point)
        return 0

    # 1. + 2. Country-level indices
    human_rights_risk = inverted_index_risk(row['human_rights_index'])
    environment_risk = inverted_index_risk(row['environmental_risk'])

    # 3. Certificates: only an explicit 'yes' (any case, surrounding
    #    whitespace ignored) removes the risk; a missing entry counts as 'no'.
    certificate_text = str(row.get('certificates_valid', 'no')).strip().lower()
    certificate_risk = 1 if certificate_text != 'yes' else 0

    # 4. + 5. Order-based risks; a missing entry is treated as 0
    avg_order_risk = saturating_risk(row.get('average order value', 0), 60000)
    order_sum_risk = saturating_risk(row.get('sum of order_value', 0), 600000)

    components = {
        'risk_human_rights': round(human_rights_risk, 2),
        'risk_environment': round(environment_risk, 2),
        'risk_certificates': certificate_risk,
        'risk_avg_order': round(avg_order_risk, 2),
        'risk_order_sum': round(order_sum_risk, 2),
    }
    return pd.Series(components)

#### Weighting of risk factors
The factors are weighted as follows:
- Human rights and environmental risk: 30% each
- Certificate available: 20%
- Average order value and total order value (sum of order_value): 10% each

The higher the score, the higher the risk.

In [None]:
def calculate_quantitative_risk_score(row):
    """Combine the individual risk components into one weighted score.

    Args:
        row: Mapping-like record offering ``.get``; a component that is
            absent contributes 0.

    Returns:
        The weighted sum of the components, rounded to two decimals.
    """
    # Weight of each risk component in the overall score
    weights = {
        'risk_human_rights': 0.3,
        'risk_environment': 0.3,
        'risk_certificates': 0.2,
        'risk_avg_order': 0.1,
        'risk_order_sum': 0.1,
    }
    contributions = [
        weight * row.get(component, 0) for component, weight in weights.items()
    ]

    # Add the contributions strictly left to right, starting with the first
    # one, so floating-point results match a plain chained addition.
    total = contributions[0]
    for part in contributions[1:]:
        total = total + part

    return round(total, 2)

#### Merging processed data to form risk overview table
This includes a helper CSV that maps each country to its respective country_id

In [None]:
# Keep only the suppliers whose status is 'active'
suppliers = suppliers[suppliers['status'] == 'active']

# Attach address data; the left join keeps suppliers without an address row
result_table = pd.merge(suppliers, addresses, on='supplier_id', how='left')

# Total order value per supplier, rounded to two decimals
order_sums = orders.groupby('supplier_id')['order_value'].sum().round(decimals=2)
order_sums = order_sums.reset_index().rename(columns={'order_value': 'sum of order_value'})
result_table = pd.merge(result_table, order_sums, on='supplier_id', how='left')

# Number of orders per supplier
order_counts = orders.groupby('supplier_id')['order_id'].count()
order_counts = order_counts.reset_index().rename(columns={'order_id': 'number of orders'})
result_table = pd.merge(result_table, order_counts, on='supplier_id', how='left')

# Average order value = total order value / number of orders
result_table['average order value'] = (
    result_table['sum of order_value'] / result_table['number of orders']
)

# Industry per supplier: look up each ordered article's industry and keep the
# first entry found for the supplier
orders_articles = pd.merge(orders, articles, on='article_id', how='left')
supplier_industry = orders_articles.groupby('supplier_id')['industry'].first().reset_index()
result_table = pd.merge(result_table, supplier_industry, on='supplier_id', how='left')

# Resolve each country to its country_id, then attach the index data
# (human_rights_index and environmental_risk) for that country_id
result_table = pd.merge(result_table, country_mapping, on='country', how='left')
result_table = pd.merge(result_table, indices, on='country_id', how='left')

# Row-wise risk components, appended as additional columns
risk_parameters = result_table.apply(determine_risk_parameters, axis=1)
result_table = result_table.join(risk_parameters)

# Weighted overall score per row
result_table['quantitative risk score'] = result_table.apply(
    calculate_quantitative_risk_score, axis=1
)



#### Save detailed and compact versions of result table

In [None]:
# Detailed version: every merged column plus the individual risk components
result_table.to_csv('../data/result/detailed_result_table.csv', index=False)

# Compact version: restrict to the key columns, in the order listed here
compact_columns = [
    'supplier_id',
    'country',
    'sum of order_value',
    'industry',
    'quantitative risk score',
]
result_table = result_table[compact_columns]
result_table.to_csv('../data/result/result_table.csv', index=False)