In [31]:
import pandas as pd
import numpy as np

def calculate_robust_accuracy(truth_df, test_df, threshold=0.01):
    """Return the fraction of cells in ``test_df`` that match ``truth_df``.

    The two frames are aligned by row label and by column label (outer join
    on both axes), so neither row order nor column order affects the score.
    A cell counts as correct when the absolute difference between the truth
    value and the test value is at most ``threshold``. Any cell that cannot
    be compared -- its row or column exists in only one frame, or either
    value is NaN -- counts as incorrect, so missing and extra rows/columns
    lower the score instead of being ignored.

    Parameters
    ----------
    truth_df : pandas.DataFrame
        Reference values; expected to hold numeric data.
    test_df : pandas.DataFrame
        Values to score, labelled (index and columns) like ``truth_df``.
    threshold : float, default 0.01
        Largest absolute difference still treated as a match.

    Returns
    -------
    float
        Accuracy in the range [0, 1]. Returns 0.0 when the aligned frames
        contain no cells at all (nothing to compare).
    """
    # DataFrame arithmetic aligns on index AND column labels (outer join),
    # filling cells that exist on only one side with NaN. This pairs columns
    # by name rather than by position.
    abs_diff = (truth_df - test_df).abs()

    total_cells = abs_diff.size
    if total_cells == 0:
        # Avoid 0/0 -> NaN (and a RuntimeWarning) on empty input.
        return 0.0

    # NaN <= threshold evaluates to False, so unmatched cells are counted as
    # errors in the numerator while still contributing to the denominator.
    matching_cells = abs_diff.le(threshold).to_numpy().sum()

    return float(matching_cells / total_cells)

# Example usage
# truth_df = pd.DataFrame(...)  # Your truth dataframe
# test_df = pd.DataFrame(...)   # Your test dataframe
# accuracy = calculate_robust_accuracy(truth_df, test_df, threshold=0.01)
# print(f"Adjusted Accuracy: {accuracy * 100:.2f}%")


In [3]:
import pandas as pd

# Column-oriented sample data: one list per field; entries line up by position.
data = {
    'Product': [
        'Product 4', 'Product 5', 'Product 6', 'Product 16', 'Product 17',
        'Product 10', 'Product 11', 'Product 14', 'Product 3', 'Product 19',
        'Product 20', 'Product 12', 'Product 18', 'Product 8', 'Product 9',
        'Product 7', 'Product 1', 'Product 13', 'Product 15',
    ],
    'Sales': [
        2294, 2130, 2095, 4444, 4171, 3169, 1466, 2482, 1860, 4735,
        1130, 2238, 3919, 4092, 2638, 4772, 4174, 1330, 3135,
    ],
    'Profit': [
        443, 591, 513, 847, 956, 376, 260, 121, 869, 574,
        158, 559, 660, 485, 291, 905, 761, 413, 352,
    ],
    'Units Sold': [
        24, 71, 71, 82, 48, 64, 73, 16, 89, 13,
        98, 12, 27, 71, 60, 56, 51, 60, 30,
    ],
}

# Build the frame and key it by product name in a single expression.
df = pd.DataFrame(data).set_index('Product')

# Plain-text dump of the resulting table.
print(df)


            Sales  Profit  Units Sold
Product                              
Product 4    2294     443          24
Product 5    2130     591          71
Product 6    2095     513          71
Product 16   4444     847          82
Product 17   4171     956          48
Product 10   3169     376          64
Product 11   1466     260          73
Product 14   2482     121          16
Product 3    1860     869          89
Product 19   4735     574          13
Product 20   1130     158          98
Product 12   2238     559          12
Product 18   3919     660          27
Product 8    4092     485          71
Product 9    2638     291          60
Product 7    4772     905          56
Product 1    4174     761          51
Product 13   1330     413          60
Product 15   3135     352          30


In [8]:
# Load the reference table; the first CSV column becomes the row index so
# rows can later be matched by label rather than by position.
truth = pd.read_csv(
    'truth.csv',
    index_col=0,
)
truth

Unnamed: 0,Sales,Profit,Units Sold
Product 1,4174,761,51
Product 2,4507,408,69
Product 3,1860,869,89
Product 4,2294,443,24
Product 5,2130,591,71
Product 6,2095,513,71
Product 7,4772,905,56
Product 8,4092,485,71
Product 9,2638,291,60
Product 10,3169,376,64


In [32]:
# Score `df` against the reference table `truth`, allowing a 0.01 absolute tolerance.
calculate_robust_accuracy(truth_df=truth, test_df=df, threshold=0.01)

0.95

In [34]:
# Outer-join the two tables on their index to inspect row alignment side by
# side; overlapping column names receive the _truth / _test suffixes.
merged_df = pd.merge(
    truth,
    df,
    how='outer',
    left_index=True,
    right_index=True,
    suffixes=('_truth', '_test'),
)
merged_df

Unnamed: 0,Sales_truth,Profit_truth,Units Sold_truth,Sales_test,Profit_test,Units Sold_test
Product 1,4174,761,51,4174.0,761.0,51.0
Product 10,3169,376,64,3169.0,376.0,64.0
Product 11,1466,260,73,1466.0,260.0,73.0
Product 12,2238,559,12,2238.0,559.0,12.0
Product 13,1330,413,60,1330.0,413.0,60.0
Product 14,2482,121,16,2482.0,121.0,16.0
Product 15,3135,352,30,3135.0,352.0,30.0
Product 16,4444,847,82,4444.0,847.0,82.0
Product 17,4171,956,48,4171.0,956.0,48.0
Product 18,3919,660,27,3919.0,660.0,27.0
