# Discount Optimization

In [34]:
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.preprocessing import LabelEncoder
from category_encoders import TargetEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from scipy.optimize import minimize

In [35]:
data = pd.read_csv('final_data.csv')
data = data.drop(['Product_Desc','depoid','Product_Description','unique_ID','Depo_Prod_Unique_ID','Product_Nickname','Nama_Depo'],axis=1)
print(data.shape)
data.head()

(278623, 22)


Unnamed: 0,Wholesaler_ID,Product_ID,PO_ID,Transaction_Date,Base_Price,Quantity_Sold,PPN,Discount_Type,Agg_Discount_Amount,modal_per_pcs_inc_PPN,...,decUomConversion1,decUomConversion2,Final_Price_New,Total_Revenue_New,discount_pct,Year_Transaction,Month_Transaction,is_conversion_equal,Convert_to_box,Price_Difference
0,0074-15-000007073,90103211101100,0074-23-000078054,2023-01-02,1083.333333,24,1981,Reguler,5405.405405,,...,1,1,858.108108,20594.594587,0.2079,2023,1,True,1,225.225225
1,0074-19-000012078,90103211101100,0074-23-000078100,2023-01-02,1083.333333,24,1981,Reguler,5405.405405,,...,1,1,858.108108,20594.594587,0.2079,2023,1,True,1,225.225225
2,0074-19-000012232,90103211101100,0074-23-000078099,2023-01-02,1083.333333,72,5945,Reguler,16216.216216,,...,1,1,858.108108,61783.78376,0.2079,2023,1,True,3,225.225225
3,8133-16-000001011,90103211101100,0074-23-000078112,2023-01-02,1083.333333,24,1981,Reguler,5405.405405,,...,1,1,858.108108,20594.594587,0.2079,2023,1,True,1,225.225225
4,0049-18-000006660,90103211101100,0049-23-000035048,2023-01-02,1125.0,120,9662,Reguler,33784.684684,,...,1,1,843.460961,101215.315316,0.250257,2023,1,True,5,281.539039


In [36]:
data = data.dropna()
data.shape

(210211, 22)

## feature engineering

In [37]:
drop_col = ['BOSnetszUomId', 'BOSnetdecUom', 'decUomConversion1', 'decUomConversion2','is_conversion_equal', 'Price_Difference']

data_purchased = data.drop(drop_col,axis=1)
data_purchased['Product_ID'] = data_purchased['Product_ID'].astype('str')
print(data_purchased.shape)
data_purchased.head()

(210211, 16)


Unnamed: 0,Wholesaler_ID,Product_ID,PO_ID,Transaction_Date,Base_Price,Quantity_Sold,PPN,Discount_Type,Agg_Discount_Amount,modal_per_pcs_inc_PPN,Final_Price_New,Total_Revenue_New,discount_pct,Year_Transaction,Month_Transaction,Convert_to_box
5608,0074-19-000012078,90103211107100,0074-23-000078100,2023-01-02,1083.333333,24,1981,Reguler,5405.405405,727.187362,858.108108,20594.594587,0.2079,2023,1,1
5609,0049-18-000006660,90103211107100,0049-23-000035048,2023-01-02,1125.0,120,9662,Reguler,33784.684684,742.104375,843.460961,101215.315316,0.250257,2023,1,5
5610,0049-18-000008369,90103211107100,0049-23-000035126,2023-01-03,1125.0,48,3963,Reguler,12612.612612,742.104375,862.237237,41387.387388,0.233567,2023,1,2
5611,0049-13-000003123,90103211107100,0049-23-000035116,2023-01-03,1125.0,120,9662,Reguler,33784.684684,742.104375,843.460961,101215.315316,0.250257,2023,1,5
5612,0074-17-000009593,90103211107100,0074-23-000078212,2023-01-03,1083.333333,48,3964,Reguler,10809.909909,727.187362,858.126877,41190.090075,0.207883,2023,1,2


### Create wholesaler-product column pair

In [38]:
data_purchased['Wholesaler_Product_ID'] = data_purchased['Wholesaler_ID'] + "-" + data_purchased['Product_ID']
data_purchased.head()

Unnamed: 0,Wholesaler_ID,Product_ID,PO_ID,Transaction_Date,Base_Price,Quantity_Sold,PPN,Discount_Type,Agg_Discount_Amount,modal_per_pcs_inc_PPN,Final_Price_New,Total_Revenue_New,discount_pct,Year_Transaction,Month_Transaction,Convert_to_box,Wholesaler_Product_ID
5608,0074-19-000012078,90103211107100,0074-23-000078100,2023-01-02,1083.333333,24,1981,Reguler,5405.405405,727.187362,858.108108,20594.594587,0.2079,2023,1,1,0074-19-000012078-90103211107100
5609,0049-18-000006660,90103211107100,0049-23-000035048,2023-01-02,1125.0,120,9662,Reguler,33784.684684,742.104375,843.460961,101215.315316,0.250257,2023,1,5,0049-18-000006660-90103211107100
5610,0049-18-000008369,90103211107100,0049-23-000035126,2023-01-03,1125.0,48,3963,Reguler,12612.612612,742.104375,862.237237,41387.387388,0.233567,2023,1,2,0049-18-000008369-90103211107100
5611,0049-13-000003123,90103211107100,0049-23-000035116,2023-01-03,1125.0,120,9662,Reguler,33784.684684,742.104375,843.460961,101215.315316,0.250257,2023,1,5,0049-13-000003123-90103211107100
5612,0074-17-000009593,90103211107100,0074-23-000078212,2023-01-03,1083.333333,48,3964,Reguler,10809.909909,727.187362,858.126877,41190.090075,0.207883,2023,1,2,0074-17-000009593-90103211107100


### create profit, profit margin % col

In [39]:
# 1. Profit margin %
data_purchased['Profit_Margin_%'] = (
    (data_purchased['Final_Price_New'] - data_purchased['modal_per_pcs_inc_PPN'])
    / data_purchased['Final_Price_New'] )

# 2. Profit per unit
data_purchased['Profit_Per_Unit'] = data_purchased['Final_Price_New'] - data_purchased['modal_per_pcs_inc_PPN']

# 3. Total profit
data_purchased['Total_Profit'] = data_purchased['Profit_Per_Unit'] * data_purchased['Quantity_Sold']

data_purchased.head()

Unnamed: 0,Wholesaler_ID,Product_ID,PO_ID,Transaction_Date,Base_Price,Quantity_Sold,PPN,Discount_Type,Agg_Discount_Amount,modal_per_pcs_inc_PPN,Final_Price_New,Total_Revenue_New,discount_pct,Year_Transaction,Month_Transaction,Convert_to_box,Wholesaler_Product_ID,Profit_Margin_%,Profit_Per_Unit,Total_Profit
5608,0074-19-000012078,90103211107100,0074-23-000078100,2023-01-02,1083.333333,24,1981,Reguler,5405.405405,727.187362,858.108108,20594.594587,0.2079,2023,1,1,0074-19-000012078-90103211107100,0.152569,130.920745,3142.097887
5609,0049-18-000006660,90103211107100,0049-23-000035048,2023-01-02,1125.0,120,9662,Reguler,33784.684684,742.104375,843.460961,101215.315316,0.250257,2023,1,5,0049-18-000006660-90103211107100,0.120167,101.356586,12162.790316
5610,0049-18-000008369,90103211107100,0049-23-000035126,2023-01-03,1125.0,48,3963,Reguler,12612.612612,742.104375,862.237237,41387.387388,0.233567,2023,1,2,0049-18-000008369-90103211107100,0.139327,120.132862,5766.377388
5611,0049-13-000003123,90103211107100,0049-23-000035116,2023-01-03,1125.0,120,9662,Reguler,33784.684684,742.104375,843.460961,101215.315316,0.250257,2023,1,5,0049-13-000003123-90103211107100,0.120167,101.356586,12162.790316
5612,0074-17-000009593,90103211107100,0074-23-000078212,2023-01-03,1083.333333,48,3964,Reguler,10809.909909,727.187362,858.126877,41190.090075,0.207883,2023,1,2,0074-17-000009593-90103211107100,0.152588,130.939514,6285.096675


### Filter data so each wholesaler-product pair have more than 10 POs

In [40]:
data_purchased = data_purchased.groupby('Wholesaler_Product_ID').filter(lambda x : len(x)>=10)
print(data_purchased.shape)
print(len(list(data_purchased['Wholesaler_ID'].unique())))

(128660, 20)
1618


### Add price elasticity

In [41]:
# Calculate percentage changes in Quantity Sold and Final Price
data_purchased['Quantity_Change_%'] = data_purchased.groupby(['Wholesaler_ID', 'Product_ID'])['Quantity_Sold'].pct_change() * 100
data_purchased['Price_Change_%'] = data_purchased.groupby(['Wholesaler_ID', 'Product_ID'])['Final_Price_New'].pct_change() * 100

# Calculate Elasticity
data_purchased['Elasticity'] = data_purchased.apply(
    lambda row: row['Quantity_Change_%'] / row['Price_Change_%'] if row['Price_Change_%'] != 0 else np.nan,
    axis=1
)

# Fill NaN values (this will typically happen for the first row in each group)
data_purchased['Quantity_Change_%'] = data_purchased['Quantity_Change_%'].fillna(0)
data_purchased['Price_Change_%'] = data_purchased['Price_Change_%'].fillna(0)

# Cap elasticity to handle extreme outliers
data_purchased['Elasticity'] = data_purchased['Elasticity'].clip(-10, 10)

# minmax = MinMaxScaler()
# data_purchased['Elasticity'] = minmax.fit_transform(data_purchased[['Elasticity']])

#data_purchased = data_purchased.dropna()
print(data_purchased.shape)
data_purchased.head()

(128660, 23)


Unnamed: 0,Wholesaler_ID,Product_ID,PO_ID,Transaction_Date,Base_Price,Quantity_Sold,PPN,Discount_Type,Agg_Discount_Amount,modal_per_pcs_inc_PPN,...,Year_Transaction,Month_Transaction,Convert_to_box,Wholesaler_Product_ID,Profit_Margin_%,Profit_Per_Unit,Total_Profit,Quantity_Change_%,Price_Change_%,Elasticity
5609,0049-18-000006660,90103211107100,0049-23-000035048,2023-01-02,1125.0,120,9662,Reguler,33784.684684,742.104375,...,2023,1,5,0049-18-000006660-90103211107100,0.120167,101.356586,12162.790316,0.0,0.0,
5627,0049-17-000005662,90103211107100,0049-23-000035291,2023-01-05,1125.0,120,9662,Reguler,33784.684684,742.104375,...,2023,1,5,0049-17-000005662-90103211107100,0.120167,101.356586,12162.790316,0.0,0.0,
5628,0049-18-000006974,90103211107100,0049-23-000035294,2023-01-05,1125.0,240,19324,Reguler,67567.567566,742.104375,...,2023,1,10,0049-18-000006974-90103211107100,0.120175,101.364093,24327.382434,0.0,0.0,
5629,0049-08-000001311,90103211107100,0049-23-000035312,2023-01-05,1125.0,24,1932,Reguler,6755.855855,742.104375,...,2023,1,1,0049-08-000001311-90103211107100,0.120214,101.401631,2433.639145,0.0,0.0,
5631,0049-08-000000754,90103211107100,0049-23-000035315,2023-01-05,1125.0,120,9662,Reguler,33784.684684,742.104375,...,2023,1,5,0049-08-000000754-90103211107100,0.120167,101.356586,12162.790316,0.0,0.0,


## Helper function

### ML modeling to predict quantity

In [42]:
def modeling(X, y):
    # split
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=42)
    
    # encode
    encoder = TargetEncoder(cols=['Wholesaler_Product_ID'])  # or Wholesaler_Product_ID if you concatenate
    encoder.fit(X_train, y_train)

    X_train = encoder.transform(X_train)
    X_test = encoder.transform(X_test)

    model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    r2_adjusted_train = 1 - (1-model.score(X_train, y_train))*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)
    r2_adjusted_test = 1 - (1-model.score(X_test, y_test))*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)

    print("R-squared (R²) Score:", r2)
    print("R² Adjusted Train Score:", r2_adjusted_train)
    print("R² Adjusted Test Score:", r2_adjusted_test)
    print(y_test.head())

    return {'encoder':encoder,
            'model':model,
            'r2':r2,
            'r2_adj_train': r2_adjusted_train,
            'r2_adj_test': r2_adjusted_test}

### simulate discount

In [43]:
def simulate_disc(pred_target, data, drop_col, target, disc, encoder, model):
  new_data = data.drop(drop_col+ target, axis=1)
  new_data['discount_pct'] = disc

  # calculate discount amount & final price manually because of new discount %
  new_data['Agg_Discount_Amount'] = new_data['Base_Price'] * new_data['discount_pct']
  new_data['Final_Price_New'] = new_data['Base_Price'] - new_data['Agg_Discount_Amount']
  new_data['Profit_Margin_%'] = (new_data['Final_Price_New'] - new_data['modal_per_pcs_inc_PPN']) / new_data['Final_Price_New']
  # Apply encodings
  new_data_encoded = encoder.transform(new_data)

  if pred_target == "revenue":
    # Predict
    predicted_revenue = model.predict(new_data_encoded)
    new_data['Predicted_Revenue'] = predicted_revenue
    new_data['Predicted_Quantity'] = new_data['Predicted_Revenue'] / new_data['Final_Price_New']

  if pred_target == "quantity":
      predicted_quantity = model.predict(new_data_encoded)
      new_data['Predicted_Quantity'] = predicted_quantity
      new_data['Predicted_Revenue'] = new_data['Predicted_Quantity'] * new_data['Final_Price_New']
      new_data['Predicted_Profit'] = new_data['Predicted_Quantity'] * (
          new_data['Final_Price_New'] - new_data['modal_per_pcs_inc_PPN']
      )

  return new_data

## Predict quantity sold

In [44]:
drop_col = ['PO_ID', 'Wholesaler_ID','Product_ID', 'Elasticity','Discount_Type','Transaction_Date','Convert_to_box', 'Year_Transaction']
    
target = ['Quantity_Sold']
X = data_purchased.drop(drop_col+target,axis=1)
y = data_purchased[target]

In [45]:
result = modeling(X, y)
result

  return fit_method(estimator, *args, **kwargs)


R-squared (R²) Score: 0.999854967591568
R² Adjusted Train Score: 0.9998186723523029
R² Adjusted Test Score: 0.999854888637813
        Quantity_Sold
262439             24
83473              48
93111              24
38755              24
203178           7248


{'encoder': TargetEncoder(cols=['Wholesaler_Product_ID']),
 'model': RandomForestRegressor(random_state=42),
 'r2': 0.999854967591568,
 'r2_adj_train': 0.9998186723523029,
 'r2_adj_test': 0.999854888637813}

## Simulate hardcoded new discount

In [46]:
data_purchased[['Quantity_Sold','Total_Revenue_New','Final_Price_New','discount_pct','Profit_Margin_%','Total_Profit']][(data_purchased['Wholesaler_ID']=='0049-20-000009207') & (data_purchased['Product_ID']=='90103211107100')].head(1)

Unnamed: 0,Quantity_Sold,Total_Revenue_New,Final_Price_New,discount_pct,Profit_Margin_%,Total_Profit
5664,2400,2687838.0,1119.932432,0.004505,0.337367,906787.337838


customer is relatively reactive to price change, initial discount was 0.4% then we give them 2% discount

In [47]:
data = data_purchased[(data_purchased['Wholesaler_ID']=='0049-20-000009207') & (data_purchased['Product_ID']=='90103211107100')].head(1)
simulate_disc("quantity",data,drop_col,target,0.02, result.get('encoder'), result.get('model'))

Unnamed: 0,Base_Price,PPN,Agg_Discount_Amount,modal_per_pcs_inc_PPN,Final_Price_New,Total_Revenue_New,discount_pct,Month_Transaction,Wholesaler_Product_ID,Profit_Margin_%,Profit_Per_Unit,Total_Profit,Quantity_Change_%,Price_Change_%,Predicted_Quantity,Predicted_Revenue,Predicted_Profit
5664,1125.0,266229,22.5,742.104375,1102.5,2687838.0,0.02,1,0049-20-000009207-90103211107100,0.326889,377.828057,906787.337838,0.0,0.0,2542.56,2803172.4,916327.5003


## Find optimized discount

using minimize() function from scipy to find the discount that maximizes profit

Since we want to maximize profit, we convert Maximize profit to Minimize (–profit)

It tries to find the x (the discount) that minimizes f(x) using iterative methods like SLSQP (Sequential Least Squares Programming)

1. Input to minimize():
A discount value x[0]

2. Objective function:
f(x) to minimize -profit by calculating predicted quantity using regression coefficients

3. Computes final price from discount: Calculates profit = (final price – cost) × quantity then returns –profit (because we want to maximize it)

4. Constraints:

- Discount must be between 0.5% and 45%

- Final price must be ≥ cost

Why minimize and not linear programming?

D = discount

- Final price = Base_Price * (1 - D) → linear in D

- But profit = (Final price - cost) * Predicted quantity → not constant because profit is dependant to quantity

This is a product of two linear terms in D, which results in a nonlinear (specifically, quadratic) function.
That means the objective function is not linear, so linear programming can't be used.



In [56]:
results = []

# Group the data to optimize each Wholesaler_Product_ID separately
grouped = data_purchased.groupby('Wholesaler_Product_ID')

for key, group in grouped:
    # if group['discount_pct'].nunique() < 2:
    #     continue  # Skip if not enough variation in discount

    # Separate features and target
    X = group.drop(columns=drop_col+target)
    y = group['Quantity_Sold']

    # Encode categorical (if needed)
    if 'Wholesaler_Product_ID' in X.columns:
        encoder = TargetEncoder(cols=['Wholesaler_Product_ID'])
        X = encoder.fit_transform(X, y)

    # Fit regression
    # Using linear regression to stimate how Quantity_Sold responds to discount_pct + other variables
    reg = LinearRegression()
    reg.fit(X, y)

    # To evaluate what happens if we change the discount, we need to recalculate the predicted quantity
    # use intercept & coef as a way to predict a new value given changes in features (like a different discount)
    intercept = reg.intercept_
    coef = dict(zip(X.columns, reg.coef_))

    # Use latest price and cost from group
    base_price = group['Base_Price'].iloc[-1]
    cost = group['modal_per_pcs_inc_PPN'].iloc[-1]

    # Fixed values for features (other than discount_pct)
    latest_features = X.iloc[-1].to_dict()

    # define objective function
    # Returns negative profit so that minimize() can maximize it
    def objective(x):
        D = x[0]
        features = latest_features.copy()
        features['discount_pct'] = D
        predicted_quantity = intercept + sum(coef[k] * features[k] for k in coef)
        final_price = base_price * (1 - D)
        profit = (final_price - cost) * predicted_quantity
        return -profit

    # Constraints: 0.5% ≤ D ≤ 45% and final price ≥ cost
    constraints = [
        {'type': 'ineq', 'fun': lambda x: x[0] - 0.005},
        {'type': 'ineq', 'fun': lambda x: 0.45 - x[0]},
        {'type': 'ineq', 'fun': lambda x: base_price * (1 - x[0]) - cost}
    ]
    x0 = np.array(group['discount_pct'])
    opt_result = minimize(objective, x0=x0, constraints=constraints, method='SLSQP')

    if opt_result.success:
        # Extract wholesaler and product IDs from the group
        wholesaler_id = group['Wholesaler_ID'].iloc[0]
        product_id = group['Product_ID'].iloc[0]
        opt_disc = round(opt_result.x[0], 3)
        base_price = group['Base_Price'].iloc[0]
        cost = group['modal_per_pcs_inc_PPN'].iloc[0]
        past_price = group['Final_Price_New'].iloc[0]
        past_discount = group['discount_pct'].iloc[0]
        past_qty = group['Quantity_Sold'].iloc[0]
        past_profit = group['Total_Profit'].iloc[0]

        features = latest_features.copy()
        features['discount_pct'] = opt_disc
        predicted_quantity = intercept + sum(coef[k] * features[k] for k in coef)

        optimized_discount_amt = base_price * opt_disc
        new_final_price = base_price - optimized_discount_amt
        profit_margin_pct = (new_final_price - cost) / new_final_price
        profit_per_unit = new_final_price - cost
        total_profit = profit_per_unit * predicted_quantity

        results.append({
            'Wholesaler_ID': wholesaler_id,
            'Product_ID': product_id,
            'past_price': past_price,
            'past_discount': past_discount,
            'past_qty': past_qty,
            'past_profit':past_profit,
            'optimized_discount': opt_disc,
            'optimized_discount_amt': optimized_discount_amt,
            'opt_predicted_quantity': predicted_quantity,
            'opt_predicted_profit':total_profit,
            'new_final_price': new_final_price
        })

# Convert results to DataFrame
optimized_df = pd.DataFrame(results)

In [57]:
# discount structure
optimized_df

Unnamed: 0,Wholesaler_ID,Product_ID,past_price,past_discount,past_qty,past_profit,optimized_discount,optimized_discount_amt,opt_predicted_quantity,opt_predicted_profit,new_final_price
0,0049-08-000000003,90103211107100,843.468468,0.250250,96,9730.952974,0.067,75.3750,72.0,22141.485000,1049.625000
1,0049-08-000000003,90103211122100,1351.351351,0.099099,48,2214.866466,0.028,42.0000,72.0,11001.002400,1458.000000
2,0049-08-000000003,90103311133100,808.070570,0.072072,144,9499.132115,0.018,15.6750,48.0,5426.589984,855.158333
3,0049-08-000000003,90103311140100,808.063063,0.072081,120,7915.042528,0.072,62.7000,120.0,7923.474963,808.133333
4,0049-08-000000006,90103311133100,792.379880,0.090090,1920,96528.968730,0.090,78.3750,1440.0,72509.699563,792.458333
...,...,...,...,...,...,...,...,...,...,...,...
2604,8133-17-000001369,90103311141100,784.534534,0.099099,480,27526.642417,0.099,86.2125,240.0,13784.032928,784.620833
2605,8133-18-000001411,90103232126100,814.564564,0.022523,120,10485.264208,0.006,5.0000,48.0,4855.006584,828.333333
2606,8133-18-000001411,90103232128100,814.564564,0.022523,120,10485.264208,0.006,5.0000,48.0,4855.006584,828.333333
2607,8133-18-000001439,90103211116100,1364.864865,0.090090,840,50111.514488,0.090,135.0000,1560.0,93275.052000,1365.000000


## Compare the result of optimized discount to random forest result

applying the optimized discount to random forest model to predict quantity & profit

In [50]:
comparison = optimized_df.copy()

there's a significant difference between opt_predicted_quantity and rf_predicted_quantity,

because opt_predicted_quantity uses linear regression, while rf_predicted_quantity use random forest, which learns the pattern better

In [51]:
for i in range(comparison.shape[0]):
    row = comparison.iloc[i]
    wholesaler_id = row['Wholesaler_ID']
    product_id = row['Product_ID']
    optim_disc = row['optimized_discount']

    subset = data_purchased[
        (data_purchased['Wholesaler_ID'] == wholesaler_id) &
        (data_purchased['Product_ID'] == product_id)
    ]

    res = simulate_disc("quantity", subset, drop_col, target, optim_disc, result.get('encoder'), result.get('model'))

    # Use first predicted value (assumes one row or that the first is representative)
    # comparison.at[i, 'discount_act'] = subset['discount_pct'].iloc[0]
    # comparison.at[i, 'quantity_sold_act'] = subset['Quantity_Sold'].iloc[0]
    # comparison.at[i, 'profit_act'] = subset['Total_Profit'].iloc[0]
    comparison.at[i, 'rf_predicted_quantity'] = res['Predicted_Quantity'].iloc[0]
    comparison.at[i, 'rf_predicted_profit'] = res['Predicted_Profit'].iloc[0]

comparison

Unnamed: 0,Wholesaler_ID,Product_ID,past_price,past_discount,past_qty,past_profit,optimized_discount,optimized_discount_amt,opt_predicted_quantity,opt_predicted_profit,new_final_price,rf_predicted_quantity,rf_predicted_profit
0,0049-08-000000003,90103211107100,843.468468,0.250250,96,9730.952974,0.067,75.3750,72.0,22141.485000,1049.625000,91.68,28193.490900
1,0049-08-000000003,90103211122100,1351.351351,0.099099,48,2214.866466,0.028,42.0000,72.0,11001.002400,1458.000000,48.00,7334.001600
2,0049-08-000000003,90103311133100,808.070570,0.072072,144,9499.132115,0.018,15.6750,48.0,5426.589984,855.158333,144.00,16279.769953
3,0049-08-000000003,90103311140100,808.063063,0.072081,120,7915.042528,0.072,62.7000,120.0,7923.474963,808.133333,120.00,7923.474963
4,0049-08-000000006,90103311133100,792.379880,0.090090,1920,96528.968730,0.090,78.3750,1440.0,72509.699563,792.458333,1919.76,96667.514468
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2604,8133-17-000001369,90103311141100,784.534534,0.099099,480,27526.642417,0.099,86.2125,240.0,13784.032928,784.620833,471.36,27071.840670
2605,8133-18-000001411,90103232126100,814.564564,0.022523,120,10485.264208,0.006,5.0000,48.0,4855.006584,828.333333,120.00,12137.516460
2606,8133-18-000001411,90103232128100,814.564564,0.022523,120,10485.264208,0.006,5.0000,48.0,4855.006584,828.333333,120.00,12137.516460
2607,8133-18-000001439,90103211116100,1364.864865,0.090090,840,50111.514488,0.090,135.0000,1560.0,93275.052000,1365.000000,841.68,50325.478056


In [52]:
comparison['qty_difference'] = comparison['past_qty'] - comparison['rf_predicted_quantity']
comparison['profit_difference'] = comparison['past_profit'] - comparison['rf_predicted_profit']

In [53]:
# predicted higher than actual = optimized discount might work
comparison[comparison['profit_difference']<0]

Unnamed: 0,Wholesaler_ID,Product_ID,past_price,past_discount,past_qty,past_profit,optimized_discount,optimized_discount_amt,opt_predicted_quantity,opt_predicted_profit,new_final_price,rf_predicted_quantity,rf_predicted_profit,qty_difference,profit_difference
0,0049-08-000000003,90103211107100,843.468468,0.250250,96,9730.952974,0.067,75.3750,72.0,22141.485000,1049.625000,91.68,28193.490900,4.32,-18462.537926
1,0049-08-000000003,90103211122100,1351.351351,0.099099,48,2214.866466,0.028,42.0000,72.0,11001.002400,1458.000000,48.00,7334.001600,0.00,-5119.135134
2,0049-08-000000003,90103311133100,808.070570,0.072072,144,9499.132115,0.018,15.6750,48.0,5426.589984,855.158333,144.00,16279.769953,0.00,-6780.637838
3,0049-08-000000003,90103311140100,808.063063,0.072081,120,7915.042528,0.072,62.7000,120.0,7923.474963,808.133333,120.00,7923.474963,0.00,-8.432435
4,0049-08-000000006,90103311133100,792.379880,0.090090,1920,96528.968730,0.090,78.3750,1440.0,72509.699563,792.458333,1919.76,96667.514468,0.24,-138.545738
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2603,8133-17-000001369,90103311139100,784.534534,0.099099,360,20644.981813,0.099,86.2125,240.0,13784.032928,784.620833,360.00,20676.049392,0.00,-31.067579
2605,8133-18-000001411,90103232126100,814.564564,0.022523,120,10485.264208,0.006,5.0000,48.0,4855.006584,828.333333,120.00,12137.516460,0.00,-1652.252252
2606,8133-18-000001411,90103232128100,814.564564,0.022523,120,10485.264208,0.006,5.0000,48.0,4855.006584,828.333333,120.00,12137.516460,0.00,-1652.252252
2607,8133-18-000001439,90103211116100,1364.864865,0.090090,840,50111.514488,0.090,135.0000,1560.0,93275.052000,1365.000000,841.68,50325.478056,-1.68,-213.963568


In [54]:
# actual higher than predicted, optimized discount might need to be further discussed
comparison[comparison['profit_difference']>0]

Unnamed: 0,Wholesaler_ID,Product_ID,past_price,past_discount,past_qty,past_profit,optimized_discount,optimized_discount_amt,opt_predicted_quantity,opt_predicted_profit,new_final_price,rf_predicted_quantity,rf_predicted_profit,qty_difference,profit_difference
8,0049-08-000000007,90103211146100,799.924925,0.085800,1200,69384.659910,0.086,75.2500,600.0,34587.374998,799.750000,1180.56,68054.119050,19.44,1330.540860
10,0049-08-000000007,90103232126100,815.878378,0.067568,1200,88528.804055,0.068,59.5000,600.0,44037.375000,815.500000,1176.96,86383.714800,23.04,2145.089255
11,0049-08-000000007,90103311133100,784.534534,0.099099,6000,254580.955208,0.099,86.2125,2400.0,102039.499279,784.620833,5886.24,250262.075932,113.76,4318.879276
12,0049-08-000000007,90103311140100,784.534534,0.099099,1200,50916.191042,0.099,86.2125,1200.0,51019.749639,784.620833,1182.24,50264.657345,17.76,651.533697
13,0049-08-000000016,90103211146100,818.693694,0.064350,480,36762.872973,0.064,56.0000,1176.0,90429.255015,819.000000,475.92,36596.165850,4.08,166.707123
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2599,8133-17-000001369,90103211146100,795.795795,0.045045,960,65864.095644,0.045,37.5000,720.0,49425.098771,795.833333,939.12,64466.803830,20.88,1397.291814
2600,8133-17-000001369,90103232126100,795.795795,0.045045,960,65864.095644,0.045,37.5000,240.0,16475.032924,795.833333,939.12,64466.803830,20.88,1397.291814
2601,8133-17-000001369,90103232128100,795.795795,0.045045,720,49398.071733,0.045,37.5000,480.0,32950.065847,795.833333,711.12,48815.522553,8.88,582.549180
2602,8133-17-000001369,90103311133100,784.534534,0.099099,6000,344083.030208,0.099,86.2125,2400.0,137840.329279,784.620833,5875.68,337460.694141,124.32,6622.336067
