In [1]:
# Packages

import pencilbox as pb
import pandas as pd
import datetime as dt
import time
import numpy as np
from calendar import monthrange
from datetime import timedelta, datetime, date
import gc
import warnings
import math
warnings.filterwarnings("ignore")

# Connection

CON_TRINO = pb.get_connection("[Warehouse] Trino")
CON_REDSHIFT = pb.get_connection("[Warehouse] Redshift")

def read_sql_query(sql, con):
    max_tries = 1
    for attempt in range(max_tries):
        print(f"Read attempt: {attempt}...")
        try:
            start = time.time()
            df = pd.read_sql_query(sql, con)
            end = time.time()
            if (end - start) > 60:
                print("Time: ", round((end - start) / 60, 2), " min")
            else:
                print("Time: ", round(end - start, 2), " sec")
            return df
            break
        except BaseException as e:
            print(e)
            time.sleep(10)
            
# Viewing configs

pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", lambda x: "%.3f" % x)

# pd.set_option("display.max_rows", None)

In [2]:
# start_date = pd.to_datetime(datetime.now() + timedelta(hours = 5.5) - timedelta(days = 26)).date()
# start_date

# Sales Query

In [3]:
sales_query = f"""
with item_product_mapping as
(
    SELECT
        DISTINCT
            ipr.product_id,
            case
                when
                    ipr.item_id is null
                then ipom_0.item_id else ipr.item_id end as item_id,
            case
                when
                    ipr.item_id is not null
                then
                    COALESCE(ipom.multiplier,1)
                else
                    COALESCE(ipom_0.multiplier,1)
                end AS multiplier
    FROM
        lake_rpc.item_product_mapping ipr
    left join
        dwh.dim_item_product_offer_mapping ipom
    on
        ipr.product_id = ipom.product_id
    and
            ipr.item_id = ipom.item_id
    left join
        dwh.dim_item_product_offer_mapping ipom_0
    on
        ipr.product_id = ipom_0.product_id
),

required_items as 
(
    select
        *
    from
        lake_rpc.product_product
    where
    --     active = 1
    -- and
        lower(name) ilike '%%coca-cola%%'
    and
        lower(name) ilike '%%can%%'

),

sales_base as
(
    select
        date(i.cart_checkout_ts_ist) as date,
        cl.name as city,
        i.outlet_id,
        ipm.item_id,
        i.cart_id,
        sum(1.000*i.product_quantity * COALESCE(multiplier,1)) as product_quantity,
        sum(1.000*i.total_selling_price * COALESCE(multiplier,1)) as product_gmv
    from
        dwh.fact_sales_order_item_details i
    
    inner join
        dwh.fact_sales_order_details j
    on
        i.cart_id=j.cart_id
    
    left join
        item_product_mapping ipm
    ON
        i.product_id = ipm.product_id
    
    left join
        lake_rpc.item_category_details icd
    ON
        icd.item_id = ipm.item_id
    
    left join
        lake_retail.console_outlet co
    on
        co.id = i.outlet_id
    and
        co.business_type_id = 7
    
    left join
        lake_retail.console_location cl
    on
        cl.id = tax_location_id
    
    where
        i.cart_checkout_ts_ist >= '2022-01-26'
        AND i.cart_checkout_ts_ist <= '2022-03-28'
    and
        j.is_internal_order = false
    and
        j.order_current_status = 'DELIVERED'
    and
        icd.item_id in (select distinct item_id from required_items)
    group by 1,2,3,4,5
)

SELECT * FROM sales_base
"""
sales_df = read_sql_query(sales_query, CON_REDSHIFT)
sales_df.head()

Read attempt: 0...
Time:  12.07  sec


Unnamed: 0,date,city,outlet_id,item_id,cart_id,product_quantity,product_gmv
0,2022-01-26,Delhi,1489.0,10004485,146442702,3.0,99.0
1,2022-01-28,Delhi,1701.0,10004491,146810204,2.0,70.0
2,2022-01-30,Delhi,1024.0,10004491,147303965,2.0,70.0
3,2022-01-31,Delhi,2939.0,10004491,147913527,2.0,70.0
4,2022-02-01,Mumbai,3008.0,10004485,148024244,2.0,76.0


In [4]:
cart_base_df = sales_df.groupby(['date','city','cart_id']).agg({'item_id':'nunique','product_quantity':'sum','product_gmv':'sum'}).rename(columns={'item_id':'unique_items'}).reset_index()
cart_base_df = cart_base_df.merge(sales_df[['date','city','item_id','cart_id']], on=['date','city','cart_id'], how = 'left')
cart_base_df.head()

Unnamed: 0,date,city,cart_id,unique_items,product_quantity,product_gmv,item_id
0,2022-01-26,Ahmedabad,145823893,1,1.0,38.0,10000608
1,2022-01-26,Ahmedabad,145859801,1,2.0,76.0,10000608
2,2022-01-26,Ahmedabad,146193643,1,1.0,38.0,10004485
3,2022-01-26,Ahmedabad,146546205,1,1.0,38.0,10004485
4,2022-01-26,Ahmedabad,146617239,1,1.0,38.0,10000608


In [5]:
pure_carts_df = cart_base_df[cart_base_df['unique_items'] == 1]
pure_carts_df = pure_carts_df.groupby(['date','city','item_id']).agg({'cart_id':'nunique','product_quantity':'sum','product_gmv':'sum'}).rename(columns={'cart_id':'pure_carts','product_quantity':'pure_product_quantity','product_gmv':'pure_product_gmv'}).reset_index()
pure_carts_df.head()

Unnamed: 0,date,city,item_id,pure_carts,pure_product_quantity,pure_product_gmv
0,2022-01-26,Ahmedabad,10000608,5,9.0,342.0
1,2022-01-26,Ahmedabad,10004485,4,4.0,152.0
2,2022-01-26,Bengaluru,10000608,30,74.0,2812.0
3,2022-01-26,Bengaluru,10004485,32,56.0,2128.0
4,2022-01-26,Bengaluru,10004491,43,125.0,4750.0


In [6]:
mix_carts_df = cart_base_df[cart_base_df['unique_items'] > 1]
mix_carts_df = mix_carts_df.groupby(['date','city','item_id']).agg({'cart_id':'nunique','product_quantity':'sum','product_gmv':'sum'}).rename(columns={'cart_id':'mix_carts','product_quantity':'mix_product_quantity','product_gmv':'mix_product_gmv'}).reset_index()
mix_carts_df.head()

Unnamed: 0,date,city,item_id,mix_carts,mix_product_quantity,mix_product_gmv
0,2022-01-26,Ahmedabad,10000608,1,2.0,76.0
1,2022-01-26,Ahmedabad,10004485,1,2.0,76.0
2,2022-01-26,Bengaluru,10000608,4,60.0,2280.0
3,2022-01-26,Bengaluru,10004485,1,3.0,114.0
4,2022-01-26,Bengaluru,10004491,3,57.0,2166.0


In [7]:
total_carts_df = cart_base_df.groupby(['date','city','item_id']).agg({'cart_id':'nunique','product_quantity':'sum','product_gmv':'sum'}).rename(columns={'cart_id':'total_carts','product_quantity':'product_quantity','product_gmv':'product_gmv'}).reset_index()
total_carts_df.head()

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv
0,2022-01-26,Ahmedabad,10000608,6,11.0,418.0
1,2022-01-26,Ahmedabad,10004485,5,6.0,228.0
2,2022-01-26,Bengaluru,10000608,34,134.0,5092.0
3,2022-01-26,Bengaluru,10004485,33,59.0,2242.0
4,2022-01-26,Bengaluru,10004491,46,182.0,6916.0


In [8]:
final_df = total_carts_df.merge(mix_carts_df, on = ['date','city','item_id'], how = 'left')
final_df = final_df.merge(pure_carts_df, on = ['date','city','item_id'], how = 'left')
final_df = final_df.fillna(0)
final_df.head()

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_carts,pure_product_quantity,pure_product_gmv
0,2022-01-26,Ahmedabad,10000608,6,11.0,418.0,1.0,2.0,76.0,5.0,9.0,342.0
1,2022-01-26,Ahmedabad,10004485,5,6.0,228.0,1.0,2.0,76.0,4.0,4.0,152.0
2,2022-01-26,Bengaluru,10000608,34,134.0,5092.0,4.0,60.0,2280.0,30.0,74.0,2812.0
3,2022-01-26,Bengaluru,10004485,33,59.0,2242.0,1.0,3.0,114.0,32.0,56.0,2128.0
4,2022-01-26,Bengaluru,10004491,46,182.0,6916.0,3.0,57.0,2166.0,43.0,125.0,4750.0


In [9]:
final_df['pure_ratio'] = final_df['pure_carts']/final_df['total_carts']
final_df['mix_ratio'] = final_df['mix_carts']/final_df['total_carts']
final_df.head()

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,pure_ratio,mix_ratio
0,2022-01-26,Ahmedabad,10000608,6,11.0,418.0,1.0,2.0,76.0,5.0,9.0,342.0,0.833,0.167
1,2022-01-26,Ahmedabad,10004485,5,6.0,228.0,1.0,2.0,76.0,4.0,4.0,152.0,0.8,0.2
2,2022-01-26,Bengaluru,10000608,34,134.0,5092.0,4.0,60.0,2280.0,30.0,74.0,2812.0,0.882,0.118
3,2022-01-26,Bengaluru,10004485,33,59.0,2242.0,1.0,3.0,114.0,32.0,56.0,2128.0,0.97,0.03
4,2022-01-26,Bengaluru,10004491,46,182.0,6916.0,3.0,57.0,2166.0,43.0,125.0,4750.0,0.935,0.065


In [10]:
final_df['dow'] = pd.to_datetime(final_df['date']).dt.dayofweek
final_df.head(2)

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,pure_ratio,mix_ratio,dow
0,2022-01-26,Ahmedabad,10000608,6,11.0,418.0,1.0,2.0,76.0,5.0,9.0,342.0,0.833,0.167,2
1,2022-01-26,Ahmedabad,10004485,5,6.0,228.0,1.0,2.0,76.0,4.0,4.0,152.0,0.8,0.2,2


In [11]:
event_date = pd.to_datetime('2022-03-26')
comp_date_lw = event_date - timedelta(days = 7)
required_dow = event_date.dayofweek

print(event_date, comp_date_lw, required_dow)

#final_df['comp_date_lw'] = final_df['date'] - timedelta(days = 7)

#final_df['required_dow'] = pd.to_datetime(final_df['date']).dt.dayofweek

#final_df.head(2)

2022-03-26 00:00:00 2022-03-19 00:00:00 5


In [12]:
event_date_df = final_df[pd.to_datetime(final_df['date']) == pd.to_datetime(event_date)]
event_date_df.head()

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,pure_ratio,mix_ratio,dow
2803,2022-03-26,Ahmedabad,10000608,9,43.0,4584.0,3.0,18.0,1677.0,6.0,25.0,2907.0,0.667,0.333,5
2804,2022-03-26,Ahmedabad,10004485,7,20.0,783.0,2.0,8.0,315.0,5.0,12.0,468.0,0.714,0.286,5
2805,2022-03-26,Ahmedabad,10004491,14,56.0,3192.0,5.0,26.0,1992.0,9.0,30.0,1200.0,0.643,0.357,5
2806,2022-03-26,Bengaluru,10000608,94,267.0,28776.0,12.0,43.0,3618.0,82.0,224.0,25158.0,0.872,0.128,5
2807,2022-03-26,Bengaluru,10004485,58,112.0,5341.0,11.0,31.0,2182.0,47.0,81.0,3159.0,0.81,0.19,5


In [13]:
comp_date_lw_df = final_df[pd.to_datetime(final_df['date']) == pd.to_datetime(comp_date_lw)]
comp_date_lw_df.head()

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,pure_ratio,mix_ratio,dow
2443,2022-03-19,Ahmedabad,10000608,8,30.0,2143.0,3.0,15.0,592.0,5.0,15.0,1551.0,0.625,0.375,5
2444,2022-03-19,Ahmedabad,10004485,8,11.0,429.0,0.0,0.0,0.0,8.0,11.0,429.0,1.0,0.0,5
2445,2022-03-19,Ahmedabad,10004491,8,30.0,1192.0,3.0,15.0,592.0,5.0,15.0,600.0,0.625,0.375,5
2446,2022-03-19,Bengaluru,10000608,59,138.0,7331.0,12.0,42.0,1655.0,47.0,96.0,5676.0,0.797,0.203,5
2447,2022-03-19,Bengaluru,10004485,112,191.0,7451.0,5.0,12.0,470.0,107.0,179.0,6981.0,0.955,0.045,5


In [14]:
dow_df = final_df[(pd.to_datetime(final_df['date']) >= pd.to_datetime(event_date) - timedelta(days = 30)) & (pd.to_datetime(final_df['date']) <= pd.to_datetime(event_date) - timedelta(days = 1))& (final_df['dow'] == required_dow)]
#dow_df = final_df[(pd.to_datetime(final_df['date']) == pd.to_datetime(event_date) - timedelta(days = 30))]
dow_df.head()

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,pure_ratio,mix_ratio,dow
1380,2022-02-26,Ahmedabad,10000608,4,6.0,232.0,2.0,4.0,155.0,2.0,2.0,77.0,0.5,0.5,5
1381,2022-02-26,Ahmedabad,10004485,8,20.0,776.0,1.0,3.0,117.0,7.0,17.0,659.0,0.875,0.125,5
1382,2022-02-26,Ahmedabad,10004491,7,17.0,655.0,3.0,7.0,272.0,4.0,10.0,383.0,0.571,0.429,5
1383,2022-02-26,Bengaluru,10000608,77,265.0,10113.0,11.0,104.0,3960.0,66.0,161.0,6153.0,0.857,0.143,5
1384,2022-02-26,Bengaluru,10004485,64,118.0,4499.0,6.0,22.0,837.0,58.0,96.0,3662.0,0.906,0.094,5


In [15]:
comp_date_lw_df = comp_date_lw_df[['city','item_id', 'total_carts', 'product_quantity', 'product_gmv','mix_carts','mix_product_quantity','mix_product_gmv','pure_carts','pure_product_quantity','pure_product_gmv']]
comp_date_lw_df.columns = ['city','item_id', 'lw_total_carts', 'lw_product_quantity', 'lw_product_gmv','lw_mix_carts','lw_mix_product_quantity','lw_mix_product_gmv','lw_pure_carts','lw_pure_product_quantity','lw_pure_product_gmv']
comp_date_lw_df.head()

Unnamed: 0,city,item_id,lw_total_carts,lw_product_quantity,lw_product_gmv,lw_mix_carts,lw_mix_product_quantity,lw_mix_product_gmv,lw_pure_carts,lw_pure_product_quantity,lw_pure_product_gmv
2443,Ahmedabad,10000608,8,30.0,2143.0,3.0,15.0,592.0,5.0,15.0,1551.0
2444,Ahmedabad,10004485,8,11.0,429.0,0.0,0.0,0.0,8.0,11.0,429.0
2445,Ahmedabad,10004491,8,30.0,1192.0,3.0,15.0,592.0,5.0,15.0,600.0
2446,Bengaluru,10000608,59,138.0,7331.0,12.0,42.0,1655.0,47.0,96.0,5676.0
2447,Bengaluru,10004485,112,191.0,7451.0,5.0,12.0,470.0,107.0,179.0,6981.0


In [16]:
dow_df = dow_df.groupby(['city','item_id']).agg({'total_carts':'mean', 'product_quantity':'mean', 'product_gmv':'mean','mix_carts':'mean','mix_product_quantity':'mean','mix_product_gmv':'mean','pure_carts':'mean','pure_product_quantity':'mean','pure_product_gmv':'mean'}).reset_index()
#dow_df = dow_df[['city', 'total_carts', 'product_quantity', 'product_gmv','mix_carts','mix_product_quantity','mix_product_gmv','pure_carts','pure_product_quantity','pure_product_gmv']]
dow_df.columns = ['city','item_id', 'dow_total_carts', 'dow_product_quantity', 'dow_product_gmv','dow_mix_carts','dow_mix_product_quantity','dow_mix_product_gmv','dow_pure_carts','dow_pure_product_quantity','dow_pure_product_gmv']
dow_df.head()

Unnamed: 0,city,item_id,dow_total_carts,dow_product_quantity,dow_product_gmv,dow_mix_carts,dow_mix_product_quantity,dow_mix_product_gmv,dow_pure_carts,dow_pure_product_quantity,dow_pure_product_gmv
0,Ahmedabad,10000608,7.75,28.75,1365.0,2.0,7.75,304.75,5.75,21.0,1060.25
1,Ahmedabad,10004485,5.75,11.5,447.75,0.75,2.5,97.75,5.0,9.0,350.0
2,Ahmedabad,10004491,6.25,18.5,730.25,2.25,8.25,324.5,4.0,10.25,405.75
3,Bengaluru,10000608,87.75,244.75,9935.5,10.0,55.75,2150.5,77.75,189.0,7785.0
4,Bengaluru,10004485,69.5,125.75,4880.5,5.25,17.25,669.5,64.25,108.5,4211.0


In [17]:
event_date_df = pd.merge(event_date_df, comp_date_lw_df, on = ['city','item_id'], how = 'left')
event_date_df = pd.merge(event_date_df, dow_df, on = ['city','item_id'], how = 'left')

In [18]:
event_date_df.head()

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,pure_ratio,mix_ratio,dow,lw_total_carts,lw_product_quantity,lw_product_gmv,lw_mix_carts,lw_mix_product_quantity,lw_mix_product_gmv,lw_pure_carts,lw_pure_product_quantity,lw_pure_product_gmv,dow_total_carts,dow_product_quantity,dow_product_gmv,dow_mix_carts,dow_mix_product_quantity,dow_mix_product_gmv,dow_pure_carts,dow_pure_product_quantity,dow_pure_product_gmv
0,2022-03-26,Ahmedabad,10000608,9,43.0,4584.0,3.0,18.0,1677.0,6.0,25.0,2907.0,0.667,0.333,5,8.0,30.0,2143.0,3.0,15.0,592.0,5.0,15.0,1551.0,7.75,28.75,1365.0,2.0,7.75,304.75,5.75,21.0,1060.25
1,2022-03-26,Ahmedabad,10004485,7,20.0,783.0,2.0,8.0,315.0,5.0,12.0,468.0,0.714,0.286,5,8.0,11.0,429.0,0.0,0.0,0.0,8.0,11.0,429.0,5.75,11.5,447.75,0.75,2.5,97.75,5.0,9.0,350.0
2,2022-03-26,Ahmedabad,10004491,14,56.0,3192.0,5.0,26.0,1992.0,9.0,30.0,1200.0,0.643,0.357,5,8.0,30.0,1192.0,3.0,15.0,592.0,5.0,15.0,600.0,6.25,18.5,730.25,2.25,8.25,324.5,4.0,10.25,405.75
3,2022-03-26,Bengaluru,10000608,94,267.0,28776.0,12.0,43.0,3618.0,82.0,224.0,25158.0,0.872,0.128,5,59.0,138.0,7331.0,12.0,42.0,1655.0,47.0,96.0,5676.0,87.75,244.75,9935.5,10.0,55.75,2150.5,77.75,189.0,7785.0
4,2022-03-26,Bengaluru,10004485,58,112.0,5341.0,11.0,31.0,2182.0,47.0,81.0,3159.0,0.81,0.19,5,112.0,191.0,7451.0,5.0,12.0,470.0,107.0,179.0,6981.0,69.5,125.75,4880.5,5.25,17.25,669.5,64.25,108.5,4211.0


In [19]:
# event_date_df = event_date_df.fillna(0)
event_date_df['lift_lw_total'] = np.where(event_date_df['lw_total_carts'] == 0, 0.2, (event_date_df['total_carts'] / event_date_df['lw_total_carts']) - 1)
event_date_df['lift_dow_total'] = np.where(event_date_df['dow_total_carts'] == 0, 0.2, (event_date_df['total_carts'] / event_date_df['dow_total_carts']) - 1)

event_date_df['lift_lw_pure'] = np.where(event_date_df['lw_pure_carts'] == 0, 0.2, (event_date_df['pure_carts'] / event_date_df['lw_pure_carts']) - 1)
event_date_df['lift_dow_pure'] = np.where(event_date_df['dow_pure_carts'] == 0, 0.2, (event_date_df['pure_carts'] / event_date_df['dow_pure_carts']) - 1)

event_date_df['lift_lw_mix'] = np.where(event_date_df['lw_mix_carts'] == 0, 0.2, (event_date_df['mix_carts'] / event_date_df['lw_mix_carts']) - 1)
event_date_df['lift_dow_mix'] = np.where(event_date_df['dow_mix_carts'] == 0, 0.2, (event_date_df['mix_carts'] / event_date_df['dow_mix_carts']) - 1)

event_date_df = event_date_df.fillna(0)
#event_date_df = event_date_df[['date', 'dow', 'city', 'total_carts', 'lw_carts', 'dow_carts', 'product_quantity', 'lw_quantity', 'dow_quantity', 'lift_lw', 'lift_dow',]]
event_date_df.head()

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,pure_ratio,mix_ratio,dow,lw_total_carts,lw_product_quantity,lw_product_gmv,lw_mix_carts,lw_mix_product_quantity,lw_mix_product_gmv,lw_pure_carts,lw_pure_product_quantity,lw_pure_product_gmv,dow_total_carts,dow_product_quantity,dow_product_gmv,dow_mix_carts,dow_mix_product_quantity,dow_mix_product_gmv,dow_pure_carts,dow_pure_product_quantity,dow_pure_product_gmv,lift_lw_total,lift_dow_total,lift_lw_pure,lift_dow_pure,lift_lw_mix,lift_dow_mix
0,2022-03-26,Ahmedabad,10000608,9,43.0,4584.0,3.0,18.0,1677.0,6.0,25.0,2907.0,0.667,0.333,5,8.0,30.0,2143.0,3.0,15.0,592.0,5.0,15.0,1551.0,7.75,28.75,1365.0,2.0,7.75,304.75,5.75,21.0,1060.25,0.125,0.161,0.2,0.043,0.0,0.5
1,2022-03-26,Ahmedabad,10004485,7,20.0,783.0,2.0,8.0,315.0,5.0,12.0,468.0,0.714,0.286,5,8.0,11.0,429.0,0.0,0.0,0.0,8.0,11.0,429.0,5.75,11.5,447.75,0.75,2.5,97.75,5.0,9.0,350.0,-0.125,0.217,-0.375,0.0,0.2,1.667
2,2022-03-26,Ahmedabad,10004491,14,56.0,3192.0,5.0,26.0,1992.0,9.0,30.0,1200.0,0.643,0.357,5,8.0,30.0,1192.0,3.0,15.0,592.0,5.0,15.0,600.0,6.25,18.5,730.25,2.25,8.25,324.5,4.0,10.25,405.75,0.75,1.24,0.8,1.25,0.667,1.222
3,2022-03-26,Bengaluru,10000608,94,267.0,28776.0,12.0,43.0,3618.0,82.0,224.0,25158.0,0.872,0.128,5,59.0,138.0,7331.0,12.0,42.0,1655.0,47.0,96.0,5676.0,87.75,244.75,9935.5,10.0,55.75,2150.5,77.75,189.0,7785.0,0.593,0.071,0.745,0.055,0.0,0.2
4,2022-03-26,Bengaluru,10004485,58,112.0,5341.0,11.0,31.0,2182.0,47.0,81.0,3159.0,0.81,0.19,5,112.0,191.0,7451.0,5.0,12.0,470.0,107.0,179.0,6981.0,69.5,125.75,4880.5,5.25,17.25,669.5,64.25,108.5,4211.0,-0.482,-0.165,-0.561,-0.268,1.2,1.095


In [20]:
event_date_df['anticipated_lift_total'] = (event_date_df['lift_lw_total'] + event_date_df['lift_dow_total']) / 2
event_date_df['anticipated_lift_pure'] = (event_date_df['lift_lw_pure'] + event_date_df['lift_dow_pure']) / 2
event_date_df['anticipated_lift_mix'] = (event_date_df['lift_lw_mix'] + event_date_df['lift_dow_mix']) / 2

#event_date_df = event_date_df[['date', 'dow', 'city', 'total_carts', 'lw_carts', 'dow_carts','product_quantity', 'lw_quantity', 'dow_quantity', 'lift_lw', 'lift_dow', 'anticipated_lift', 'product_gmv', 'lw_gmv', 'dow_gmv',]]

In [21]:
event_date_df.head(2)

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,pure_ratio,mix_ratio,dow,lw_total_carts,lw_product_quantity,lw_product_gmv,lw_mix_carts,lw_mix_product_quantity,lw_mix_product_gmv,lw_pure_carts,lw_pure_product_quantity,lw_pure_product_gmv,dow_total_carts,dow_product_quantity,dow_product_gmv,dow_mix_carts,dow_mix_product_quantity,dow_mix_product_gmv,dow_pure_carts,dow_pure_product_quantity,dow_pure_product_gmv,lift_lw_total,lift_dow_total,lift_lw_pure,lift_dow_pure,lift_lw_mix,lift_dow_mix,anticipated_lift_total,anticipated_lift_pure,anticipated_lift_mix
0,2022-03-26,Ahmedabad,10000608,9,43.0,4584.0,3.0,18.0,1677.0,6.0,25.0,2907.0,0.667,0.333,5,8.0,30.0,2143.0,3.0,15.0,592.0,5.0,15.0,1551.0,7.75,28.75,1365.0,2.0,7.75,304.75,5.75,21.0,1060.25,0.125,0.161,0.2,0.043,0.0,0.5,0.143,0.122,0.25
1,2022-03-26,Ahmedabad,10004485,7,20.0,783.0,2.0,8.0,315.0,5.0,12.0,468.0,0.714,0.286,5,8.0,11.0,429.0,0.0,0.0,0.0,8.0,11.0,429.0,5.75,11.5,447.75,0.75,2.5,97.75,5.0,9.0,350.0,-0.125,0.217,-0.375,0.0,0.2,1.667,0.046,-0.188,0.933


In [22]:
event_date_df.columns

Index(['date', 'city', 'item_id', 'total_carts', 'product_quantity',
       'product_gmv', 'mix_carts', 'mix_product_quantity', 'mix_product_gmv',
       'pure_carts', 'pure_product_quantity', 'pure_product_gmv', 'pure_ratio',
       'mix_ratio', 'dow', 'lw_total_carts', 'lw_product_quantity',
       'lw_product_gmv', 'lw_mix_carts', 'lw_mix_product_quantity',
       'lw_mix_product_gmv', 'lw_pure_carts', 'lw_pure_product_quantity',
       'lw_pure_product_gmv', 'dow_total_carts', 'dow_product_quantity',
       'dow_product_gmv', 'dow_mix_carts', 'dow_mix_product_quantity',
       'dow_mix_product_gmv', 'dow_pure_carts', 'dow_pure_product_quantity',
       'dow_pure_product_gmv', 'lift_lw_total', 'lift_dow_total',
       'lift_lw_pure', 'lift_dow_pure', 'lift_lw_mix', 'lift_dow_mix',
       'anticipated_lift_total', 'anticipated_lift_pure',
       'anticipated_lift_mix'],
      dtype='object')

In [23]:
overall_df = event_date_df.groupby(['date', 'dow','item_id']).agg({
    'total_carts':'sum', 'lw_total_carts':'sum', 'dow_total_carts':'sum', 'product_quantity':'sum', 'lw_product_quantity':'sum', 
    'dow_product_quantity':'sum', 'product_gmv':'sum', 'lw_product_gmv':'sum', 'dow_product_gmv':'sum', 
    'pure_carts':'sum', 'lw_pure_carts':'sum', 'dow_pure_carts':'sum', 'pure_product_quantity':'sum', 'lw_pure_product_quantity':'sum', 
    'dow_pure_product_quantity':'sum', 'pure_product_gmv':'sum', 'lw_pure_product_gmv':'sum', 'dow_pure_product_gmv':'sum',
    'mix_carts':'sum', 'lw_mix_carts':'sum', 'dow_mix_carts':'sum', 'mix_product_quantity':'sum', 'lw_mix_product_quantity':'sum', 
    'dow_mix_product_quantity':'sum', 'mix_product_gmv':'sum', 'lw_mix_product_gmv':'sum', 'dow_mix_product_gmv':'sum','pure_ratio':'mean','mix_ratio':'mean'
}).reset_index()


overall_df['lift_lw_total'] = np.where(overall_df['lw_total_carts'] == 0, 0.2, (overall_df['total_carts'] / overall_df['lw_total_carts']) - 1)
overall_df['lift_dow_total'] = np.where(overall_df['dow_total_carts'] == 0, 0.2, (overall_df['total_carts'] / overall_df['dow_total_carts']) - 1)
overall_df['anticipated_lift_total'] = (overall_df['lift_lw_total'] + overall_df['lift_dow_total']) / 2

overall_df['lift_lw_pure'] = np.where(overall_df['lw_total_carts'] == 0, 0.2, (overall_df['pure_carts'] / overall_df['lw_total_carts']) - 1)
overall_df['lift_dow_pure'] = np.where(overall_df['dow_total_carts'] == 0, 0.2, (overall_df['pure_carts'] / overall_df['dow_total_carts']) - 1)
overall_df['anticipated_lift_pure'] = (overall_df['lift_lw_pure'] + overall_df['lift_dow_pure']) / 2

overall_df['lift_lw_mix'] = np.where(overall_df['lw_mix_carts'] == 0, 0.2, (overall_df['mix_carts'] / overall_df['lw_mix_carts']) - 1)
overall_df['lift_dow_mix'] = np.where(overall_df['dow_mix_carts'] == 0, 0.2, (overall_df['mix_carts'] / overall_df['dow_mix_carts']) - 1)
overall_df['anticipated_lift_mix'] = (overall_df['lift_lw_mix'] + overall_df['lift_dow_mix']) / 2

overall_df['city'] = 'All'

#overall_df = overall_df[['date', 'dow', 'city', 'total_carts', 'lw_carts', 'dow_carts','product_quantity', 'lw_quantity', 'dow_quantity', 'lift_lw', 'lift_dow', 'anticipated_lift', 'product_gmv', 'lw_gmv', 'dow_gmv',]]


In [24]:
overall_df.head()

Unnamed: 0,date,dow,item_id,total_carts,lw_total_carts,dow_total_carts,product_quantity,lw_product_quantity,dow_product_quantity,product_gmv,lw_product_gmv,dow_product_gmv,pure_carts,lw_pure_carts,dow_pure_carts,pure_product_quantity,lw_pure_product_quantity,dow_pure_product_quantity,pure_product_gmv,lw_pure_product_gmv,dow_pure_product_gmv,mix_carts,lw_mix_carts,dow_mix_carts,mix_product_quantity,lw_mix_product_quantity,dow_mix_product_quantity,mix_product_gmv,lw_mix_product_gmv,dow_mix_product_gmv,pure_ratio,mix_ratio,lift_lw_total,lift_dow_total,anticipated_lift_total,lift_lw_pure,lift_dow_pure,anticipated_lift_pure,lift_lw_mix,lift_dow_mix,anticipated_lift_mix,city
0,2022-03-26,5,10000608,790,390.0,620.833,2862.0,1310.0,1861.75,254887.0,90752.0,78887.167,631.0,327.0,534.25,2061.0,1001.0,1446.75,207891.0,75692.0,62355.333,159.0,63.0,86.583,801.0,309.0,415.0,46996.0,15060.0,16531.833,0.769,0.231,1.026,0.272,0.649,0.618,0.016,0.317,1.524,0.836,1.18,All
1,2022-03-26,5,10004485,662,455.0,504.833,1620.0,1074.0,1184.083,66224.0,41930.0,45672.333,564.0,421.0,451.667,1164.0,899.0,919.0,45396.0,35056.0,35516.5,98.0,34.0,53.167,456.0,175.0,265.083,20828.0,6874.0,10155.833,0.902,0.098,0.455,0.311,0.383,0.24,0.117,0.178,1.882,0.843,1.363,All
2,2022-03-26,5,10004491,1023,415.0,514.5,2996.0,1145.0,1518.25,131944.0,48514.0,60589.0,838.0,349.0,433.0,2117.0,832.0,1124.5,84680.0,33260.0,44680.0,185.0,66.0,81.5,879.0,313.0,393.75,47264.0,15254.0,15909.0,0.824,0.176,1.465,0.988,1.227,1.019,0.629,0.824,1.803,1.27,1.536,All


In [25]:
event_date_df = pd.concat([overall_df, event_date_df,])
event_date_df = event_date_df[event_date_df.city != '0']

event_date_df.sort_values(by = ['city']).to_csv('event_date_df.csv', index = False)

event_date_df.head()

Unnamed: 0,date,dow,item_id,total_carts,lw_total_carts,dow_total_carts,product_quantity,lw_product_quantity,dow_product_quantity,product_gmv,lw_product_gmv,dow_product_gmv,pure_carts,lw_pure_carts,dow_pure_carts,pure_product_quantity,lw_pure_product_quantity,dow_pure_product_quantity,pure_product_gmv,lw_pure_product_gmv,dow_pure_product_gmv,mix_carts,lw_mix_carts,dow_mix_carts,mix_product_quantity,lw_mix_product_quantity,dow_mix_product_quantity,mix_product_gmv,lw_mix_product_gmv,dow_mix_product_gmv,pure_ratio,mix_ratio,lift_lw_total,lift_dow_total,anticipated_lift_total,lift_lw_pure,lift_dow_pure,anticipated_lift_pure,lift_lw_mix,lift_dow_mix,anticipated_lift_mix,city
0,2022-03-26,5,10000608,790,390.0,620.833,2862.0,1310.0,1861.75,254887.0,90752.0,78887.167,631.0,327.0,534.25,2061.0,1001.0,1446.75,207891.0,75692.0,62355.333,159.0,63.0,86.583,801.0,309.0,415.0,46996.0,15060.0,16531.833,0.769,0.231,1.026,0.272,0.649,0.618,0.016,0.317,1.524,0.836,1.18,All
1,2022-03-26,5,10004485,662,455.0,504.833,1620.0,1074.0,1184.083,66224.0,41930.0,45672.333,564.0,421.0,451.667,1164.0,899.0,919.0,45396.0,35056.0,35516.5,98.0,34.0,53.167,456.0,175.0,265.083,20828.0,6874.0,10155.833,0.902,0.098,0.455,0.311,0.383,0.24,0.117,0.178,1.882,0.843,1.363,All
2,2022-03-26,5,10004491,1023,415.0,514.5,2996.0,1145.0,1518.25,131944.0,48514.0,60589.0,838.0,349.0,433.0,2117.0,832.0,1124.5,84680.0,33260.0,44680.0,185.0,66.0,81.5,879.0,313.0,393.75,47264.0,15254.0,15909.0,0.824,0.176,1.465,0.988,1.227,1.019,0.629,0.824,1.803,1.27,1.536,All
0,2022-03-26,5,10000608,9,8.0,7.75,43.0,30.0,28.75,4584.0,2143.0,1365.0,6.0,5.0,5.75,25.0,15.0,21.0,2907.0,1551.0,1060.25,3.0,3.0,2.0,18.0,15.0,7.75,1677.0,592.0,304.75,0.667,0.333,0.125,0.161,0.143,0.2,0.043,0.122,0.0,0.5,0.25,Ahmedabad
1,2022-03-26,5,10004485,7,8.0,5.75,20.0,11.0,11.5,783.0,429.0,447.75,5.0,8.0,5.0,12.0,11.0,9.0,468.0,429.0,350.0,2.0,0.0,0.75,8.0,0.0,2.5,315.0,0.0,97.75,0.714,0.286,-0.125,0.217,0.046,-0.375,0.0,-0.188,0.2,1.667,0.933,Ahmedabad


In [26]:
median_total = event_date_df['anticipated_lift_total'].median()
event_date_df['anticipated_lift_total'] = np.where(event_date_df['anticipated_lift_total'] <= 0, median_total, event_date_df['anticipated_lift_total'])
event_date_df['anticipated_lift_total'] = np.where(event_date_df['anticipated_lift_total'] >= 1, event_date_df['anticipated_lift_total'] - np.floor(event_date_df['anticipated_lift_total']), event_date_df['anticipated_lift_total'])

median_pure = event_date_df['anticipated_lift_pure'].median()
event_date_df['anticipated_lift_pure'] = np.where(event_date_df['anticipated_lift_pure'] <= 0, median_pure, event_date_df['anticipated_lift_pure'])
event_date_df['anticipated_lift_pure'] = np.where(event_date_df['anticipated_lift_pure'] >= 1, event_date_df['anticipated_lift_pure'] - np.floor(event_date_df['anticipated_lift_pure']), event_date_df['anticipated_lift_pure'])

median_mix = event_date_df['anticipated_lift_mix'].median()
event_date_df['anticipated_lift_mix'] = np.where(event_date_df['anticipated_lift_mix'] <= 0, median_mix, event_date_df['anticipated_lift_mix'])
event_date_df['anticipated_lift_mix'] = np.where(event_date_df['anticipated_lift_mix'] >= 1, event_date_df['anticipated_lift_mix'] - np.floor(event_date_df['anticipated_lift_mix']), event_date_df['anticipated_lift_mix'])

In [27]:
np.floor(event_date_df['anticipated_lift_mix'])

0    0.000
1    0.000
2    0.000
0    0.000
1    0.000
2    0.000
3    0.000
4    0.000
5    0.000
6    0.000
7    0.000
8    0.000
9    0.000
10   0.000
11   0.000
12   0.000
13   0.000
14   0.000
15   0.000
16   0.000
17   0.000
18   0.000
19   0.000
20   0.000
21   0.000
22   0.000
23   0.000
24   0.000
25   0.000
26   0.000
27   0.000
28   0.000
29   0.000
30   0.000
31   0.000
32   0.000
33   0.000
34   0.000
35   0.000
36   0.000
37   0.000
38   0.000
39   0.000
40   0.000
41   0.000
42   0.000
43   0.000
44   0.000
45   0.000
46   0.000
47   0.000
48   0.000
49   0.000
50   0.000
51   0.000
52   0.000
53   0.000
Name: anticipated_lift_mix, dtype: float64

In [28]:
lift_df = event_date_df[['dow','city','item_id','anticipated_lift_total','anticipated_lift_pure','anticipated_lift_mix']]

In [29]:
lift_df.head()

Unnamed: 0,dow,city,item_id,anticipated_lift_total,anticipated_lift_pure,anticipated_lift_mix
0,5,All,10000608,0.649,0.317,0.18
1,5,All,10004485,0.383,0.178,0.363
2,5,All,10004491,0.227,0.824,0.536
0,5,Ahmedabad,10000608,0.143,0.122,0.25
1,5,Ahmedabad,10004485,0.046,0.283,0.933


In [30]:
lift_df['anticipated_lift_total'].max()

0.9950000000000001

In [31]:
current_date = pd.to_datetime('2023-04-01')
current_date_lw = current_date - timedelta(days = 7)
current_date_required_dow = current_date.dayofweek

print(current_date, current_date_lw, current_date_required_dow)

2023-04-01 00:00:00 2023-03-25 00:00:00 5


In [32]:
sales_query = f"""
with item_product_mapping as
(
    SELECT
        DISTINCT
            ipr.product_id,
            case
                when
                    ipr.item_id is null
                then ipom_0.item_id else ipr.item_id end as item_id,
            case
                when
                    ipr.item_id is not null
                then
                    COALESCE(ipom.multiplier,1)
                else
                    COALESCE(ipom_0.multiplier,1)
                end AS multiplier
    FROM
        lake_rpc.item_product_mapping ipr
    left join
        dwh.dim_item_product_offer_mapping ipom
    on
        ipr.product_id = ipom.product_id
    and
            ipr.item_id = ipom.item_id
    left join
        dwh.dim_item_product_offer_mapping ipom_0
    on
        ipr.product_id = ipom_0.product_id
),

required_items as 
(
    select
        *
    from
        lake_rpc.product_product
    where
    --     active = 1
    -- and
        lower(name) ilike '%%coca-cola%%'
    and
        lower(name) ilike '%%can%%'

),

sales_base as
(
    select
        date(i.cart_checkout_ts_ist) as date,
        cl.name as city,
        i.outlet_id,
        ipm.item_id,
        i.cart_id,
        sum(1.000*i.product_quantity * COALESCE(multiplier,1)) as product_quantity,
        sum(1.000*i.total_selling_price * COALESCE(multiplier,1)) as product_gmv
    from
        dwh.fact_sales_order_item_details i
    
    inner join
        dwh.fact_sales_order_details j
    on
        i.cart_id=j.cart_id
    
    left join
        item_product_mapping ipm
    ON
        i.product_id = ipm.product_id
    
    left join
        lake_rpc.item_category_details icd
    ON
        icd.item_id = ipm.item_id
    
    left join
        lake_retail.console_outlet co
    on
        co.id = i.outlet_id
    and
        co.business_type_id = 7
    
    left join
        lake_retail.console_location cl
    on
        cl.id = tax_location_id
    
    where
        i.cart_checkout_ts_ist >= '2023-03-25'
    and
        j.is_internal_order = false
    and
        j.order_current_status = 'DELIVERED'
    and
        icd.item_id in (select distinct item_id from required_items)
    group by 1,2,3,4,5
)

SELECT * FROM sales_base
"""
sales_df = read_sql_query(sales_query, CON_REDSHIFT)
sales_df.head()

Read attempt: 0...
Time:  12.84  sec


Unnamed: 0,date,city,outlet_id,item_id,cart_id,product_quantity,product_gmv
0,2023-03-25,Bengaluru,2780.0,10000608,344140782,2.0,76.0
1,2023-03-25,Mumbai,1716.0,10000608,321473103,2.0,76.0
2,2023-03-26,Gurgaon,1010.0,10000608,343963790,1.0,38.0
3,2023-03-26,Kolkata,2793.0,10004485,344530063,1.0,38.0
4,2023-03-26,Hyderabad,2607.0,10004491,344481919,1.0,40.0


In [33]:
sales_df = sales_df[sales_df['date'] == pd.to_datetime('2023-03-25')]
sales_df.head()

Unnamed: 0,date,city,outlet_id,item_id,cart_id,product_quantity,product_gmv
0,2023-03-25,Bengaluru,2780.0,10000608,344140782,2.0,76.0
1,2023-03-25,Mumbai,1716.0,10000608,321473103,2.0,76.0
6,2023-03-25,Bengaluru,3376.0,10000608,343502093,10.0,380.0
7,2023-03-25,Delhi,2452.0,10000608,344190496,4.0,152.0
8,2023-03-25,Gurgaon,1010.0,10004485,344211492,1.0,38.0


In [34]:
sales_df[sales_df['city']=='Noida'] 

Unnamed: 0,date,city,outlet_id,item_id,cart_id,product_quantity,product_gmv


In [35]:
cart_base_df = sales_df.groupby(['date','city','cart_id']).agg({'item_id':'nunique','product_quantity':'sum','product_gmv':'sum'}).rename(columns={'item_id':'unique_items'}).reset_index()
cart_base_df = cart_base_df.merge(sales_df[['date','city','item_id','cart_id']], on=['date','city','cart_id'], how = 'left')

pure_carts_df = cart_base_df[cart_base_df['unique_items'] == 1]
pure_carts_df = pure_carts_df.groupby(['date','city','item_id']).agg({'cart_id':'nunique','product_quantity':'sum','product_gmv':'sum'}).rename(columns={'cart_id':'pure_carts','product_quantity':'pure_product_quantity','product_gmv':'pure_product_gmv'}).reset_index()

mix_carts_df = cart_base_df[cart_base_df['unique_items'] > 1]
mix_carts_df = mix_carts_df.groupby(['date','city','item_id']).agg({'cart_id':'nunique','product_quantity':'sum','product_gmv':'sum'}).rename(columns={'cart_id':'mix_carts','product_quantity':'mix_product_quantity','product_gmv':'mix_product_gmv'}).reset_index()

total_carts_df = cart_base_df.groupby(['date','city','item_id']).agg({'cart_id':'nunique','product_quantity':'sum','product_gmv':'sum'}).rename(columns={'cart_id':'total_carts','product_quantity':'product_quantity','product_gmv':'product_gmv'}).reset_index()

final_df = total_carts_df.merge(mix_carts_df, on = ['date','city','item_id'], how = 'left')
final_df = final_df.merge(pure_carts_df, on = ['date','city','item_id'], how = 'left')
final_df = final_df.fillna(0)

final_df['pure_ratio'] = final_df['pure_carts']/final_df['total_carts']
final_df['mix_ratio'] = final_df['mix_carts']/final_df['total_carts']

final_df['dow'] = pd.to_datetime(final_df['date']).dt.dayofweek
final_df.head()

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,pure_ratio,mix_ratio,dow
0,2023-03-25,Ahmedabad,10000608,57,140.0,5332.0,8.0,28.0,1076.0,49,112.0,4256.0,0.86,0.14,5
1,2023-03-25,Ahmedabad,10004485,30,63.0,2398.0,5.0,18.0,688.0,25,45.0,1710.0,0.833,0.167,5
2,2023-03-25,Ahmedabad,10004491,61,156.0,6220.0,6.0,17.0,660.0,55,139.0,5560.0,0.902,0.098,5
3,2023-03-25,Amritsar,10000608,6,20.0,800.0,0.0,0.0,0.0,6,20.0,800.0,1.0,0.0,5
4,2023-03-25,Amritsar,10004491,1,2.0,80.0,0.0,0.0,0.0,1,2.0,80.0,1.0,0.0,5


In [36]:
overall_df = final_df.groupby(['date', 'dow','item_id']).agg({
    'total_carts':'sum','product_quantity':'sum','product_gmv':'sum','pure_carts':'sum','pure_product_quantity':'sum','pure_product_gmv':'sum',
    'mix_carts':'sum','mix_product_quantity':'sum', 'mix_product_gmv':'sum','pure_ratio':'mean','mix_ratio':'mean'}).reset_index()
overall_df['city'] = 'All'
overall_df.head()

Unnamed: 0,date,dow,item_id,total_carts,product_quantity,product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_ratio,mix_ratio,city
0,2023-03-25,5,10000608,2448,6777.0,261643.0,2186,5762.0,218849.0,262.0,1015.0,42794.0,0.921,0.079,All
1,2023-03-25,5,10004485,1676,3991.0,151980.0,1529,3346.0,127216.0,147.0,645.0,24764.0,0.897,0.103,All
2,2023-03-25,5,10004491,2007,5809.0,355663.0,1786,4976.0,319559.0,221.0,833.0,36104.0,0.903,0.097,All


In [37]:
final_df = pd.concat([overall_df, final_df,])

In [38]:
final_df = final_df.merge(lift_df, on=['dow','city','item_id'], how='left')
final_df.head()

Unnamed: 0,date,dow,item_id,total_carts,product_quantity,product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_ratio,mix_ratio,city,anticipated_lift_total,anticipated_lift_pure,anticipated_lift_mix
0,2023-03-25,5,10000608,2448,6777.0,261643.0,2186,5762.0,218849.0,262.0,1015.0,42794.0,0.921,0.079,All,0.649,0.317,0.18
1,2023-03-25,5,10004485,1676,3991.0,151980.0,1529,3346.0,127216.0,147.0,645.0,24764.0,0.897,0.103,All,0.383,0.178,0.363
2,2023-03-25,5,10004491,2007,5809.0,355663.0,1786,4976.0,319559.0,221.0,833.0,36104.0,0.903,0.097,All,0.227,0.824,0.536
3,2023-03-25,5,10000608,57,140.0,5332.0,49,112.0,4256.0,8.0,28.0,1076.0,0.86,0.14,Ahmedabad,0.143,0.122,0.25
4,2023-03-25,5,10004485,30,63.0,2398.0,25,45.0,1710.0,5.0,18.0,688.0,0.833,0.167,Ahmedabad,0.046,0.283,0.933


In [39]:
final_df.shape

(67, 18)

In [40]:
non_available_store_total = len(final_df[final_df['anticipated_lift_total'].isna()])
median_total = median_total/non_available_store_total
final_df['anticipated_lift_total'] = np.where(final_df['anticipated_lift_total'].isna(), median_total, final_df['anticipated_lift_total'])

non_available_store_pure = len(final_df[final_df['anticipated_lift_pure'].isna()])
median_pure = median_pure/non_available_store_pure
final_df['anticipated_lift_pure'] = np.where(final_df['anticipated_lift_pure'].isna(), median_pure, final_df['anticipated_lift_pure'])

non_available_store_mix = len(final_df[final_df['anticipated_lift_mix'].isna()])
median_mix = median_mix/non_available_store_pure
final_df['anticipated_lift_mix'] = np.where(final_df['anticipated_lift_mix'].isna(), median_mix, final_df['anticipated_lift_mix'])

In [41]:
result_df = final_df.copy()

result_df['new_total_carts'] = result_df['total_carts'] + (result_df['total_carts'] * result_df['anticipated_lift_total'])
result_df['new_pure_total_carts'] = result_df['pure_carts'] + (result_df['pure_carts'] * result_df['anticipated_lift_pure'])
result_df['new_mix_total_carts'] = result_df['mix_carts'] + (result_df['mix_carts'] * result_df['anticipated_lift_mix'])

result_df['qpc_total'] = result_df['total_carts']/result_df['new_total_carts']
result_df['qpc_pure'] = result_df['pure_carts']/result_df['new_pure_total_carts']
result_df['qpc_mix'] = result_df['mix_carts']/result_df['new_mix_total_carts']

result_df= result_df.fillna(0)

result_df.head()

Unnamed: 0,date,dow,item_id,total_carts,product_quantity,product_gmv,pure_carts,pure_product_quantity,pure_product_gmv,mix_carts,mix_product_quantity,mix_product_gmv,pure_ratio,mix_ratio,city,anticipated_lift_total,anticipated_lift_pure,anticipated_lift_mix,new_total_carts,new_pure_total_carts,new_mix_total_carts,qpc_total,qpc_pure,qpc_mix
0,2023-03-25,5,10000608,2448,6777.0,261643.0,2186,5762.0,218849.0,262.0,1015.0,42794.0,0.921,0.079,All,0.649,0.317,0.18,4036.904,2879.317,309.185,0.606,0.759,0.847
1,2023-03-25,5,10004485,1676,3991.0,151980.0,1529,3346.0,127216.0,147.0,645.0,24764.0,0.897,0.103,All,0.383,0.178,0.363,2318.133,1801.744,200.333,0.723,0.849,0.734
2,2023-03-25,5,10004491,2007,5809.0,355663.0,1786,4976.0,319559.0,221.0,833.0,36104.0,0.903,0.097,All,0.227,0.824,0.536,2461.985,3257.702,339.563,0.815,0.548,0.651
3,2023-03-25,5,10000608,57,140.0,5332.0,49,112.0,4256.0,8.0,28.0,1076.0,0.86,0.14,Ahmedabad,0.143,0.122,0.25,65.159,54.965,10.0,0.875,0.891,0.8
4,2023-03-25,5,10004485,30,63.0,2398.0,25,45.0,1710.0,5.0,18.0,688.0,0.833,0.167,Ahmedabad,0.046,0.283,0.933,31.386,32.083,9.667,0.956,0.779,0.517


city_result_df = result_df.groupby(['date','dow','city']).agg({'new_total_carts':'sum','new_pure_total_carts':'sum','new_mix_total_carts':'sum'}).rename(
    columns={'new_total_carts':'city_new_total_carts','new_pure_total_carts':'city_new_pure_total_carts','new_mix_total_carts':'city_new_mix_total_carts'}).reset_index()

result_df = result_df.merge(city_result_df[['date','dow','city','city_new_total_carts','city_new_pure_total_carts','city_new_mix_total_carts']], 
                            on = ['date','dow','city'], how = 'left').reset_index()

result_df['cp_total'] = result_df['new_total_carts']/sum(result_df['city_new_total_carts'])
result_df['cp_pure'] = result_df['new_pure_total_carts']/sum(result_df['city_new_pure_total_carts'])
result_df['cp_mix'] = result_df['new_mix_total_carts']/sum(result_df['city_new_mix_total_carts'])

result_df= result_df.fillna(0)

result_df.head()

In [42]:
result_df = result_df[['date','dow','city','item_id','total_carts','pure_carts','mix_carts','new_total_carts','new_pure_total_carts','new_mix_total_carts','qpc_total','qpc_pure','qpc_mix',
                       'anticipated_lift_total','anticipated_lift_pure','anticipated_lift_mix','pure_ratio','mix_ratio']]
result_df.head()

Unnamed: 0,date,dow,city,item_id,total_carts,pure_carts,mix_carts,new_total_carts,new_pure_total_carts,new_mix_total_carts,qpc_total,qpc_pure,qpc_mix,anticipated_lift_total,anticipated_lift_pure,anticipated_lift_mix,pure_ratio,mix_ratio
0,2023-03-25,5,All,10000608,2448,2186,262.0,4036.904,2879.317,309.185,0.606,0.759,0.847,0.649,0.317,0.18,0.921,0.079
1,2023-03-25,5,All,10004485,1676,1529,147.0,2318.133,1801.744,200.333,0.723,0.849,0.734,0.383,0.178,0.363,0.897,0.103
2,2023-03-25,5,All,10004491,2007,1786,221.0,2461.985,3257.702,339.563,0.815,0.548,0.651,0.227,0.824,0.536,0.903,0.097
3,2023-03-25,5,Ahmedabad,10000608,57,49,8.0,65.159,54.965,10.0,0.875,0.891,0.8,0.143,0.122,0.25,0.86,0.14
4,2023-03-25,5,Ahmedabad,10004485,30,25,5.0,31.386,32.083,9.667,0.956,0.779,0.517,0.046,0.283,0.933,0.833,0.167


In [43]:
result_df.to_csv('city_item_raw.csv',index=False)