In [1]:
# Packages

import pencilbox as pb
import pandas as pd
import datetime as dt
import time
import numpy as np
from calendar import monthrange
from datetime import timedelta, datetime, date
import gc
import warnings
import math
# Blanket filter: every warning category is silenced for the rest of the session.
warnings.filterwarnings(action="ignore")

# Connection

# Warehouse handles, looked up by their registered names through pencilbox.
CON_TRINO = pb.get_connection("[Warehouse] Trino")
CON_REDSHIFT = pb.get_connection("[Warehouse] Redshift")

def read_sql_query(sql, con, max_tries=1, retry_delay=10):
    """Run ``sql`` on ``con`` and return the result as a DataFrame.

    Each attempt is announced on stdout, and the wall-clock duration of the
    successful attempt is printed (in minutes above 60 seconds, otherwise in
    seconds).

    Parameters
    ----------
    sql : str
        Query text handed to ``pd.read_sql_query``.
    con : connection
        Any connection object accepted by ``pd.read_sql_query``.
    max_tries : int, default 1
        Total number of attempts; values below 1 are treated as 1.
    retry_delay : float, default 10
        Seconds to wait between two consecutive attempts.

    Returns
    -------
    pandas.DataFrame
        The query result.

    Raises
    ------
    Exception
        The error of the last attempt is re-raised once all attempts are
        used up, so a failed read is never mistaken for an empty result.
    """
    max_tries = max(1, int(max_tries))
    for attempt in range(max_tries):
        print(f"Read attempt: {attempt}...")
        start = time.time()
        try:
            df = pd.read_sql_query(sql, con)
        # Exception (not BaseException) so Ctrl-C / kernel interrupt still stops the cell.
        except Exception as e:
            print(e)
            if attempt == max_tries - 1:
                # No attempts left: surface the error instead of returning None.
                raise
            time.sleep(retry_delay)
            continue
        elapsed = time.time() - start
        if elapsed > 60:
            print("Time: ", round(elapsed / 60, 2), " min")
        else:
            print("Time: ", round(elapsed, 2), " sec")
        return df
            
# Viewing configs

# Never elide columns when a frame is rendered.
pd.set_option("display.max_columns", None)

# Floats are rendered with exactly three decimals.
three_decimals = lambda value: "%.3f" % value
pd.set_option("display.float_format", three_decimals)

# pd.set_option("display.max_rows", None)

In [2]:
# start_date = pd.to_datetime(datetime.now() + timedelta(hours = 5.5) - timedelta(days = 26)).date()
# start_date

# Sales Query

In [3]:
# Item-level sales for the tracked products, pulled from Redshift.
#
# The query is built from three CTEs:
#   * item_product_mapping - distinct (product_id, item_id, multiplier) rows; the
#     multiplier falls back to 1 when no offer mapping matches.
#   * required_items       - products whose name contains both "coca-cola" and "can".
#   * sales_base           - quantity and GMV summed per (date, city, outlet, item, cart)
#     for delivered, non-internal orders.
#
# The window start used to be written as `'2023-03' - 35`, which is not a valid date
# expression. It is now "today minus SALES_LOOKBACK_DAYS"; the look-back has to stay
# above 31 days because later cells compare against the 30 days before yesterday.
#
# NOTE(review): the final filter reads item_id from required_items, i.e. from
# lake_rpc.product_product - presumably that table exposes an item_id column; confirm.
SALES_LOOKBACK_DAYS = 35

sales_query = f"""
with item_product_mapping as
(
    SELECT
        DISTINCT
            ipr.product_id,
            case
                when
                    ipr.item_id is null
                then ipom_0.item_id else ipr.item_id end as item_id,
            case
                when
                    ipr.item_id is not null
                then
                    COALESCE(ipom.multiplier,1)
                else
                    COALESCE(ipom_0.multiplier,1)
                end AS multiplier
    FROM
        lake_rpc.item_product_mapping ipr
    left join
        dwh.dim_item_product_offer_mapping ipom
    on
        ipr.product_id = ipom.product_id
    and
            ipr.item_id = ipom.item_id
    left join
        dwh.dim_item_product_offer_mapping ipom_0
    on
        ipr.product_id = ipom_0.product_id
),

required_items as 
(
    select
        *
    from
        lake_rpc.product_product
    where
    --     active = 1
    -- and
        lower(name) ilike '%%coca-cola%%'
    and
        lower(name) ilike '%%can%%'

),

sales_base as
(
    select
        date(i.cart_checkout_ts_ist) as date,
        cl.name as city,
        i.outlet_id,
        ipm.item_id,
        i.cart_id,
        sum(1.000*i.product_quantity * COALESCE(multiplier,1)) as product_quantity,
        sum(1.000*i.total_selling_price * COALESCE(multiplier,1)) as product_gmv
    from
        dwh.fact_sales_order_item_details i
    
    inner join
        dwh.fact_sales_order_details j
    on
        i.cart_id=j.cart_id
    
    left join
        item_product_mapping ipm
    ON
        i.product_id = ipm.product_id
    
    left join
        lake_rpc.item_category_details icd
    ON
        icd.item_id = ipm.item_id
    
    left join
        lake_retail.console_outlet co
    on
        co.id = i.outlet_id
    and
        co.business_type_id = 7
    
    left join
        lake_retail.console_location cl
    on
        cl.id = tax_location_id
    
    where
        i.cart_checkout_ts_ist >= current_date - {SALES_LOOKBACK_DAYS}
    and
        j.is_internal_order = false
    and
        j.order_current_status = 'DELIVERED'
    and
        icd.item_id in (select distinct item_id from required_items)
    group by 1,2,3,4,5
)

SELECT * FROM sales_base
"""
sales_df = read_sql_query(sales_query, CON_REDSHIFT)
sales_df.head()

Read attempt: 0...
Time:  18.45  sec


Unnamed: 0,date,city,outlet_id,item_id,cart_id,product_quantity,product_gmv
0,2023-02-24,Hyderabad,2266.0,10004485,328277071,1.0,38.0
1,2023-02-24,Ahmedabad,1337.0,10004491,328749563,3.0,120.0
2,2023-02-24,Delhi,3241.0,10000608,328782486,1.0,38.0
3,2023-02-26,Ghaziabad,1976.0,10004491,328237350,4.0,160.0
4,2023-02-26,Mumbai,3016.0,10004491,329976525,1.0,40.0


In [4]:
# Cart-level rollup: number of distinct items plus summed quantity and GMV
# for every (date, city, cart).
cart_keys = ['date', 'city', 'cart_id']
cart_totals = (
    sales_df
    .groupby(cart_keys)
    .agg({'item_id': 'nunique', 'product_quantity': 'sum', 'product_gmv': 'sum'})
    .rename(columns={'item_id': 'unique_items'})
    .reset_index()
)

# Bring item_id back: the cart totals are repeated once per matching sales row,
# so product_quantity / product_gmv on each row stay cart-wide figures.
cart_base_df = cart_totals.merge(
    sales_df[['date', 'city', 'item_id', 'cart_id']],
    on=cart_keys,
    how='left',
)
cart_base_df.head()

Unnamed: 0,date,city,cart_id,unique_items,product_quantity,product_gmv,item_id
0,2023-02-24,Ahmedabad,293189145,1,1.0,40.0,10004491
1,2023-02-24,Ahmedabad,307796259,1,3.0,120.0,10004491
2,2023-02-24,Ahmedabad,313613441,1,1.0,38.0,10000608
3,2023-02-24,Ahmedabad,315770500,1,1.0,38.0,10000608
4,2023-02-24,Ahmedabad,315988226,2,3.0,133.0,10004491


In [5]:
# "Pure" carts hold exactly one distinct tracked item.
is_pure_cart = cart_base_df['unique_items'] == 1
pure_carts_df = (
    cart_base_df[is_pure_cart]
    .groupby(['date', 'city', 'item_id'])
    .agg({'cart_id': 'nunique', 'product_quantity': 'sum', 'product_gmv': 'sum'})
    .rename(columns={
        'cart_id': 'pure_carts',
        'product_quantity': 'pure_product_quantity',
        'product_gmv': 'pure_product_gmv',
    })
    .reset_index()
)
pure_carts_df.head()

Unnamed: 0,date,city,item_id,pure_carts,pure_product_quantity,pure_product_gmv
0,2023-02-24,Ahmedabad,10000608,56,184.0,6992.0
1,2023-02-24,Ahmedabad,10004485,14,21.0,798.0
2,2023-02-24,Ahmedabad,10004491,56,129.0,5175.0
3,2023-02-24,Bengaluru,10000608,328,852.0,32376.0
4,2023-02-24,Bengaluru,10004485,214,399.0,15162.0


In [6]:
# "Mix" carts hold more than one distinct tracked item.
is_mix_cart = cart_base_df['unique_items'] > 1
mix_carts_df = (
    cart_base_df[is_mix_cart]
    .groupby(['date', 'city', 'item_id'])
    .agg({'cart_id': 'nunique', 'product_quantity': 'sum', 'product_gmv': 'sum'})
    .rename(columns={
        'cart_id': 'mix_carts',
        'product_quantity': 'mix_product_quantity',
        'product_gmv': 'mix_product_gmv',
    })
    .reset_index()
)
mix_carts_df.head()

Unnamed: 0,date,city,item_id,mix_carts,mix_product_quantity,mix_product_gmv
0,2023-02-24,Ahmedabad,10000608,11,44.0,1733.0
1,2023-02-24,Ahmedabad,10004485,3,13.0,508.0
2,2023-02-24,Ahmedabad,10004491,14,57.0,2241.0
3,2023-02-24,Bengaluru,10000608,67,299.0,11710.0
4,2023-02-24,Bengaluru,10004485,30,140.0,5410.0


In [7]:
# All carts, pure and mix together, per (date, city, item).
total_carts_df = (
    cart_base_df
    .groupby(['date', 'city', 'item_id'])
    .agg({'cart_id': 'nunique', 'product_quantity': 'sum', 'product_gmv': 'sum'})
    .rename(columns={'cart_id': 'total_carts'})
    .reset_index()
)
total_carts_df.head()

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv
0,2023-02-24,Ahmedabad,10000608,67,228.0,8725.0
1,2023-02-24,Ahmedabad,10004485,17,34.0,1306.0
2,2023-02-24,Ahmedabad,10004491,70,186.0,7416.0
3,2023-02-24,Bengaluru,10000608,395,1151.0,44086.0
4,2023-02-24,Bengaluru,10004485,244,539.0,20572.0


In [8]:
# Spot check: rows of one fixed day.
spot_check_day = pd.to_datetime('2023-03-26')
total_carts_df[total_carts_df['date'] == spot_check_day]

Unnamed: 0,date,city,item_id,total_carts,product_quantity,product_gmv
1822,2023-03-26,Ahmedabad,10000608,73,168.000,6400.000
1823,2023-03-26,Ahmedabad,10004485,25,62.000,2356.000
1824,2023-03-26,Ahmedabad,10004491,54,128.000,5102.000
1825,2023-03-26,Amritsar,10000608,7,21.000,840.000
1826,2023-03-26,Amritsar,10004485,2,3.000,120.000
...,...,...,...,...,...,...
1879,2023-03-26,Vadodara,10004485,4,7.000,266.000
1880,2023-03-26,Vadodara,10004491,17,47.000,1858.000
1881,2023-03-26,Zirakpur,10000608,13,21.000,798.000
1882,2023-03-26,Zirakpur,10004485,16,30.000,1140.000


In [None]:
# One row per (date, city, item) with total, mix and pure metrics side by side;
# combinations missing from the mix or pure frame get 0.
item_keys = ['date', 'city', 'item_id']
final_df = (
    total_carts_df
    .merge(mix_carts_df, on=item_keys, how='left')
    .merge(pure_carts_df, on=item_keys, how='left')
    .fillna(0)
)
final_df.head()

In [None]:
# Share of pure and mix carts in the total cart count of each row.
for segment in ('pure', 'mix'):
    final_df[f'{segment}_ratio'] = final_df[f'{segment}_carts'] / final_df['total_carts']
final_df.head()

In [None]:
# Day of week as an integer (pandas convention: Monday = 0).
sale_dates = pd.to_datetime(final_df['date'])
final_df['dow'] = sale_dates.dt.dayofweek
final_df.head(2)

In [None]:
# Reference dates for the comparison:
#   event_date   - yesterday at midnight, taken from the kernel's local clock
#   comp_date_lw - the same weekday one week earlier
#   required_dow - weekday number of event_date
today_midnight = pd.to_datetime('today').normalize()
event_date = pd.to_datetime(today_midnight - timedelta(days=1))
comp_date_lw = event_date - timedelta(days=7)
required_dow = event_date.dayofweek

print(event_date, comp_date_lw, required_dow)

In [None]:
# Rows that belong to the event day.
on_event_day = pd.to_datetime(final_df['date']) == pd.to_datetime(event_date)
event_date_df = final_df[on_event_day]
event_date_df.head()

In [None]:
# Rows that belong to the same weekday one week before the event day.
on_last_week_day = pd.to_datetime(final_df['date']) == pd.to_datetime(comp_date_lw)
comp_date_lw_df = final_df[on_last_week_day]
comp_date_lw_df.head()

In [None]:
# Same-weekday history: the 30 days before the event day (event day excluded),
# restricted to rows whose weekday equals required_dow.
event_ts = pd.to_datetime(event_date)
window_start = event_ts - timedelta(days=30)
window_end = event_ts - timedelta(days=1)
in_window = pd.to_datetime(final_df['date']).between(window_start, window_end)
dow_df = final_df[in_window & (final_df['dow'] == required_dow)]
dow_df.head()

In [None]:
# Keep the join keys and metrics of the last-week frame and prefix every metric with "lw_".
lw_metrics = [
    'total_carts', 'product_quantity', 'product_gmv',
    'mix_carts', 'mix_product_quantity', 'mix_product_gmv',
    'pure_carts', 'pure_product_quantity', 'pure_product_gmv',
]
comp_date_lw_df = (
    comp_date_lw_df[['city', 'item_id'] + lw_metrics]
    .rename(columns={metric: f'lw_{metric}' for metric in lw_metrics})
)
comp_date_lw_df.head(2)

In [None]:
# Average each metric over the same-weekday history per (city, item)
# and prefix the averaged metrics with "dow_".
dow_metrics = [
    'total_carts', 'product_quantity', 'product_gmv',
    'mix_carts', 'mix_product_quantity', 'mix_product_gmv',
    'pure_carts', 'pure_product_quantity', 'pure_product_gmv',
]
dow_df = (
    dow_df
    .groupby(['city', 'item_id'])[dow_metrics]
    .mean()
    .add_prefix('dow_')
    .reset_index()
)
dow_df.head(2)

In [None]:
# Attach the last-week and same-weekday baselines to the event-day rows.
baseline_keys = ['city', 'item_id']
event_date_df = (
    event_date_df
    .merge(comp_date_lw_df, on=baseline_keys, how='left')
    .merge(dow_df, on=baseline_keys, how='left')
)

In [None]:
# Peek at the first merged row.
event_date_df.head(n=1)

In [None]:
# Relative lift of the event day against each baseline, per cart segment.
# A baseline of exactly 0 carts yields the fixed lift 0.2. A missing baseline
# produces NaN here, which the fillna below turns into 0.
for segment in ('total', 'pure', 'mix'):
    for baseline in ('lw', 'dow'):
        baseline_carts = event_date_df[f'{baseline}_{segment}_carts']
        event_date_df[f'lift_{baseline}_{segment}'] = np.where(
            baseline_carts == 0,
            0.2,
            (event_date_df[f'{segment}_carts'] / baseline_carts) - 1,
        )

event_date_df = event_date_df.fillna(0)
event_date_df.head(2)

In [None]:
# Anticipated lift = plain average of the last-week lift and the same-weekday lift.
for segment in ('total', 'pure', 'mix'):
    event_date_df[f'anticipated_lift_{segment}'] = (
        event_date_df[f'lift_lw_{segment}'] + event_date_df[f'lift_dow_{segment}']
    ) / 2

In [None]:
# Peek at the first two rows, now including the anticipated lifts.
event_date_df.head(n=2)

In [None]:
# List the columns currently present on the event-day frame.
event_date_df.columns

In [None]:
# Drop rows lacking a city or item, zero-fill whatever is still missing,
# then show the remaining null count per column.
event_date_df = event_date_df.dropna(subset=['city', 'item_id']).fillna(0)
event_date_df.isna().sum()

In [None]:
# All-city rollup of the event-day frame: every metric and its two baselines
# (lw_ = last week, dow_ = same-weekday average) are summed per (date, dow, item),
# then the lifts are recomputed on the summed figures.
segment_metrics = {
    'total': ['total_carts', 'product_quantity', 'product_gmv'],
    'pure': ['pure_carts', 'pure_product_quantity', 'pure_product_gmv'],
    'mix': ['mix_carts', 'mix_product_quantity', 'mix_product_gmv'],
}
sum_columns = [
    f'{prefix}{metric}'
    for metrics in segment_metrics.values()
    for metric in metrics
    for prefix in ('', 'lw_', 'dow_')
]
overall_df = (
    event_date_df
    .groupby(['date', 'dow', 'item_id'])
    .agg({column: 'sum' for column in sum_columns})
    .reset_index()
)

# Each segment is compared against its OWN baseline. The pure lifts used to be
# divided by the total-cart baselines, which understated them.
# A baseline of exactly 0 carts yields the fixed lift 0.2.
for segment in segment_metrics:
    segment_carts = overall_df[f'{segment}_carts']
    for baseline in ('lw', 'dow'):
        baseline_carts = overall_df[f'{baseline}_{segment}_carts']
        overall_df[f'lift_{baseline}_{segment}'] = np.where(
            baseline_carts == 0, 0.2, segment_carts / baseline_carts - 1
        )
    # Anticipated lift = plain average of the two baseline lifts.
    overall_df[f'anticipated_lift_{segment}'] = (
        overall_df[f'lift_lw_{segment}'] + overall_df[f'lift_dow_{segment}']
    ) / 2

# Label under which the rollup rows sit next to the per-city rows.
overall_df['city'] = 'All'

#overall_df = overall_df[['date', 'dow', 'city', 'total_carts', 'lw_carts', 'dow_carts','product_quantity', 'lw_quantity', 'dow_quantity', 'lift_lw', 'lift_dow', 'anticipated_lift', 'product_gmv', 'lw_gmv', 'dow_gmv',]]


In [None]:
# Peek at the all-city rollup.
overall_df.head(n=5)

In [None]:
# Stack the "All" rollup on top of the per-city rows, then discard rows
# whose city is the string '0'.
stacked_frames = [overall_df, event_date_df]
event_date_df = pd.concat(stacked_frames)
event_date_df = event_date_df[event_date_df['city'] != '0']

#event_date_df.sort_values(by = ['city','item_id']).to_csv('event_date_df.csv', index = False)

In [None]:
# Show only the all-city rollup rows.
event_date_df.loc[event_date_df['city'] == 'All']

In [None]:
# Work on an independent copy so the projections below leave event_date_df untouched.
result_df = event_date_df.copy(deep=True)

In [None]:
# Projected carts = event-day carts plus event-day carts times the anticipated lift.
projection_columns = {
    'total': 'new_total_carts',
    'pure': 'new_pure_total_carts',
    'mix': 'new_mix_total_carts',
}
for segment, projected_col in projection_columns.items():
    observed_carts = result_df[f'{segment}_carts']
    result_df[projected_col] = observed_carts + (
        observed_carts * result_df[f'anticipated_lift_{segment}']
    )

In [None]:
# Peek at the projected cart counts.
result_df.head(n=5)

In [None]:
# qpc_* = event-day quantity divided by the projected cart count of the same segment.
# NOTE(review): "qpc" presumably stands for quantity per cart - confirm the
# numerator is meant to be the observed (not projected) quantity.
qpc_inputs = {
    'qpc_total': ('product_quantity', 'new_total_carts'),
    'qpc_pure': ('pure_product_quantity', 'new_pure_total_carts'),
    'qpc_mix': ('mix_product_quantity', 'new_mix_total_carts'),
}
for qpc_col, (quantity_col, carts_col) in qpc_inputs.items():
    result_df[qpc_col] = result_df[quantity_col] / result_df[carts_col]

In [None]:
# Projected carts summed per (date, dow, city), exposed as city_* columns ...
city_keys = ['date', 'dow', 'city']
projected_cols = ['new_total_carts', 'new_pure_total_carts', 'new_mix_total_carts']
city_result_df = (
    result_df
    .groupby(city_keys)[projected_cols]
    .sum()
    .add_prefix('city_')
    .reset_index()
)

# ... and attached to every item row of that city. The trailing reset_index
# also materialises the previous index as an "index" column.
city_total_cols = ['city_new_total_carts', 'city_new_pure_total_carts', 'city_new_mix_total_carts']
result_df = result_df.merge(
    city_result_df[city_keys + city_total_cols],
    on=city_keys,
    how='left',
).reset_index()

In [None]:
# cp_* = an item's projected carts as a share of its own city's projected carts.
# The denominator is the row-wise city total merged in by the previous cell.
# Summing that merged column over the whole frame (as was done before) counts
# every city total once per item row and yields a meaningless divisor.
cp_inputs = {
    'cp_total': ('new_total_carts', 'city_new_total_carts'),
    'cp_pure': ('new_pure_total_carts', 'city_new_pure_total_carts'),
    'cp_mix': ('new_mix_total_carts', 'city_new_mix_total_carts'),
}
for cp_col, (item_col, city_col) in cp_inputs.items():
    result_df[cp_col] = result_df[item_col] / result_df[city_col]

In [None]:
# Peek at the rows with the cp_* shares added.
result_df.head(n=5)

In [None]:
# Columns, in order, that make up the exported item-level table.
output_columns = [
    'date', 'dow', 'city', 'item_id',
    'total_carts', 'pure_carts', 'mix_carts',
    'new_total_carts', 'new_pure_total_carts', 'new_mix_total_carts',
    'qpc_total', 'qpc_pure', 'qpc_mix',
    'cp_total', 'cp_pure', 'cp_mix',
    'anticipated_lift_total', 'anticipated_lift_pure', 'anticipated_lift_mix',
]
result_df = result_df[output_columns]


In [None]:
# Write the item-level result table to output_item.csv (relative path), without the index column.
result_df.to_csv('output_item.csv', index=False)

# Overall Performance

In [None]:
# Overall (all-product) sales per (date, city), pulled from Redshift.
#
# The query reuses the item_product_mapping CTE of the item-level query and then
# aggregates, for delivered non-internal orders, the distinct cart count plus the
# multiplier-weighted quantity and GMV per date and city. There is no product filter.
#
# NOTE(review): the window starts at the fixed date 2022-01-26, while the cells
# below only read the event day, the day one week before it and the 30 days
# preceding it - confirm whether the long history is needed.
ov_sales_query = f"""
with item_product_mapping as
(
    SELECT
        DISTINCT
            ipr.product_id,
            case
                when
                    ipr.item_id is null
                then ipom_0.item_id else ipr.item_id end as item_id,
            case
                when
                    ipr.item_id is not null
                then
                    COALESCE(ipom.multiplier,1)
                else
                    COALESCE(ipom_0.multiplier,1)
                end AS multiplier
    FROM
        lake_rpc.item_product_mapping ipr
    left join
        dwh.dim_item_product_offer_mapping ipom
    on
        ipr.product_id = ipom.product_id
    and
            ipr.item_id = ipom.item_id
    left join
        dwh.dim_item_product_offer_mapping ipom_0
    on
        ipr.product_id = ipom_0.product_id
),

sales_base as
(
    select
        date(i.cart_checkout_ts_ist) as date,
        cl.name as city,
        count(distinct i.cart_id) as overall_carts,
        sum(1.000*i.product_quantity * COALESCE(multiplier,1)) as overall_quantity,
        sum(1.000*i.total_selling_price * COALESCE(multiplier,1)) as overall_gmv
    from
        dwh.fact_sales_order_item_details i
    
    inner join
        dwh.fact_sales_order_details j
    on
        i.cart_id=j.cart_id
    
    left join
        item_product_mapping ipm
    ON
        i.product_id = ipm.product_id
    
    left join
        lake_rpc.item_category_details icd
    ON
        icd.item_id = ipm.item_id
    
    
    left join
        lake_retail.console_outlet co
    on
        co.id = i.outlet_id
    and
        co.business_type_id = 7
    
    left join
        lake_retail.console_location cl
    on
        cl.id = tax_location_id
    
    where
        i.cart_checkout_ts_ist >= '2022-01-26'
    and
        j.is_internal_order = false
    and
        j.order_current_status = 'DELIVERED'
    group by 1,2
)

SELECT
    *
FROM
    sales_base
"""
# Run the query on the Redshift connection and keep the result as ov_sales_df.
ov_sales_df = read_sql_query(ov_sales_query, CON_REDSHIFT)

In [None]:
# Day of week as an integer (pandas convention: Monday = 0).
ov_sale_dates = pd.to_datetime(ov_sales_df['date'])
ov_sales_df['dow'] = ov_sale_dates.dt.dayofweek
ov_sales_df.head(2)

In [None]:
# Slice the overall sales into the same three views used for the item-level data:
# event day, same weekday last week, and the same-weekday rows of the preceding 30 days.
ov_dates = pd.to_datetime(ov_sales_df['date'])
ov_event_ts = pd.to_datetime(event_date)

ov_event_date_df = ov_sales_df[ov_dates == ov_event_ts]
ov_comp_date_lw_df = ov_sales_df[ov_dates == pd.to_datetime(comp_date_lw)]

ov_window_start = ov_event_ts - timedelta(days=30)
ov_window_end = ov_event_ts - timedelta(days=1)
ov_dow_df = ov_sales_df[
    ov_dates.between(ov_window_start, ov_window_end)
    & (ov_sales_df['dow'] == required_dow)
]


In [None]:
# Peek at the first overall event-day row.
ov_event_date_df.head(n=1)

In [None]:

ov_metrics = ['overall_carts', 'overall_quantity', 'overall_gmv']

# Last-week figures per city, renamed to ov_lw_*.
ov_comp_date_lw_df = ov_comp_date_lw_df[['city'] + ov_metrics].rename(columns={
    'overall_carts': 'ov_lw_carts',
    'overall_quantity': 'ov_lw_quantity',
    'overall_gmv': 'ov_lw_gmv',
})

# Same-weekday averages per city, renamed to ov_dow_*.
ov_dow_df = (
    ov_dow_df
    .groupby(['city'])[ov_metrics]
    .mean()
    .reset_index()
    .rename(columns={
        'overall_carts': 'ov_dow_carts',
        'overall_quantity': 'ov_dow_quantity',
        'overall_gmv': 'ov_dow_gmv',
    })
)

# Attach both baselines to the overall event-day rows.
ov_event_date_df = (
    ov_event_date_df
    .merge(ov_comp_date_lw_df, on=['city'], how='left')
    .merge(ov_dow_df, on=['city'], how='left')
)


In [None]:
# Date and weekday are dropped so the frame is keyed by city only.
ov_event_date_df = ov_event_date_df.drop(columns=['date', 'dow'])
ov_event_date_df.head(n=1)

In [None]:
# Cities without a baseline row get 0 instead of NaN.
ov_event_date_df = ov_event_date_df.fillna(value=0)

In [None]:
# List the columns of the overall event-day frame.
ov_event_date_df.columns

In [None]:
# All-city rollup of the overall figures: every row is relabelled to one city
# value and the metrics are summed.
# The label must be 'All', the same value the item-level rollup puts in `city`.
# The frames are later merged on `city`, and the old '0. All' label never matched,
# leaving the overall columns empty on the rollup rows.
all_ov_event_date_df = ov_event_date_df.copy()

all_ov_event_date_df['city'] = 'All'

ov_sum_columns = [
    'overall_carts', 'overall_quantity', 'overall_gmv',
    'ov_lw_carts', 'ov_lw_quantity', 'ov_lw_gmv',
    'ov_dow_carts', 'ov_dow_quantity', 'ov_dow_gmv',
]
all_ov_event_date_df = (
    all_ov_event_date_df
    .groupby(['city'])
    .agg({column: 'sum' for column in ov_sum_columns})
    .reset_index()
)

# Same column order as the per-city frame so the two can be stacked.
all_ov_event_date_df = all_ov_event_date_df[ov_event_date_df.columns]
all_ov_event_date_df.head()

In [None]:
# Rollup row first, per-city rows after it.
ov_frames = [all_ov_event_date_df, ov_event_date_df]
all_ov_event_date_df = pd.concat(ov_frames)
all_ov_event_date_df.head(n=1)

In [None]:
# Attach the overall (all-product) figures of each city to the item-level rows.
event_date_df = event_date_df.merge(all_ov_event_date_df, on=['city'], how='left')
event_date_df.head(n=1)

In [None]:
# List the columns available after the overall figures were merged in.
event_date_df.columns

In [None]:
# Final column order of the exported comparison table.
# The previous list referred to columns that this notebook never creates
# ('carts', 'lw_carts', 'lift_lw', 'anticipated_lift', 'lw_gmv', ...) and raised
# a KeyError. The names below are the total-segment columns that actually exist.
# 'item_name' is left out because no earlier cell produces it.
export_columns = [
    'date', 'dow', 'city', 'item_id',
    'overall_carts', 'ov_lw_carts', 'ov_dow_carts',
    'total_carts', 'lw_total_carts', 'dow_total_carts',
    'product_quantity', 'lw_product_quantity', 'dow_product_quantity',
    'lift_lw_total', 'lift_dow_total', 'anticipated_lift_total',
    'product_gmv', 'lw_product_gmv', 'dow_product_gmv',
    'overall_quantity', 'ov_lw_quantity', 'ov_dow_quantity',
    'overall_gmv', 'ov_lw_gmv', 'ov_dow_gmv',
]
event_date_df = event_date_df[export_columns]
event_date_df.head()

In [None]:
# Discard rows whose city equals the number 0, then export the table
# ordered by city and item to event_date_df.csv (index column omitted).
event_date_df = event_date_df[event_date_df['city'] != 0]
export_sorted = event_date_df.sort_values(by=['city', 'item_id'])
export_sorted.to_csv('event_date_df.csv', index=False)