In [1]:
import pandas as pd
import numpy as np
import psycopg2 as psy
import query_to_redshift as qtr

In [2]:
# Load the two pre-campaign item-margin tables (the "up" and the "down" file).
# The first CSV column is used as the DataFrame index in both cases.
original_df_up = pd.read_csv(
    '../raw_data/Excel_files/Item_margin_up_pre_campaign_202106.csv',
    index_col=0,
)
original_df_down = pd.read_csv(
    '../raw_data/Excel_files/Item_margin_down_pre_campaign_202106.csv',
    index_col=0,
)

In [3]:
# SQL text: sellable DE-webshop rows of public.item_catalog_history from
# 2021-04-29 onward whose campaign category contains 'HomieDay' and whose
# campaign key contains '14'. Rows are grouped per item / webshop / price /
# campaign attributes, and only groups whose MAX and MIN period_date differ by
# more than 14 are kept (days, assuming period_date is a DATE column).
# NOTE(review): the HAVING clause ends in '14.' — presumably the numeric literal
# 14; confirm the trailing period is not a stray character.
# NOTE(review): MIN/MAX have no aliases here, so the result column names are
# whatever the database assigns.
# This query is not passed to any active (uncommented) call in the visible cells.
homie_days_query = """
SELECT item_code,
       MIN(period_date),
       MAX(period_date),
       webshop_code,
       final_price_amount,
       item_campaign_key,
       item_campaign_label,
       item_campaign_category
  from public.item_catalog_history
 where item_campaign_category like '%HomieDay%'
   and item_campaign_key like '%14%'
   and sellable_flag = true
   and webshop_code = 'DE'
   and period_date >= '2021-04-29'
 GROUP BY item_code, webshop_code, final_price_amount,
 item_campaign_label, item_campaign_key, item_campaign_category
HAVING (MAX(period_date) - MIN(period_date)) > 14."""


In [4]:
# SQL text: one row per item_code for sellable DE-webshop rows from 2021-04-29
# onward whose campaign category starts with '[CAMPAIGN]HomieDay_Voucher_CW17|'
# and whose campaign key starts with 'party14'. Only items whose first matching
# period_date is 2021-04-29 and whose last is 2021-05-10 are returned; price and
# campaign fields are collapsed per item with MAX().
# NOTE(review): '_' is a single-character wildcard in SQL LIKE, so the
# underscores in the category pattern match any character, not only '_'.
# This query is not passed to any active (uncommented) call in the visible cells.
homie_bazooka_query = """
SELECT item_code,
       MIN(period_date) as min_date,
       MAX(period_date) as max_date,
       --webshop_code,
       MAX(final_price_amount) as max_price,
       MAX(item_campaign_key) as max_item_campaign_key,
       MAX(item_campaign_label) as max_item_campaign_label,
       MAX(item_campaign_category) as max_item_campaign_category
  from public.item_catalog_history
 where item_campaign_category like '[CAMPAIGN]HomieDay_Voucher_CW17|%'
   and item_campaign_key like 'party14%'
   and sellable_flag = true
   and webshop_code = 'DE'
   and period_date >= '2021-04-29'
   --and item_code = '000000001000109588'
 GROUP BY item_code
HAVING MIN(period_date) = '2021-04-29'
   and MAX(period_date) = '2021-05-10';
   """


In [5]:
# SQL text: sellable DE-webshop rows with an empty item_campaign_key in the
# window 2021-04-17 .. 2021-04-28, restricted to item_codes returned by the
# sub-select (HomieDay CW17 category, 'party14' key, first/last period_date
# 2021-04-29 / 2021-05-10).
# HAVING COUNT(*) = 12 keeps only (item, price, key) groups with exactly 12 rows
# in that window — presumably one row per day for all 12 days at an unchanged
# price; verify that item_catalog_history holds one row per item per day.
# This query is not passed to any active (uncommented) call in the visible cells.
no_campaign_item = """SELECT item_code,
       final_price_amount,
       item_campaign_key
       --date_trunc(period_date, 'week')
  from public.item_catalog_history
 where sellable_flag = true
   and webshop_code = 'DE'
   and period_date >= '2021-04-17'
   and period_date <= '2021-04-28'
   and item_campaign_key = ''
   and item_code in (SELECT item_code
                       from public.item_catalog_history
                      where item_campaign_category like '[CAMPAIGN]HomieDay_Voucher_CW17|%'
                        and item_campaign_key like 'party14%'
                        and sellable_flag = true
                        and webshop_code = 'DE'
                        and period_date >= '2021-04-29'
                      GROUP BY item_code
                     HAVING MIN(period_date) = '2021-04-29'
                        and MAX(period_date) = '2021-05-10')
 GROUP BY item_code, final_price_amount, item_campaign_key--, date_trunc(period_date, 'week')
HAVING COUNT(*) = 12
 ORDER BY item_code ASC"""

In [6]:
# SQL text: joins item_catalog_history (alias ich) to a sub-select (alias vv)
# over campaign_details / campaign_product for the DE webshop and campaign dates
# 2021-04-17 .. 2021-04-28, matching on product code = item code and
# campaign date = period date. Only sellable DE rows in the same date window
# with voucher_value = 0 are kept, restricted to item_codes returned by the
# inner item_catalog_history sub-select (HomieDay CW17 category, 'party14' key,
# first/last period_date 2021-04-29 / 2021-05-10).
# Rows are grouped by item, price and voucher value; HAVING COUNT(*) = 12 keeps
# groups with exactly 12 joined rows — presumably one per day of the window.
# NOTE(review): the aliases campaign_start / campaign_end are MIN/MAX of
# period_date inside the 2021-04-17 .. 2021-04-28 filter, i.e. the bounds of the
# window queried here, not the 2021-04-29 .. 2021-05-10 dates of the sub-select.
no_campaign_item_voucher_value="""SELECT ich.item_code,
       ich.final_price_amount,
       vv.voucher_value,
       MIN(ich.period_date) as campaign_start,
       MAX(ich.period_date) as campaign_end
  from public.item_catalog_history as ich
       INNER JOIN (SELECT cp.product_code,
                          cp.campaign_date,
                          cd.voucher_value --, cd.voucher_value
                     FROM public.campaign_details as cd
                          INNER JOIN public.campaign_product as cp on cp.campaign_id = cd.id and cp.webshop_code = 'DE'
                    WHERE campaign_date >= '2021-04-17'
                      and campaign_date <= '2021-04-28'
                    GROUP BY product_code, voucher_value, campaign_date
  ) as vv
       on vv.product_code = ich.item_code and ich.period_date = vv.campaign_date
 where ich.sellable_flag = true
   and ich.webshop_code = 'DE'
   and ich.period_date >= '2021-04-17'
   and ich.period_date <= '2021-04-28'
   and vv.voucher_value = 0
   and ich.item_code in (SELECT item_code
                           from public.item_catalog_history
                          where item_campaign_category like '[CAMPAIGN]HomieDay_Voucher_CW17|%'
                            and item_campaign_key like 'party14%'
                            and sellable_flag = true
                            and webshop_code = 'DE'
                            and period_date >= '2021-04-29'
                          GROUP BY item_code
                         HAVING MIN(period_date) = '2021-04-29'
                            and MAX(period_date) = '2021-05-10')
 GROUP BY ich.item_code, ich.final_price_amount, vv.voucher_value--, date_trunc(period_date, 'week')
HAVING COUNT(*) = 12
 ORDER BY item_code ASC;"""


In [7]:
# Execute the voucher-based query through the project's `qtr` helper and keep
# the result as `df_homie14_no_camp` (used as a DataFrame by the cells below).
# Earlier alternatives, left disabled:
#   df_homie14 = qtr.redshift_query_to_dataframe(qtr.cursor_redshift, homie_days_query)
#   df_homie14 = qtr.redshift_query_to_dataframe(qtr.cursor_redshift, homie_bazooka_query)
#   df_homie14_no_camp = qtr.redshift_query_to_dataframe(qtr.cursor_redshift, no_campaign_item)
df_homie14_no_camp = qtr.redshift_query_to_dataframe(
    qtr.cursor_redshift,
    no_campaign_item_voucher_value,
)

In [8]:
# Number of rows returned by the query.
print(df_homie14_no_camp.shape[0])

2740


In [9]:
# Plain-text preview of the first five rows of the query result.
print(df_homie14_no_camp.head(n=5))

            item_code       final_price_amount  voucher_value campaign_start  \
0  000000001000003443   449.990000000000000000            0.0     2021-04-17   
1  000000001000006421  1399.990000000000000000            0.0     2021-04-17   
2  000000001000006426  1399.990000000000000000            0.0     2021-04-17   
3  000000001000007050   429.990000000000000000            0.0     2021-04-17   
4  000000001000007052   449.990000000000000000            0.0     2021-04-17   

  campaign_end  
0   2021-04-28  
1   2021-04-28  
2   2021-04-28  
3   2021-04-28  
4   2021-04-28  


In [10]:
# Inspect the column dtypes of the query result before any casting.
df_homie14_no_camp.dtypes

item_code              object
final_price_amount     object
voucher_value         float64
campaign_start         object
campaign_end           object
dtype: object

In [11]:
# item_code arrives as a zero-padded string (e.g. '000000001000003443'); cast it
# to an integer so it can serve as the merge key against the margin tables.
# The dtype is spelled out as 'int64' because the builtin `int` resolves to the
# platform C long, which is 32-bit on Windows with NumPy < 2 and would overflow
# for item codes above 2**31 - 1.
df_homie14_no_camp = df_homie14_no_camp.astype({'item_code': 'int64'})

In [12]:
# Re-check dtypes after the cast: item_code should now be an integer column.
df_homie14_no_camp.dtypes

item_code               int64
final_price_amount     object
voucher_value         float64
campaign_start         object
campaign_end           object
dtype: object

In [13]:
# Preview the first rows after casting item_code.
df_homie14_no_camp.head()


Unnamed: 0,item_code,final_price_amount,voucher_value,campaign_start,campaign_end
0,1000003443,449.99,0.0,2021-04-17,2021-04-28
1,1000006421,1399.99,0.0,2021-04-17,2021-04-28
2,1000006426,1399.99,0.0,2021-04-17,2021-04-28
3,1000007050,429.99,0.0,2021-04-17,2021-04-28
4,1000007052,449.99,0.0,2021-04-17,2021-04-28


In [14]:
# Keep only the "down" margin rows whose item_code also appears in the query
# result (inner join on item_code), adding the query's columns to them.
merged_down = original_df_down.merge(
    df_homie14_no_camp,
    how='inner',
    on='item_code',
)


In [15]:
# Keep only the "up" margin rows whose item_code also appears in the query
# result (inner join on item_code), adding the query's columns to them.
merged_up = original_df_up.merge(
    df_homie14_no_camp,
    how='inner',
    on='item_code',
)

In [16]:
# Preview the first rows of the merged "up" table.
merged_up.head()


Unnamed: 0,item_code,item_price_x,max_date,delivery_weeks,sales_pe,sales_pe_err,beta_del_week,beta_del_week_err,intercept,intercept_err,...,Delta_N_Sales,log_x_prime,N_prime_sales,Delta_Margin,Group_flag,Increment,final_price_amount,voucher_value,campaign_start,campaign_end
0,1000086540,349.99,2021-04-28,3,-1.4374,0.2356,-0.0499,0.0098,9.5983,1.4798,...,-0.01272,5.988933,0.840108,0.778171,up,14.000000000000002%,349.99,0.0,2021-04-17,2021-04-28
1,1000050876,569.99,2021-04-18,2,-1.1908,0.3369,-0.0119,0.0089,9.293,2.1853,...,-0.010538,6.476647,1.556809,5.774286,up,14.000000000000002%,569.99,0.0,2021-04-17,2021-04-28
2,1000184191,104.99,2021-04-27,4,-4.8137,0.8391,-0.0163,0.0412,26.1239,3.8742,...,-0.042599,4.784893,3.025659,0.183779,up,14.000000000000002%,104.99,0.0,2021-04-17,2021-04-28
3,1000027369,699.99,2021-04-15,2,-1.0499,0.3353,-0.0246,0.0122,7.6897,2.1169,...,-0.009291,6.682094,0.624969,0.938321,up,14.000000000000002%,699.99,0.0,2021-04-17,2021-04-28
4,1000008842,369.99,2021-04-24,1,-3.7269,0.5994,-0.0319,0.0291,25.8375,3.8917,...,-0.032981,6.044504,3.278337,4.5212,up,14.000000000000002%,369.99,0.0,2021-04-17,2021-04-28


In [17]:
# Preview the first rows of the merged "down" table.
merged_down.head()

Unnamed: 0,item_code,item_price_x,max_date,delivery_weeks,sales_pe,sales_pe_err,beta_del_week,beta_del_week_err,intercept,intercept_err,...,Delta_N_Sales,log_x_prime,N_prime_sales,Delta_Margin,Group_flag,Increment,final_price_amount,voucher_value,campaign_start,campaign_end
0,1000048003,349.99,2021-04-23,3,-2.7317,0.3851,-0.0089,0.0067,16.2329,2.1005,...,0.031399,5.707082,0.616165,0.5901,down,-14.000000000000002%,349.99,0.0,2021-04-17,2021-04-28
1,1000123695,199.99,2021-04-28,1,-2.5115,0.4648,-0.0307,0.0166,13.1606,2.3216,...,0.028868,5.147444,0.202093,1.547909,down,-14.000000000000002%,199.99,0.0,2021-04-17,2021-04-28
2,1000128456,429.99,2021-04-28,3,-0.7598,0.2379,-0.0255,0.0155,4.765,1.3544,...,0.008733,5.912939,0.195849,0.222921,down,-14.000000000000002%,429.99,0.0,2021-04-17,2021-04-28
3,1000091262,289.99,2020-10-14,3,-3.3256,0.431,-0.1002,0.0091,18.4672,2.3485,...,0.038225,5.519024,-0.187465,2.056987,down,-14.000000000000002%,319.99,0.0,2021-04-17,2021-04-28
4,1000145854,499.99,2021-04-19,2,-1.4837,0.209,0.0163,0.0108,8.5822,1.106,...,0.017054,6.063765,-0.382008,4.067149,down,-14.000000000000002%,499.99,0.0,2021-04-17,2021-04-28


In [18]:
# Item codes of the merged "up" table as a NumPy array (despite the "list" name).
list_of_items_up = merged_up.loc[:, 'item_code'].to_numpy()

In [19]:
# Item codes of the merged "down" table as a NumPy array (despite the "list" name).
list_of_items_down = merged_down.loc[:, 'item_code'].to_numpy()


In [20]:

# Show the item codes selected from the "up" table.
print(list_of_items_up)

[1000086540 1000050876 1000184191 1000027369 1000008842 1000092019
 1000118147 1000103648 1000056978 1000008016 1000189778 1000103647
 1000189771 1000177767 1000050865 1000056783 1000008872 1000056964
 1000056953 1000110022 1000050842 1000056782 1000152213 1000008037
 1000119769 1000008035]


In [21]:
# Show the item codes selected from the "down" table.
print(list_of_items_down)

[1000048003 1000123695 1000128456 1000091262 1000145854 1000123763
 1000110348 1000056742 1000071065 1000008825 1000200152 1000024953
 1000007069 1000024947 1000117189 1000007079 1000008056 1000210541
 1000050883 1000008867 1000008752 1000008808 1000118200 1000022120
 1000027408 1000007429 1000008058 1000007430 1000197490 1000007080
 1000153429 1000113296 1000008748 1000151250 1000008007 1000125642
 1000008755 1000034491 1000007107 1000050873 1000209624 1000027410
 1000008814 1000008031 1000147149 1000008815 1000007083 1000027357
 1000008067 1000134697 1000022129 1000008824 1000008809 1000050870
 1000134688 1000126687 1000008827 1000008039 1000027409 1000007325
 1000050877 1000104270 1000119258 1000116645 1000182823 1000008699]


In [22]:
# Write the item_code column of each merged table to its own CSV file
# (the DataFrame index is written alongside, as with the to_csv defaults).
merged_up['item_code'].to_csv(
    path_or_buf='../raw_data/Excel_files/list_items_up_bazooka.csv',
)
merged_down['item_code'].to_csv(
    path_or_buf='../raw_data/Excel_files/list_items_down_bazooka.csv',
)

In [23]:
# Stack the merged "up" and "down" tables into one frame.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it each input
# keeps its own 0-based labels, so the combined frame (and the index column of
# any CSV written from it) would contain duplicate labels.
total_bazooka_frames = [merged_up, merged_down]
total_bazooka = pd.concat(total_bazooka_frames, ignore_index=True)

In [24]:
# Inspect the column dtypes of the combined table before exporting it.
total_bazooka.dtypes

item_code               int64
item_price_x          float64
max_date               object
delivery_weeks          int64
sales_pe              float64
sales_pe_err          float64
beta_del_week         float64
beta_del_week_err     float64
intercept             float64
intercept_err         float64
pc2                   float64
item_skey               int64
x                     float64
Delta_x               float64
Delta_cost            float64
Delta_N_Sales         float64
log_x_prime           float64
N_prime_sales         float64
Delta_Margin          float64
Group_flag             object
Increment              object
final_price_amount     object
voucher_value         float64
campaign_start         object
campaign_end           object
dtype: object

In [25]:
# Persist the combined "up" + "down" table as a CSV file.
total_bazooka.to_csv(
    path_or_buf='../raw_data/Excel_files/Bazooka.csv',
)