# Joining the store data into the train/test records

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Selects which dataset is processed: the value is interpolated into the input
# file name (data/interim/{mode}.csv) and the output file name
# (data/processed/{mode}_store.csv) further down in this notebook.
mode = 'test'

In [3]:
# Load the raw store table from disk and display it.
store_csv_path = '/home/elias/Projects/rossman_store_sales/data/raw/store.csv'
store_data = pd.read_csv(store_csv_path)
store_data

Unnamed: 0,Store,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval
0,1,c,a,1270.0,9.0,2008.0,0,,,
1,2,a,a,570.0,11.0,2007.0,1,13.0,2010.0,"Jan,Apr,Jul,Oct"
2,3,a,a,14130.0,12.0,2006.0,1,14.0,2011.0,"Jan,Apr,Jul,Oct"
3,4,c,c,620.0,9.0,2009.0,0,,,
4,5,a,a,29910.0,4.0,2015.0,0,,,
...,...,...,...,...,...,...,...,...,...,...
1110,1111,a,a,1900.0,6.0,2014.0,1,31.0,2013.0,"Jan,Apr,Jul,Oct"
1111,1112,c,c,1880.0,4.0,2006.0,0,,,
1112,1113,a,c,9260.0,,,0,,,
1113,1114,a,c,870.0,,,0,,,


In [4]:
# List the columns available in the raw store table.
store_data.columns

Index(['Store', 'StoreType', 'Assortment', 'CompetitionDistance',
       'CompetitionOpenSinceMonth', 'CompetitionOpenSinceYear', 'Promo2',
       'Promo2SinceWeek', 'Promo2SinceYear', 'PromoInterval'],
      dtype='object')

In [5]:
# Count missing values per column of the store table.
store_data.isna().sum()

Store                          0
StoreType                      0
Assortment                     0
CompetitionDistance            3
CompetitionOpenSinceMonth    354
CompetitionOpenSinceYear     354
Promo2                         0
Promo2SinceWeek              544
Promo2SinceYear              544
PromoInterval                544
dtype: int64

In [6]:
# Same missing-value count, restricted to the rows where Promo2 == 1.
promo2_rows = store_data[store_data['Promo2'] == 1]
promo2_rows.isna().sum()

Store                          0
StoreType                      0
Assortment                     0
CompetitionDistance            1
CompetitionOpenSinceMonth    206
CompetitionOpenSinceYear     206
Promo2                         0
Promo2SinceWeek                0
Promo2SinceYear                0
PromoInterval                  0
dtype: int64

Os valores nulos nas colunas de Promo2 se devem à promoção não existir para a loja (Promo2 == 0).

In [7]:
# How many comma-separated entries does PromoInterval hold on Promo2 == 1 rows?
promo2_intervals = store_data.loc[store_data['Promo2'] == 1, 'PromoInterval']
promo2_intervals.str.split(',').map(len).unique()

array([4])

Então, todas as lojas com promoção a têm em 4 meses do ano. Uma forma fácil de representar isso é criar uma variável booleana para cada mês.

In [8]:
# One-hot encode the categorical store columns.
# Note: `store_data_encoded` is reassigned later in this notebook from
# `store_data_months`, so this result only serves as a preview.
columns_to_encode = ['StoreType', 'Assortment']
store_data_encoded = store_data.pipe(pd.get_dummies, columns=columns_to_encode)

# Show the first rows of the encoded frame.
store_data_encoded.head()

Unnamed: 0,Store,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval,StoreType_a,StoreType_b,StoreType_c,StoreType_d,Assortment_a,Assortment_b,Assortment_c
0,1,1270.0,9.0,2008.0,0,,,,0,0,1,0,1,0,0
1,2,570.0,11.0,2007.0,1,13.0,2010.0,"Jan,Apr,Jul,Oct",1,0,0,0,1,0,0
2,3,14130.0,12.0,2006.0,1,14.0,2011.0,"Jan,Apr,Jul,Oct",1,0,0,0,1,0,0
3,4,620.0,9.0,2009.0,0,,,,0,0,1,0,0,0,1
4,5,29910.0,4.0,2015.0,0,,,,1,0,0,0,1,0,0


In [9]:
# Preview the month lists obtained by splitting PromoInterval on Promo2 == 1 rows.
store_data.loc[store_data['Promo2'] == 1, 'PromoInterval'].str.split(',')

1        [Jan, Apr, Jul, Oct]
2        [Jan, Apr, Jul, Oct]
10       [Jan, Apr, Jul, Oct]
11       [Jan, Apr, Jul, Oct]
12       [Feb, May, Aug, Nov]
                ...          
1105     [Jan, Apr, Jul, Oct]
1106     [Jan, Apr, Jul, Oct]
1108     [Jan, Apr, Jul, Oct]
1110     [Jan, Apr, Jul, Oct]
1114    [Mar, Jun, Sept, Dec]
Name: PromoInterval, Length: 571, dtype: object

In [10]:
# Split each PromoInterval string (e.g. "Jan,Apr,Jul,Oct") into a list of month
# labels, keeping only the rows where Promo2 == 1.
promo_interval_series = store_data.loc[store_data.Promo2 == 1].PromoInterval.str.split(',')

# Turn the lists into one row per (store row, month) pair, one-hot encode the
# month label, then collapse back to a single row per original index with max().
# Grouping on the row index replaces the former `groupby(level=0, axis=1)`
# column-wise grouping, which is deprecated (FutureWarning since pandas 2.1).
temp_df = (
    pd.get_dummies(promo_interval_series.explode(), prefix='Promo2')
    .groupby(level=0)
    .max()
)

# Display the per-row month indicator columns (Promo2_Apr, Promo2_Aug, ...).
temp_df

Unnamed: 0,Promo2_Apr,Promo2_Aug,Promo2_Dec,Promo2_Feb,Promo2_Jan,Promo2_Jul,Promo2_Jun,Promo2_Mar,Promo2_May,Promo2_Nov,Promo2_Oct,Promo2_Sept
1,1,0,0,0,1,1,0,0,0,0,1,0
2,1,0,0,0,1,1,0,0,0,0,1,0
10,1,0,0,0,1,1,0,0,0,0,1,0
11,1,0,0,0,1,1,0,0,0,0,1,0
12,0,1,0,1,0,0,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1105,1,0,0,0,1,1,0,0,0,0,1,0
1106,1,0,0,0,1,1,0,0,0,0,1,0
1108,1,0,0,0,1,1,0,0,0,0,1,0
1110,1,0,0,0,1,1,0,0,0,0,1,0


In [11]:
# Left-join the month indicators onto the full store table by row index; rows
# that are absent from temp_df (Promo2 != 1) get NaN in every month column.
store_data_months = store_data.join(temp_df, how='left')

# Take the month column names from temp_df itself rather than from a
# hand-written copy, so the list cannot drift from the generated columns.
month_cols = list(temp_df.columns)

# Rows without a match carry no promo month: NaN -> 0, stored as small integers.
store_data_months[month_cols] = store_data_months[month_cols].fillna(0).astype('uint8')
store_data_months

Unnamed: 0,Store,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval,...,Promo2_Dec,Promo2_Feb,Promo2_Jan,Promo2_Jul,Promo2_Jun,Promo2_Mar,Promo2_May,Promo2_Nov,Promo2_Oct,Promo2_Sept
0,1,c,a,1270.0,9.0,2008.0,0,,,,...,0,0,0,0,0,0,0,0,0,0
1,2,a,a,570.0,11.0,2007.0,1,13.0,2010.0,"Jan,Apr,Jul,Oct",...,0,0,1,1,0,0,0,0,1,0
2,3,a,a,14130.0,12.0,2006.0,1,14.0,2011.0,"Jan,Apr,Jul,Oct",...,0,0,1,1,0,0,0,0,1,0
3,4,c,c,620.0,9.0,2009.0,0,,,,...,0,0,0,0,0,0,0,0,0,0
4,5,a,a,29910.0,4.0,2015.0,0,,,,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1110,1111,a,a,1900.0,6.0,2014.0,1,31.0,2013.0,"Jan,Apr,Jul,Oct",...,0,0,1,1,0,0,0,0,1,0
1111,1112,c,c,1880.0,4.0,2006.0,0,,,,...,0,0,0,0,0,0,0,0,0,0
1112,1113,a,c,9260.0,,,0,,,,...,0,0,0,0,0,0,0,0,0,0
1113,1114,a,c,870.0,,,0,,,,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Inspect the Promo2 start week/year columns before they are turned into a date.
store_data_months[['Promo2SinceWeek', 'Promo2SinceYear']]

Unnamed: 0,Promo2SinceWeek,Promo2SinceYear
0,,
1,13.0,2010.0
2,14.0,2011.0
3,,
4,,
...,...,...
1110,31.0,2013.0
1111,,
1112,,
1113,,


In [13]:
# Replace missing week/year with the sentinel 0 and store both columns as integers.
promo2_since_cols = ['Promo2SinceWeek', 'Promo2SinceYear']
store_data_months[promo2_since_cols] = (
    store_data_months[promo2_since_cols].fillna(0).astype(int)
)

# Build "<year><week>1" strings and parse them as year / week-of-year / weekday,
# with the trailing "1" selecting Monday. Strings built from the 0 sentinel do
# not parse and are coerced to NaT.
# NOTE(review): '%W' numbers weeks from the first Monday of the year; if
# Promo2SinceWeek is meant as an ISO calendar week the result can be one week
# late in some years - confirm which convention the data uses.
promo2_year_week_day = (
    store_data_months['Promo2SinceYear'].astype(str)
    + store_data_months['Promo2SinceWeek'].astype(str)
    + '1'
)
store_data_months['Promo2StartDate'] = pd.to_datetime(
    promo2_year_week_day, format='%Y%W%w', errors='coerce'
)
store_data_months

Unnamed: 0,Store,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval,...,Promo2_Feb,Promo2_Jan,Promo2_Jul,Promo2_Jun,Promo2_Mar,Promo2_May,Promo2_Nov,Promo2_Oct,Promo2_Sept,Promo2StartDate
0,1,c,a,1270.0,9.0,2008.0,0,0,0,,...,0,0,0,0,0,0,0,0,0,NaT
1,2,a,a,570.0,11.0,2007.0,1,13,2010,"Jan,Apr,Jul,Oct",...,0,1,1,0,0,0,0,1,0,2010-03-29
2,3,a,a,14130.0,12.0,2006.0,1,14,2011,"Jan,Apr,Jul,Oct",...,0,1,1,0,0,0,0,1,0,2011-04-04
3,4,c,c,620.0,9.0,2009.0,0,0,0,,...,0,0,0,0,0,0,0,0,0,NaT
4,5,a,a,29910.0,4.0,2015.0,0,0,0,,...,0,0,0,0,0,0,0,0,0,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1110,1111,a,a,1900.0,6.0,2014.0,1,31,2013,"Jan,Apr,Jul,Oct",...,0,1,1,0,0,0,0,1,0,2013-08-05
1111,1112,c,c,1880.0,4.0,2006.0,0,0,0,,...,0,0,0,0,0,0,0,0,0,NaT
1112,1113,a,c,9260.0,,,0,0,0,,...,0,0,0,0,0,0,0,0,0,NaT
1113,1114,a,c,870.0,,,0,0,0,,...,0,0,0,0,0,0,0,0,0,NaT


In [14]:
# Missing competition month/year are replaced by the sentinel 0 so the columns
# can be stored as integers; a 0 year/month is not a valid date and ends up as
# NaT in the assembled column below.
competition_since_cols = ['CompetitionOpenSinceMonth', 'CompetitionOpenSinceYear']
store_data_months[competition_since_cols] = (
    store_data_months[competition_since_cols].fillna(0).astype(int)
)

# Assemble the date from numeric year/month/day components (day fixed to the
# 1st) instead of concatenating unpadded strings: "2007" + "11" + "1" is
# ambiguous under '%Y%m%d' (Nov 1st vs Jan 11th) and was only parsed as intended
# because of how the parser happens to break the tie.
competition_start_parts = pd.DataFrame(
    {
        'year': store_data_months['CompetitionOpenSinceYear'],
        'month': store_data_months['CompetitionOpenSinceMonth'],
        'day': 1,
    }
)
store_data_months['CompetitionOpenStartDate'] = pd.to_datetime(
    competition_start_parts, errors='coerce'
)

store_data_months


Unnamed: 0,Store,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval,...,Promo2_Jan,Promo2_Jul,Promo2_Jun,Promo2_Mar,Promo2_May,Promo2_Nov,Promo2_Oct,Promo2_Sept,Promo2StartDate,CompetitionOpenStartDate
0,1,c,a,1270.0,9,2008,0,0,0,,...,0,0,0,0,0,0,0,0,NaT,2008-09-01
1,2,a,a,570.0,11,2007,1,13,2010,"Jan,Apr,Jul,Oct",...,1,1,0,0,0,0,1,0,2010-03-29,2007-11-01
2,3,a,a,14130.0,12,2006,1,14,2011,"Jan,Apr,Jul,Oct",...,1,1,0,0,0,0,1,0,2011-04-04,2006-12-01
3,4,c,c,620.0,9,2009,0,0,0,,...,0,0,0,0,0,0,0,0,NaT,2009-09-01
4,5,a,a,29910.0,4,2015,0,0,0,,...,0,0,0,0,0,0,0,0,NaT,2015-04-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1110,1111,a,a,1900.0,6,2014,1,31,2013,"Jan,Apr,Jul,Oct",...,1,1,0,0,0,0,1,0,2013-08-05,2014-06-01
1111,1112,c,c,1880.0,4,2006,0,0,0,,...,0,0,0,0,0,0,0,0,NaT,2006-04-01
1112,1113,a,c,9260.0,0,0,0,0,0,,...,0,0,0,0,0,0,0,0,NaT,NaT
1113,1114,a,c,870.0,0,0,0,0,0,,...,0,0,0,0,0,0,0,0,NaT,NaT


In [15]:
# One-hot encode StoreType and Assortment on the frame that already carries the
# month indicators and the two start-date columns.
columns_to_encode = ['StoreType', 'Assortment']
store_data_encoded = store_data_months.pipe(pd.get_dummies, columns=columns_to_encode)
store_data_encoded

Unnamed: 0,Store,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval,Promo2_Apr,Promo2_Aug,...,Promo2_Sept,Promo2StartDate,CompetitionOpenStartDate,StoreType_a,StoreType_b,StoreType_c,StoreType_d,Assortment_a,Assortment_b,Assortment_c
0,1,1270.0,9,2008,0,0,0,,0,0,...,0,NaT,2008-09-01,0,0,1,0,1,0,0
1,2,570.0,11,2007,1,13,2010,"Jan,Apr,Jul,Oct",1,0,...,0,2010-03-29,2007-11-01,1,0,0,0,1,0,0
2,3,14130.0,12,2006,1,14,2011,"Jan,Apr,Jul,Oct",1,0,...,0,2011-04-04,2006-12-01,1,0,0,0,1,0,0
3,4,620.0,9,2009,0,0,0,,0,0,...,0,NaT,2009-09-01,0,0,1,0,0,0,1
4,5,29910.0,4,2015,0,0,0,,0,0,...,0,NaT,2015-04-01,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1110,1111,1900.0,6,2014,1,31,2013,"Jan,Apr,Jul,Oct",1,0,...,0,2013-08-05,2014-06-01,1,0,0,0,1,0,0
1111,1112,1880.0,4,2006,0,0,0,,0,0,...,0,NaT,2006-04-01,0,0,1,0,0,0,1
1112,1113,9260.0,0,0,0,0,0,,0,0,...,0,NaT,NaT,1,0,0,0,0,0,1
1113,1114,870.0,0,0,0,0,0,,0,0,...,0,NaT,NaT,1,0,0,0,0,0,1


In [16]:
# List the columns of the encoded store table.
store_data_encoded.columns

Index(['Store', 'CompetitionDistance', 'CompetitionOpenSinceMonth',
       'CompetitionOpenSinceYear', 'Promo2', 'Promo2SinceWeek',
       'Promo2SinceYear', 'PromoInterval', 'Promo2_Apr', 'Promo2_Aug',
       'Promo2_Dec', 'Promo2_Feb', 'Promo2_Jan', 'Promo2_Jul', 'Promo2_Jun',
       'Promo2_Mar', 'Promo2_May', 'Promo2_Nov', 'Promo2_Oct', 'Promo2_Sept',
       'Promo2StartDate', 'CompetitionOpenStartDate', 'StoreType_a',
       'StoreType_b', 'StoreType_c', 'StoreType_d', 'Assortment_a',
       'Assortment_b', 'Assortment_c'],
      dtype='object')

In [17]:
# Columns carried over into the join: the key, the competition distance, the
# twelve Promo2 month indicators and the one-hot StoreType / Assortment columns.
cols_cross = (
    ['Store', 'CompetitionDistance']
    + ['Promo2_' + month for month in ('Apr', 'Aug', 'Dec', 'Feb', 'Jan', 'Jul',
                                       'Jun', 'Mar', 'May', 'Nov', 'Oct', 'Sept')]
    + ['StoreType_' + store_type for store_type in ('a', 'b', 'c', 'd')]
    + ['Assortment_' + assortment for assortment in ('a', 'b', 'c')]
)

store_data_cross = store_data_encoded.loc[:, cols_cross]
store_data_cross

Unnamed: 0,Store,CompetitionDistance,Promo2_Apr,Promo2_Aug,Promo2_Dec,Promo2_Feb,Promo2_Jan,Promo2_Jul,Promo2_Jun,Promo2_Mar,...,Promo2_Nov,Promo2_Oct,Promo2_Sept,StoreType_a,StoreType_b,StoreType_c,StoreType_d,Assortment_a,Assortment_b,Assortment_c
0,1,1270.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
1,2,570.0,1,0,0,0,1,1,0,0,...,0,1,0,1,0,0,0,1,0,0
2,3,14130.0,1,0,0,0,1,1,0,0,...,0,1,0,1,0,0,0,1,0,0
3,4,620.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
4,5,29910.0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1110,1111,1900.0,1,0,0,0,1,1,0,0,...,0,1,0,1,0,0,0,1,0,0
1111,1112,1880.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
1112,1113,9260.0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
1113,1114,870.0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1


## Cruzamento dos dados    

In [18]:
# Load the interim dataset selected by `mode`; the first CSV column is used as
# the index. The variable is called `train_data` whichever mode is loaded.
interim_csv_path = f'/home/elias/Projects/rossman_store_sales/data/interim/{mode}.csv'
train_data = pd.read_csv(interim_csv_path, index_col=0)
train_data

Unnamed: 0,Id,Store,Date,Open,Promo,SchoolHoliday,StateHoliday_a,StateHoliday_b,StateHoliday_c,day_1,day_2,day_3,day_4,day_5,day_6,day_7
0,1,1,2015-09-17,1,1,0,0,0,0,0,0,0,1,0,0,0
1,2,3,2015-09-17,1,1,0,0,0,0,0,0,0,1,0,0,0
2,3,7,2015-09-17,1,1,0,0,0,0,0,0,0,1,0,0,0
3,4,8,2015-09-17,1,1,0,0,0,0,0,0,0,1,0,0,0
4,5,9,2015-09-17,1,1,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41083,41084,1111,2015-08-01,1,0,0,0,0,0,0,0,0,0,0,1,0
41084,41085,1112,2015-08-01,1,0,0,0,0,0,0,0,0,0,0,1,0
41085,41086,1113,2015-08-01,1,0,0,0,0,0,0,0,0,0,0,1,0
41086,41087,1114,2015-08-01,1,0,0,0,0,0,0,0,0,0,0,1,0


In [19]:
# Attach the per-store features to every dated row. Both frames share the key
# name 'Store', so the result has a single 'Store' column and the former
# rename of 'Store_x' was dead code. `validate='many_to_one'` makes pandas
# raise if a Store id appears more than once in store_data_cross, which would
# otherwise silently duplicate rows.
train_data_cross = pd.merge(
    train_data,
    store_data_cross,
    how='left',
    on='Store',
    validate='many_to_one',
)
train_data_cross

Unnamed: 0,Id,Store,Date,Open,Promo,SchoolHoliday,StateHoliday_a,StateHoliday_b,StateHoliday_c,day_1,...,Promo2_Nov,Promo2_Oct,Promo2_Sept,StoreType_a,StoreType_b,StoreType_c,StoreType_d,Assortment_a,Assortment_b,Assortment_c
0,1,1,2015-09-17,1,1,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
1,2,3,2015-09-17,1,1,0,0,0,0,0,...,0,1,0,1,0,0,0,1,0,0
2,3,7,2015-09-17,1,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
3,4,8,2015-09-17,1,1,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
4,5,9,2015-09-17,1,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41083,41084,1111,2015-08-01,1,0,0,0,0,0,0,...,0,1,0,1,0,0,0,1,0,0
41084,41085,1112,2015-08-01,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
41085,41086,1113,2015-08-01,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
41086,41087,1114,2015-08-01,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1


In [20]:
# Rows whose Store id found no match in the store table have NaN in the merged
# columns; list those Store ids. An empty array means every row matched.
unmatched_rows = train_data_cross['StoreType_a'].isna()
train_data_cross.loc[unmatched_rows, 'Store'].unique()

array([], dtype=int64)

## Datas do competidor e promo2

In [21]:
# Create the two flag columns with a default of 0; they are filled in below.
for flag_col in ('CompetitionOpen', 'Promo2Started'):
    train_data_cross[flag_col] = 0
train_data_cross

Unnamed: 0,Id,Store,Date,Open,Promo,SchoolHoliday,StateHoliday_a,StateHoliday_b,StateHoliday_c,day_1,...,Promo2_Sept,StoreType_a,StoreType_b,StoreType_c,StoreType_d,Assortment_a,Assortment_b,Assortment_c,CompetitionOpen,Promo2Started
0,1,1,2015-09-17,1,1,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0
1,2,3,2015-09-17,1,1,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
2,3,7,2015-09-17,1,1,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
3,4,8,2015-09-17,1,1,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
4,5,9,2015-09-17,1,1,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41083,41084,1111,2015-08-01,1,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0
41084,41085,1112,2015-08-01,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
41085,41086,1113,2015-08-01,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
41086,41087,1114,2015-08-01,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0


In [22]:
# Parse the Date column into datetimes so it can be compared with the start dates.
train_data_cross['Date'] = pd.to_datetime(train_data_cross['Date'])

In [23]:
# Start dates per store, indexed by Store id. If an id were repeated the first
# row wins, matching the former `.iloc[0]` lookup.
store_start_dates = (
    store_data_encoded
    .drop_duplicates(subset='Store')
    .set_index('Store')[['CompetitionOpenStartDate', 'Promo2StartDate']]
)

# Broadcast each store's start date onto its rows and compare all rows at once,
# instead of re-masking the whole frame once per store and writing booleans
# into integer columns. A missing start date (NaT) compares as False, so those
# rows get 0, as do rows whose Store id is absent from the store table.
for flag_col, start_col in (
    ('CompetitionOpen', 'CompetitionOpenStartDate'),
    ('Promo2Started', 'Promo2StartDate'),
):
    row_start_dates = train_data_cross['Store'].map(store_start_dates[start_col])
    train_data_cross[flag_col] = (train_data_cross['Date'] >= row_start_dates).astype('uint8')

In [24]:
# Display the frame with the CompetitionOpen / Promo2Started flags filled in.
train_data_cross

Unnamed: 0,Id,Store,Date,Open,Promo,SchoolHoliday,StateHoliday_a,StateHoliday_b,StateHoliday_c,day_1,...,Promo2_Sept,StoreType_a,StoreType_b,StoreType_c,StoreType_d,Assortment_a,Assortment_b,Assortment_c,CompetitionOpen,Promo2Started
0,1,1,2015-09-17,1,1,0,0,0,0,0,...,0,0,0,1,0,1,0,0,1,0
1,2,3,2015-09-17,1,1,0,0,0,0,0,...,0,1,0,0,0,1,0,0,1,1
2,3,7,2015-09-17,1,1,0,0,0,0,0,...,0,1,0,0,0,0,0,1,1,0
3,4,8,2015-09-17,1,1,0,0,0,0,0,...,0,1,0,0,0,1,0,0,1,0
4,5,9,2015-09-17,1,1,0,0,0,0,0,...,0,1,0,0,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41083,41084,1111,2015-08-01,1,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,1,1
41084,41085,1112,2015-08-01,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,1,0
41085,41086,1113,2015-08-01,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
41086,41087,1114,2015-08-01,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0


In [25]:
# Write the joined frame to the processed-data folder, with `mode` in the file name.
processed_csv_path = f'/home/elias/Projects/rossman_store_sales/data/processed/{mode}_store.csv'
train_data_cross.to_csv(processed_csv_path)