In [44]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [45]:
# Load the four competition tables: geography, sales history, SKU catalogue
# and the rows to predict.
df_geo, df_sales, df_sku, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Data - Hack UPC/geo_params.csv',
        'Data - Hack UPC/sales.csv',
        'Data - Hack UPC/sku.csv',
        'Data - Hack UPC/test.csv',
    )
)

## Geo file

The geo_params variables are the following:
   - **geoCluster**: Identifier of a specific zone within a city.
   - **cityId**: City that the geoCluster belongs to.

## Sku file

The sku variables are the following:
   - **SKU**: Unique code of the SKU (Stock Keeping Unit), which identifies the smallest sales unit.
   - **Category**: Name of the set the unit belongs to.
   - **Type**: ?
   - **brandId**: ID of the brand this product belongs to.
   - **lagerUnitQuantity**: ?
   - **Units**: Unknown; presumably the number of units per SKU, although the SKU itself already denotes the smallest sales unit.
   - **trademark**: Trademark number, i.e. the number assigned to a pending or registered trademark.
   - **countryOfOrigin**: Country the product comes from.
   - **Group**: Higher-level set that the categories belong to.

## Data change

In [46]:
# Fill missing values by assigning the result back to the column. Calling
# `fillna(..., inplace=True)` on `df_sku.<column>` operates on an intermediate
# Series: pandas warns about this chained in-place form, and under
# Copy-on-Write it no longer updates `df_sku` at all.
df_sku["Category"] = df_sku["Category"].fillna(df_sku["Group"])  # fall back to the group name
for column in ("brandId", "trademark", "countryOfOrigin"):
    df_sku[column] = df_sku[column].fillna(0)  # Meaning other

# Labels become categoricals; numeric identifiers use pandas nullable dtypes.
sku_dtypes = {
    "Category": "category",
    "Group": "category",
    "brandId": "Int64",
    "trademark": "Int64",
    "countryOfOrigin": "Int64",
    "lagerUnitQuantity": "Float64",
}
for column, dtype in sku_dtypes.items():
    df_sku[column] = df_sku[column].astype(dtype)

# 'Units' and 'Type' are not used by the rest of the notebook.
df_sku = df_sku.drop(columns=['Units', 'Type'])

df_sku.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   SKU                60 non-null     int64   
 1   Category           60 non-null     category
 2   brandId            60 non-null     Int64   
 3   lagerUnitQuantity  60 non-null     Float64 
 4   trademark          60 non-null     Int64   
 5   countryOfOrigin    60 non-null     Int64   
 6   Group              60 non-null     category
dtypes: Float64(1), Int64(3), category(2), int64(1)
memory usage: 4.4 KB


## Sales file

The sales variables are the following:
   - **ID**: id of the sale
   - **geoCluster**: Key of geo csv
   - **SKU**: Key of SKU csv
   - **date**: date in which the sale was done
   - **price**: price of the sale
   - **sales**: number of sales.

## Merge Dataframes

To inspect the data more easily, we merge the three dataframes into a single one.

In [47]:
def merging_df(df_geo, df_sales, df_sku):
    """Join the geo, sales and SKU tables into one frame.

    The geo table is first joined with the sales table on ``geoCluster``;
    the SKU table is then joined with that result on ``SKU``. Both joins use
    pandas' default (inner) merge, so the SKU columns come first in the output.
    """
    geo_sales = df_geo.merge(df_sales, on=["geoCluster"])
    merged = df_sku.merge(geo_sales, on=["SKU"])
    return merged

In [48]:
df = merging_df(df_geo, df_sales, df_sku)
df

Unnamed: 0,SKU,Category,brandId,lagerUnitQuantity,trademark,countryOfOrigin,Group,geoCluster,cityId,ID,date,price,sales
0,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,RR27958444,2021-03-03,79.49,0.4
1,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,RR27958445,2021-03-04,,
2,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,RR27958446,2021-03-05,,
3,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,RR27958447,2021-03-06,,
4,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,RR27958448,2021-03-07,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4605980,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,RR31319305,2021-07-01,39.79,7.0
4605981,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,RR31319306,2021-07-02,39.79,6.0
4605982,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,RR31319307,2021-07-03,39.79,21.0
4605983,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,RR31319308,2021-07-04,39.39,20.0


## Refactor

In [49]:
# Parse the dates, keep only rows with an observed `sales` value, renumber the
# rows from zero and discard the sale identifier.
df["date"] = pd.to_datetime(df["date"])
df = (
    df.dropna(subset=['sales'])
      .reset_index(drop=True)
      .drop(columns=['ID'])
)

In [50]:
df

Unnamed: 0,SKU,Category,brandId,lagerUnitQuantity,trademark,countryOfOrigin,Group,geoCluster,cityId,date,price,sales
0,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,2021-03-03,79.49,0.4
1,24,Pomegranate,0,1.0,0,0,Tropical fruits,1934,0,2020-11-05,46.89,1.4
2,24,Pomegranate,0,1.0,0,0,Tropical fruits,1934,0,2020-11-26,54.09,2.5
3,24,Pomegranate,0,1.0,0,0,Tropical fruits,1934,0,2020-11-28,54.09,1.4
4,24,Pomegranate,0,1.0,0,0,Tropical fruits,1934,0,2020-12-09,62.39,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...
778361,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,2021-07-01,39.79,7.0
778362,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,2021-07-02,39.79,6.0
778363,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,2021-07-03,39.79,21.0
778364,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,2021-07-04,39.39,20.0


In [51]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 778366 entries, 0 to 778365
Data columns (total 12 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   SKU                778366 non-null  int64         
 1   Category           778366 non-null  category      
 2   brandId            778366 non-null  Int64         
 3   lagerUnitQuantity  778366 non-null  Float64       
 4   trademark          778366 non-null  Int64         
 5   countryOfOrigin    778366 non-null  Int64         
 6   Group              778366 non-null  category      
 7   geoCluster         778366 non-null  int64         
 8   cityId             778366 non-null  int64         
 9   date               778366 non-null  datetime64[ns]
 10  price              778366 non-null  float64       
 11  sales              778366 non-null  float64       
dtypes: Float64(1), Int64(3), category(2), datetime64[ns](1), float64(2), int64(3)
memory usage: 63.8 MB


## Train

In [52]:
from sklearn.preprocessing import LabelEncoder

# Replace the Category and Group labels by integer codes and remember the
# label -> code dictionaries. The same encoder object is refit for Group, so
# after this cell `le` only knows the Group classes.
le = LabelEncoder()

df['Category'] = le.fit_transform(df['Category'])
mapping_categories = {label: code for code, label in enumerate(le.classes_)}

df['Group'] = le.fit_transform(df['Group'])
mapping_group = {label: code for code, label in enumerate(le.classes_)}

mapping_group

{'Bakery': 0,
 'Cheese': 1,
 'Mineral water': 2,
 'Tropical fruits': 3,
 'Yogurts': 4}

In [53]:
import datetime as dt

# Turn each date into its ordinal day number (datetime.toordinal) and store
# the result as int64 so it can be used as a numeric feature.
df["date"] = (
    pd.to_datetime(df["date"])
      .map(dt.datetime.toordinal)
      .astype(np.int64)
)

In [54]:
df

Unnamed: 0,SKU,Category,brandId,lagerUnitQuantity,trademark,countryOfOrigin,Group,geoCluster,cityId,date,price,sales
0,24,24,0,1.0,0,0,3,323,1,737852,79.49,0.4
1,24,24,0,1.0,0,0,3,1934,0,737734,46.89,1.4
2,24,24,0,1.0,0,0,3,1934,0,737755,54.09,2.5
3,24,24,0,1.0,0,0,3,1934,0,737757,54.09,1.4
4,24,24,0,1.0,0,0,3,1934,0,737768,62.39,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...
778361,838137,0,0,1.0,9666,0,3,3230,0,737972,39.79,7.0
778362,838137,0,0,1.0,9666,0,3,3230,0,737973,39.79,6.0
778363,838137,0,0,1.0,9666,0,3,3230,0,737974,39.79,21.0
778364,838137,0,0,1.0,9666,0,3,3230,0,737975,39.39,20.0


In [55]:
# Cast every feature column to a plain NumPy dtype (the nullable Int64/Float64
# columns become int64/float64).
feature_dtypes = {
    'brandId': "int64",
    'Group': "int64",
    'Category': "int64",
    'SKU': "int64",
    'geoCluster': "int64",
    'cityId': "int64",
    'date': "int64",
    'price': "float64",
    'lagerUnitQuantity': 'float64',
    'trademark': 'int64',
    'countryOfOrigin': 'int64',
}
for column, dtype in feature_dtypes.items():
    df[column] = df[column].astype(dtype)

In [56]:
# Snapshot of the fully numeric frame. `copy_df` is not referenced by any
# other cell visible in this notebook.
copy_df = df.copy()

In [57]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 778366 entries, 0 to 778365
Data columns (total 12 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   SKU                778366 non-null  int64  
 1   Category           778366 non-null  int64  
 2   brandId            778366 non-null  int64  
 3   lagerUnitQuantity  778366 non-null  float64
 4   trademark          778366 non-null  int64  
 5   countryOfOrigin    778366 non-null  int64  
 6   Group              778366 non-null  int64  
 7   geoCluster         778366 non-null  int64  
 8   cityId             778366 non-null  int64  
 9   date               778366 non-null  int64  
 10  price              778366 non-null  float64
 11  sales              778366 non-null  float64
dtypes: float64(3), int64(9)
memory usage: 71.3 MB


In [58]:
# Working copy used by the modelling cells below, so `df` itself stays untouched.
testing_df = df.copy()

## SW Modelling per Category

In [102]:
import copy

def get_model():
    """Build and compile a fresh regression network.

    The network takes 11 input features, passes them through four
    100-unit ReLU layers and ends in a single linear output unit. It is
    compiled with the Adam optimizer, mean-squared-error loss and an RMSE metric.
    """
    layers = tf.keras.layers
    hidden_stack = [layers.Dense(100, activation='relu') for _ in range(4)]
    network = tf.keras.Sequential(
        [layers.Flatten(input_shape=(11,)), *hidden_stack, layers.Dense(1)]
    )
    network.compile(
        optimizer='adam',
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=['RootMeanSquaredError'],
    )
    return network

class ModelslNetworksCategory:
    """Collection of independent models, one per category index.

    Parameters
    ----------
    model_gen : callable
        Zero-argument factory; called once per category to build its model.
        Each model must offer ``fit`` and ``predict``.
    num_categories : int
        Number of models to create; valid category indices are
        ``0 .. num_categories - 1``.
    """

    def __init__(self, model_gen, num_categories):
        self.models = [model_gen() for _ in range(num_categories)]
        self.num_categories = num_categories
        # Maps category index -> value returned by that model's last `fit` call.
        self.historials = {}

    def fit_group(self, X_train, y_train, X_test, y_test, num_category, epochs=30):
        """Train the model of ``num_category`` and store what ``fit`` returns.

        ``(X_test, y_test)`` is forwarded as validation data. The validation
        target is the ``y_test`` argument; it is no longer read from a global
        variable.
        """
        history = self.models[num_category].fit(
            X_train,
            y_train,
            epochs=epochs,
            validation_data=(X_test, y_test),
        )
        self.historials[num_category] = history

    def predict(self, X, num_category):
        """Return the predictions of the model of ``num_category`` for ``X``."""
        return self.models[num_category].predict(X)

In [103]:
get_model().summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_14 (Flatten)        (None, 11)                0         
                                                                 
 dense_69 (Dense)            (None, 100)               1200      
                                                                 
 dense_70 (Dense)            (None, 100)               10100     
                                                                 
 dense_71 (Dense)            (None, 100)               10100     
                                                                 
 dense_72 (Dense)            (None, 100)               10100     
                                                                 
 dense_73 (Dense)            (None, 1)                 101       
                                                                 
Total params: 31,601
Trainable params: 31,601
Non-tra

In [61]:
# One model per distinct Group value. The training and prediction cells index
# `macroModel.models` directly by the group code, so the codes are assumed to
# be 0..n-1 (as shown by `mapping_group`).
macroModel = ModelslNetworksCategory(get_model, len(testing_df.Group.unique()))

In [62]:
# Re-creates the working copy from `df`; same statement as the earlier
# `testing_df = df.copy()` cell.
testing_df = df.copy()

In [63]:
mapping_group

{'Bakery': 0,
 'Cheese': 1,
 'Mineral water': 2,
 'Tropical fruits': 3,
 'Yogurts': 4}

In [69]:
testing_df

Unnamed: 0,SKU,Category,brandId,lagerUnitQuantity,trademark,countryOfOrigin,Group,geoCluster,cityId,date,price,sales
0,24,24,0,1.0,0,0,3,323,1,737852,79.49,0.4
1,24,24,0,1.0,0,0,3,1934,0,737734,46.89,1.4
2,24,24,0,1.0,0,0,3,1934,0,737755,54.09,2.5
3,24,24,0,1.0,0,0,3,1934,0,737757,54.09,1.4
4,24,24,0,1.0,0,0,3,1934,0,737768,62.39,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...
778361,838137,0,0,1.0,9666,0,3,3230,0,737972,39.79,7.0
778362,838137,0,0,1.0,9666,0,3,3230,0,737973,39.79,6.0
778363,838137,0,0,1.0,9666,0,3,3230,0,737974,39.79,21.0
778364,838137,0,0,1.0,9666,0,3,3230,0,737975,39.39,20.0


In [76]:
from sklearn.model_selection import train_test_split

def get_data_model(a, test_size, random_state=None):
    """Split a frame into train/test features and targets.

    Parameters
    ----------
    a : pandas.DataFrame
        Frame containing a ``'sales'`` target column; all other columns are
        used as features. The frame is not modified.
    test_size : float or int
        Forwarded to ``train_test_split``.
    random_state : int or None, optional
        Forwarded to ``train_test_split``; pass an int for a reproducible split.

    Returns
    -------
    tuple
        ``(X_train, X_test, Y_train, Y_test)``.
    """
    # Use the argument itself (not a notebook-level variable) and leave the
    # caller's frame intact instead of popping the target out of it.
    target = a['sales']
    features = a.drop(columns=['sales'])
    X_train, X_test, Y_train, Y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, Y_train, Y_test

In [77]:
# Train one model per product group. The group code doubles as the index of
# the model inside `macroModel`.
for group in testing_df.Group.unique():
    # Independent copy of this group's rows, handed to the split helper.
    group_data = testing_df[testing_df.Group == group].copy()
    X_train, X_test, Y_train, Y_test = get_data_model(group_data, 0.2)
    
    numeric_features_train = tf.convert_to_tensor(X_train)
    numeric_features_test = tf.convert_to_tensor(X_test)

    # NOTE(review): the normalizer is adapted (first on the train tensor, then
    # on the test tensor) but it is never applied to the tensors nor passed to
    # the model, so `fit_group` below receives the raw, un-normalized features.
    # Presumably the second `adapt` replaces the statistics of the first —
    # confirm against the Keras documentation.
    normalizer = tf.keras.layers.Normalization(axis=-1)
    normalizer.adapt(numeric_features_train)
    normalizer.adapt(numeric_features_test)
    
    print('-------------------------')
    print('Training number: ',group)
    # The held-out split is used as validation data during training.
    macroModel.fit_group(numeric_features_train, Y_train, numeric_features_test, Y_test, group)
    print('-------------------------')

-------------------------
Training number:  3
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
-------------------------
-------------------------
Training number:  2
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30


Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
-------------------------
-------------------------
Training number:  1
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
-------------------------
-------------------------
Training number:  4
Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
-------------------------
-------------------------
Training number:  0
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30


Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
-------------------------


In [78]:
# Persist every per-group model as model<N>.h5, where N is its position in
# `macroModel.models`.
for model_idx, model in enumerate(macroModel.models):
    model.save('model{num}.h5'.format(num=model_idx))

## Test

In [79]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 202737 entries, 0 to 202736
Data columns (total 6 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   ID            202737 non-null  object 
 1   geoCluster    202737 non-null  int64  
 2   SKU           202737 non-null  int64  
 3   date          202737 non-null  object 
 4   price_filled  202737 non-null  float64
 5   sales         0 non-null       float64
dtypes: float64(2), int64(2), object(2)
memory usage: 9.3+ MB


In [80]:
testing = merging_df(df_geo, test, df_sku)
testing

Unnamed: 0,SKU,Category,brandId,lagerUnitQuantity,trademark,countryOfOrigin,Group,geoCluster,cityId,ID,date,price_filled,sales
0,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,RR27958569,2021-07-06,79.49,
1,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,RR27958570,2021-07-07,79.49,
2,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,RR27958571,2021-07-08,79.49,
3,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,RR27958572,2021-07-09,79.49,
4,24,Pomegranate,0,1.0,0,0,Tropical fruits,323,1,RR27958573,2021-07-10,79.49,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
202732,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,RR31319319,2021-07-15,51.99,
202733,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,RR31319320,2021-07-16,52.59,
202734,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,RR31319321,2021-07-17,52.29,
202735,838137,Avocado,0,1.0,9666,0,Tropical fruits,3230,0,RR31319322,2021-07-18,52.59,


In [81]:
testing.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 202737 entries, 0 to 202736
Data columns (total 13 columns):
 #   Column             Non-Null Count   Dtype   
---  ------             --------------   -----   
 0   SKU                202737 non-null  int64   
 1   Category           202737 non-null  category
 2   brandId            202737 non-null  Int64   
 3   lagerUnitQuantity  202737 non-null  Float64 
 4   trademark          202737 non-null  Int64   
 5   countryOfOrigin    202737 non-null  Int64   
 6   Group              202737 non-null  category
 7   geoCluster         202737 non-null  int64   
 8   cityId             202737 non-null  int64   
 9   ID                 202737 non-null  object  
 10  date               202737 non-null  object  
 11  price_filled       202737 non-null  float64 
 12  sales              0 non-null       float64 
dtypes: Float64(1), Int64(3), category(2), float64(2), int64(3), object(2)
memory usage: 19.7+ MB


In [82]:
# Remove the sale identifier and the (entirely empty) target column so that
# only feature columns remain.
testing.drop(columns=['ID', 'sales'], inplace=True)

In [83]:
# Same date encoding as for the training frame: ordinal day number as int64.
testing["date"] = (
    pd.to_datetime(testing["date"])
      .map(dt.datetime.toordinal)
      .astype(np.int64)
)

In [84]:
# Reuse the label -> code dictionaries built while encoding the training
# frame, so that test rows get the same integer codes.
# NOTE(review): a label missing from the dictionaries would presumably map to
# NaN and make the integer cast in the next cell fail — confirm that every
# test label was seen in training.
testing['Category'] = testing['Category'].map(mapping_categories) 
testing['Group'] = testing['Group'].map(mapping_group) 

In [85]:
# Cast every feature column of the test frame to a plain NumPy dtype.
testing_dtypes = {
    'brandId': "int64",
    'Group': "int64",
    'Category': "int64",
    'SKU': "int64",
    'geoCluster': "int64",
    'cityId': "int64",
    'date': "int64",
    'price_filled': "float64",
    'lagerUnitQuantity': 'float64',
    'trademark': 'int64',
    'countryOfOrigin': 'int64',
}
for column, dtype in testing_dtypes.items():
    testing[column] = testing[column].astype(dtype)

In [86]:
'''numeric_features_test = tf.convert_to_tensor(testing)

normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(numeric_features_test)'''

'numeric_features_test = tf.convert_to_tensor(testing)\n\nnormalizer = tf.keras.layers.Normalization(axis=-1)\nnormalizer.adapt(numeric_features_test)'

In [87]:
testing

Unnamed: 0,SKU,Category,brandId,lagerUnitQuantity,trademark,countryOfOrigin,Group,geoCluster,cityId,date,price_filled
0,24,24,0,1.0,0,0,3,323,1,737977,79.49
1,24,24,0,1.0,0,0,3,323,1,737978,79.49
2,24,24,0,1.0,0,0,3,323,1,737979,79.49
3,24,24,0,1.0,0,0,3,323,1,737980,79.49
4,24,24,0,1.0,0,0,3,323,1,737981,79.49
...,...,...,...,...,...,...,...,...,...,...,...
202732,838137,0,0,1.0,9666,0,3,3230,0,737986,51.99
202733,838137,0,0,1.0,9666,0,3,3230,0,737987,52.59
202734,838137,0,0,1.0,9666,0,3,3230,0,737988,52.29
202735,838137,0,0,1.0,9666,0,3,3230,0,737989,52.59


In [88]:
# Slice the test frame into one sub-frame per group code (0 .. n_groups-1)
# and keep each slice's row labels so predictions can be put back in order.
n_groups = len(testing.Group.unique())
df_split = [testing[testing.Group == code] for code in range(n_groups)]
idxs = [part.index.values for part in df_split]

In [89]:
def vectorization(a):
    """Convert ``a`` to a tensor and return that tensor.

    A ``Normalization`` layer is created and adapted on the tensor, but the
    layer is not applied: the returned tensor holds the un-normalized values.
    """
    tensor = tf.convert_to_tensor(a)

    norm_layer = tf.keras.layers.Normalization(axis=-1)
    norm_layer.adapt(tensor)  # statistics are computed but not used afterwards
    return tensor

In [90]:
# Predict every group slice with the model trained for that group.
# Iterating over `df_split` itself (instead of a hard-coded range(5) plus a
# manual counter) keeps the loop correct for any number of groups.
predictions_group = []

for num_model, group_data in enumerate(df_split):
    print(group_data.shape)
    print(len(idxs[num_model]))
    X = vectorization(group_data)
    pred = macroModel.predict(X, num_model)
    print(len(pred))
    predictions_group.append(pred)

(53023, 11)
53023
53023
(24824, 11)
24824
24824
(38202, 11)
38202
38202
(52107, 11)
52107
52107
(34581, 11)
34581
34581


In [91]:
for array in predictions_group:
    print(array.shape)

(53023, 1)
(24824, 1)
(38202, 1)
(52107, 1)
(34581, 1)


In [92]:
# Flatten the per-group lists into single arrays. Both lists were filled in
# the same group order, so position k of `tt` is the prediction for the row
# label stored at position k of `idxs`.
idxs = np.concatenate(idxs)
tt = np.concatenate(predictions_group)

In [93]:
idxs.shape

(202737,)

In [94]:
# Give `tt` the same 1-D shape as `idxs` (each model returns one column).
tt = tt.reshape(idxs.shape)

In [95]:
tt.shape

(202737,)

In [96]:
# Pair every prediction with the `testing` row label it was computed from.
predictions = pd.DataFrame({'idxs':idxs, 'sales':tt})

In [97]:
# Restore the row order of `testing` and use its row labels as the index.
predictions = predictions.sort_values(by=['idxs']).set_index('idxs')
predictions

Unnamed: 0_level_0,sales
idxs,Unnamed: 1_level_1
0,3.610947
1,3.610947
2,3.610947
3,3.610947
4,3.610947
...,...
202732,3.610947
202733,3.610947
202734,3.610947
202735,3.610947


In [98]:
# `predictions` is indexed by the row labels of `testing`, the merged frame.
# Its rows are ordered differently from `test` (row 0 of the merged frame is
# not row 0 of `test`), so assigning by index would attach predictions to the
# wrong sale IDs. Recover the ID of every merged row by repeating the same
# merge, then join on ID.
merged_ids = merging_df(df_geo, test, df_sku)['ID']
sales_by_id = pd.Series(
    predictions['sales'].to_numpy(),
    index=merged_ids.loc[predictions.index].to_numpy(),
)
assert sales_by_id.index.is_unique, "sale IDs must be unique to join predictions back"

test['sales'] = test['ID'].map(sales_by_id)
test

Unnamed: 0,ID,geoCluster,SKU,date,price_filled,sales
0,RR27956447,21,32485,2021-07-08,39.69,3.610947
1,RR27956448,21,32485,2021-07-09,39.69,3.610947
2,RR27956449,21,32485,2021-07-10,39.69,3.610947
3,RR27956450,21,32485,2021-07-11,39.69,3.610947
4,RR27956451,21,32485,2021-07-12,39.69,3.610947
...,...,...,...,...,...,...
202732,RR55912539,3230,711838,2021-07-15,28.49,3.610947
202733,RR55912540,3230,711838,2021-07-16,28.49,3.610947
202734,RR55912541,3230,711838,2021-07-17,28.49,3.610947
202735,RR55912542,3230,711838,2021-07-18,28.49,3.610947


In [99]:
# NOTE(review): `to_csv` also writes the DataFrame index as an unnamed first
# column by default; pass index=False if the expected file must contain only
# the data columns — confirm the required submission format.
test.to_csv('predictions.csv')