In [154]:
import datetime
import matplotlib.dates as mdates
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest

In [155]:
# Load the three pipe-delimited source tables.
infos = pd.read_csv('../data/infos.csv', sep='|')
items = pd.read_csv('../data/items.csv', sep='|')
orders = pd.read_csv('../data/orders.csv', sep='|')

# Collapse the 'time' column to day resolution, stored as 'YYYY-MM-DD' strings.
orders['time'] = pd.to_datetime(orders['time']).dt.strftime('%Y-%m-%d')

In [156]:
# Total the 'order' column for every (itemID, day) pair.
# The result is indexed by (itemID, time) and has the single column 'order'.
agg_orders = orders.groupby(['itemID', 'time'])[['order']].sum()

In [157]:
def get_item(item_id):
    """Build the gap-free daily order series for one item.

    Looks up ``item_id`` in the module-level ``agg_orders`` table, expands it
    to every calendar day from 2018-01-01 through 2018-06-29 (days without
    orders get 0) and returns a DataFrame with the columns 'time' and
    'order' whose row labels start at 1.
    """
    daily = agg_orders.loc[item_id]
    # The aggregated table stores days as strings; reindexing needs real dates.
    daily.index = pd.DatetimeIndex(daily.index)

    full_range = pd.date_range('2018-01-01', '2018-06-29')
    daily = daily.reindex(full_range, fill_value=0)

    # The unnamed date index becomes a column called 'index'; expose it as 'time'.
    daily = daily.reset_index().rename(columns={'index': 'time'})
    # Shift the fresh 0-based row labels so that they start at 1.
    daily.index = daily.index + 1
    return daily

In [158]:
def add_feature_promotion(item, contamination=0.05, random_state=42):
    """Flag unusually high/low order days as presumed promotion days.

    The 'order' column is standard-scaled and fed to an isolation forest.
    The forest's prediction is stored in a new 'promotion' column: -1 for
    days it considers outliers, 1 for all other days.

    Parameters
    ----------
    item : pandas.DataFrame
        Daily data for one item; must contain an 'order' column. The frame
        is modified in place (the 'promotion' column is added).
    contamination : float, default 0.05
        Expected share of outlier days, passed to ``IsolationForest``.
    random_state : int or None, default 42
        Seed for the isolation forest so that repeated runs of the notebook
        yield the same labels. Pass ``None`` for unseeded behavior.

    Returns
    -------
    pandas.DataFrame
        The same ``item`` object, now with the 'promotion' column.
    """
    # Scale the single feature; the result is a plain (n_days, 1) array.
    scaled_orders = StandardScaler().fit_transform(item[['order']])

    # Fit the outlier detector and label every day with it.
    model = IsolationForest(contamination=contamination,
                            random_state=random_state)
    model.fit(scaled_orders)
    item['promotion'] = model.predict(scaled_orders)
    return item

In [159]:
def add_feature_weekday(item):
    """Attach a 'weekday' column (Monday=0 ... Sunday=6) derived from 'time'.

    ``item`` is modified in place and also returned.
    """
    timestamps = item['time']
    item['weekday'] = timestamps.dt.weekday
    return item

In [160]:
# Add your own new feature here.
def add_feature_custom(item):
    """Placeholder for an additional feature; currently returns ``item`` unchanged."""
    # TODO: compute the new feature column(s) and add them to ``item``.
    return item

In [161]:
def train_model(X_train, y_train):
    """Placeholder for model fitting; currently always returns ``None``.

    ``X_train`` and ``y_train`` are the feature table and target values
    handed over by the main loop.
    """
    # TODO: fit a model on (X_train, y_train) and return the fitted model.
    model = None
    return model

In [162]:
def get_prediction_data(item_id, infos_df=None):
    """Build the feature frame for the 14-day prediction window of one item.

    The window starts on 2018-06-30. Days listed in the item's 'promotion'
    entry of the infos table get ``promotion == -1``, all other days get
    ``promotion == 1``.

    Parameters
    ----------
    item_id
        Value to look up in the 'itemID' column of the infos table.
    infos_df : pandas.DataFrame, optional
        Table with the columns 'itemID' and 'promotion'. Defaults to the
        module-level ``infos`` frame.

    Returns
    -------
    pandas.DataFrame
        14 rows with the columns 'time' (datetime) and 'promotion' (-1 / 1).
    """
    if infos_df is None:
        infos_df = infos

    promo_cell = infos_df.loc[infos_df['itemID'] == item_id, 'promotion']
    if promo_cell.empty or pd.isnull(promo_cell.iloc[0]):
        # Unknown item or no promotion entry: nothing to flag.
        promo_dates = pd.DatetimeIndex([])
    else:
        # The entry is a comma-separated list of dates (presumably
        # 'YYYY-MM-DD' -- TODO confirm against the data). Parse it to real
        # dates so the comparison with the datetime 'time' column is
        # type-consistent.
        raw_dates = [part.strip()
                     for part in str(promo_cell.iloc[0]).split(',')
                     if part.strip()]
        promo_dates = pd.to_datetime(raw_dates).normalize()

    day_range = pd.date_range('2018-06-30', periods=14, freq='D')
    X_pred = pd.DataFrame({'time': day_range})
    is_promo = X_pred['time'].isin(promo_dates)
    X_pred['promotion'] = np.where(is_promo, -1, 1)
    return X_pred

In [163]:
def get_prediction(model, X_pred):
    """Placeholder for the demand prediction; currently always returns ``None``.

    ``model`` is whatever ``train_model`` returned and ``X_pred`` is the
    prediction-window frame handed over by the main loop.
    """
    # TODO: use ``model`` to predict the demand for the rows in ``X_pred``.
    prediction = None
    return prediction

In [None]:
# main loop
# Number of leading days used for fitting. The per-item series has 180 rows;
# the original target was cut to the first 166.
# NOTE(review): the old comment said "train with all data" -- confirm the cut is intended.
TRAIN_ROWS = 166

# Collect one record per item and build the DataFrame once at the end;
# DataFrame.append no longer exists in pandas 2 and was quadratic in a loop.
records = []

# Iterate over the scalar item ids directly. Grouping by a one-element list
# yields tuple keys on recent pandas versions, and the groups were unused.
for item_id in agg_orders.index.get_level_values('itemID').unique():
    # get item and add features
    item = get_item(item_id)
    item = add_feature_promotion(item)
    item = add_feature_weekday(item)
    # item = add_feature_custom(item)  # enable once the custom feature exists

    # create training dataset; features and target must cover the same rows
    X_train = item[['promotion', 'weekday']][:TRAIN_ROWS]
    y_train = item['order'][:TRAIN_ROWS]

    # train model
    model = train_model(X_train, y_train)

    # get prediction data with the same feature columns as the training data
    X_pred = get_prediction_data(item_id)
    X_pred = add_feature_weekday(X_pred)

    # get prediction
    prediction = get_prediction(model, X_pred)

    # remember the prediction for the result dataframe
    records.append({'itemID': item_id, 'demandPrediction': prediction})

result_df = pd.DataFrame(records, columns=['itemID', 'demandPrediction'])

In [164]:
# Bring the result into the output layout: one row for every itemID in
# 1..10463, with 0 wherever no prediction is available.
index = pd.RangeIndex(1, 10464)
result_df = (
    result_df
    .set_index('itemID')
    .reindex(index)
    .fillna(0)
    .reset_index()
    .rename(columns={'index': 'itemID'})
)

In [165]:
# Save the result as a pipe-separated CSV file without the row index.
result_df.to_csv('Inst_Tech_Karlsruhe_2.csv', sep='|', index=False)