In [17]:
# OS operations to get current working directory.
import os 
os.chdir(r"/home/inavasyon/Desktop/BurgerProject")
path = os.getcwd()
path

'/home/inavasyon/Desktop/BurgerProject'

In [18]:
# Importing necessary libraries.
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns 
import datetime as dt
import os 
import glob
from matplotlib import style
style.use("ggplot")

In [19]:
def data_import():
    """ This function imports data from current working directory and returns a dictionary which 
    have restaurant names as keys and restaurant sales data in dataframe format."""
    files = glob.glob("*.csv")
    print(files)
    names = [os.path.basename(x) for x in glob.glob("*.csv")]
    print(names)
    dataframe_dict = {}
    for i in range(len(files)):
        dataframe = pd.read_csv(files[i])
        dataframe_dict.update({names[i]:dataframe})
    return dataframe_dict

In [20]:
def EDA(dataframe):
    """ This is a simple Exploratory Data Analysis (EDA) function which gives information about 
    dataframe shape, column datatypes, dataframe statistics and number of missing values."""
    print(dataframe.shape)
    print(dataframe.info())
    print(dataframe.describe())
    print(dataframe.isnull().sum())    
    # print(dataframe.head())

In [21]:
def processing_data(dataframe):
    """ This function do many operations in order to make data ready for further investigation. 
    It changes some column datatypes, it manupilates some columns to create new ones, it changes some column names, 
    drop some columns, creates index and sort that index etc. It returns an analysis-ready dataframe. """
    dataframe2 = dataframe.copy()
    dataframe2["Date"] = pd.to_datetime(dataframe["DateOfBusiness"])
    dataframe2 = dataframe2.rename(columns = {"DiscPric" : "Total", "LongName":"BurgerMenu"})
    dataframe2["Total"] = dataframe2["Total"].astype(str)
    dataframe2["Price"] = dataframe2["Price"].astype(str)
    list1 = []
    for i in dataframe2["Total"]:
        list1.append(i.replace(",","."))
    dataframe2["Total_"] = pd.Series(list1)
    list2 = [] 
    for i in dataframe2["Price"]:
        list2.append(i.replace(",","."))
    dataframe2["Unit_Price"] = pd.Series(list2)
    dataframe2.drop(["Total", "Price"], axis = 1 , inplace = True)
    dataframe2["Total"] = dataframe2["Total_"].astype("float")
    dataframe2["UnitPrice"] = dataframe2["Unit_Price"].astype("float")
    dataframe2.drop(["Total_", "Unit_Price"], axis = 1, inplace = True)
    dataframe2["hour"] = dataframe2["Hour"].astype(str)
    dataframe2["minute"] = dataframe2["Minute"].astype(str)
    dataframe2.drop(["Hour", "Minute"], axis = 1, inplace = True)
    dataframe2["Hour_Minute_"] = dataframe2["hour"].str.cat(dataframe2["minute"], sep = "-")
    dataframe2["Date2"] = dataframe2["Date"].astype(str)
    dataframe2["Date_Hour_Minute"] = dataframe2["Date2"].str.cat(dataframe2["Hour_Minute_"], sep = "-")
    dataframe2["Date__"] = pd.to_datetime(dataframe2["Date_Hour_Minute"], format = "%Y-%m-%d-%H-%M")
    dataframe2.drop(["Hour_Minute_", "Date2", "Date_Hour_Minute"], axis = 1, inplace = True)
    dataframe3 = dataframe2.rename(columns = {"Date__" : "Date_Index"})
    dataframe4 = dataframe3.set_index("Date_Index")
    dataframe4 = dataframe4.sort_index()
    dataframe4["minute"] = dataframe4["minute"].astype(int)
    dataframe4 = dataframe4["2015-09-07":"2017-12-03"]
    dataframe5 = dataframe4.copy()
    dataframe5["half_hour"] = dataframe4["minute"] // 30
    print(dataframe5.head())
    return dataframe5

In [22]:
def half_hourly_sales():
    half_hourly_sales = {}
    data = data_import()
    processed_data = {}
    for i in data.keys():
        processed_data.update({ i : processing_data(data[i]) })
    for restaurant in processed_data.keys():
        grouped_df = processed_data[restaurant].groupby(["Date", "hour", "half_hour"])["Total"].sum()
        dict_daily = {}
        for i in range(len(grouped_df.index.levels[0])):
            dict_daily.update({ grouped_df.index.levels[0][i] : grouped_df[grouped_df.index.levels[0][i]]} ) 
        daily_sales_hourly = pd.DataFrame.from_dict(dict_daily)
        daily_sales_hourly2 = daily_sales_hourly.fillna(0)
        daily_sales_hourly3 = daily_sales_hourly2.reset_index()
        half_hourly_sales.update( { restaurant : daily_sales_hourly3 })
    return half_hourly_sales

In [23]:
hh_sales = half_hourly_sales()

['bahariye.csv', 'muhurdar.csv', 'rihtim.csv', 'acibadem.csv', 'altiyol.csv', 'caddebostan.csv']
['bahariye.csv', 'muhurdar.csv', 'rihtim.csv', 'acibadem.csv', 'altiyol.csv', 'caddebostan.csv']
                     FKItemId           DateOfBusiness  FKOrderModeId  \
Date_Index                                                              
2015-09-07 00:11:00     99928  2015-09-07 00:00:00.000              4   
2015-09-07 00:16:00     99921  2015-09-07 00:00:00.000              4   
2015-09-07 00:34:00     10001  2015-09-07 00:00:00.000              4   
2015-09-07 00:38:00     99112  2015-09-07 00:00:00.000              4   
2015-09-07 00:38:00     99921  2015-09-07 00:00:00.000              4   

                                    BurgerMenu  Quantity  FKStoreId  \
Date_Index                                                            
2015-09-07 00:11:00  DB KNGCK MN+DBKOFTEBRG MN         1        223   
2015-09-07 00:16:00  2 LI KOFTEBURGER MENU KMP         1        223   
2015-09-07

In [24]:
len(hh_sales)

6

In [25]:
hh_sales.keys()

dict_keys(['altiyol.csv', 'rihtim.csv', 'acibadem.csv', 'caddebostan.csv', 'muhurdar.csv', 'bahariye.csv'])

In [26]:
a = hh_sales["muhurdar.csv"]

In [27]:
a.head(10)

Unnamed: 0,hour,half_hour,2015-09-07 00:00:00,2015-09-08 00:00:00,2015-09-09 00:00:00,2015-09-10 00:00:00,2015-09-11 00:00:00,2015-09-12 00:00:00,2015-09-13 00:00:00,2015-09-14 00:00:00,...,2017-11-24 00:00:00,2017-11-25 00:00:00,2017-11-26 00:00:00,2017-11-27 00:00:00,2017-11-28 00:00:00,2017-11-29 00:00:00,2017-11-30 00:00:00,2017-12-01 00:00:00,2017-12-02 00:00:00,2017-12-03 00:00:00
0,0,0,0.0,0.0,0.0,0.0,15.25,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,95.5,13.0,0.0
1,10,0,13.5,0.0,0.0,42.5,13.5,0.0,27.0,18.25,...,49.95,49.5,64.9,0.0,50.95,50.9,27.45,25.9,61.45,30.0
2,10,1,34.75,56.25,24.0,42.5,0.0,0.0,18.0,96.2,...,64.85,75.4,141.9,42.95,27.45,66.7,38.0,87.4,152.0,42.4
3,11,0,91.6,97.25,20.25,27.0,57.5,13.5,13.5,159.25,...,131.7,126.3,75.4,53.25,102.4,0.0,126.45,138.4,157.05,133.9
4,11,1,116.7,402.25,91.5,228.75,189.0,166.5,53.25,267.0,...,52.95,201.95,94.45,45.95,67.45,53.9,90.9,55.9,265.1,216.05
5,12,0,499.0,396.87,288.0,329.45,499.25,282.95,253.75,383.25,...,308.65,105.35,142.7,158.85,284.05,182.95,270.3,243.3,478.7,239.25
6,12,1,567.34,658.45,306.5,557.7,517.0,503.2,257.75,599.75,...,448.25,249.75,227.1,243.55,310.25,403.15,422.75,438.1,390.3,394.55
7,13,0,605.2,667.75,695.7,531.95,462.4,551.7,508.45,761.75,...,343.3,733.45,589.1,397.5,479.1,493.0,554.2,525.95,594.1,403.85
8,13,1,543.85,648.9,612.5,622.65,822.95,608.1,366.0,611.75,...,464.45,779.3,581.6,482.8,221.5,441.4,438.25,644.0,12.95,691.85
9,14,0,756.5,705.35,807.65,643.42,782.95,829.7,653.4,1051.25,...,557.8,656.1,777.6,274.05,371.0,554.45,553.2,785.69,0.0,921.45


In [28]:
## Lets write all dataframes to a csv file.

In [29]:
import os 
os.chdir(r"./HH_Sales")
os.getcwd()

'/home/inavasyon/Desktop/BurgerProject/HH_Sales'

In [30]:
for key in hh_sales.keys():
    hh_sales[key].to_csv("Half_hour_sales_{}".format(key))