In [1]:
# ===========
# ENVIRONMENT
# ===========


import acquire as ac
import os
import sys
import pandas as pd
import numpy as np

from datetime import datetime




# =======
# ACQUIRE
# =======


# imported from acquire.py




# =======
# PREPARE
# =======


def remove_space(df, column):
    """
    Removes the colon between date and hour.
    """
    return df[column].str.replace(':', ' ', 1)


def process_datetime(df, column, locale):
    """
    Pre-processess timestamp column.
    """
    df[column] = remove_space(df, column)
    df[column] = convert_to_datetime(df, column)
    df = df.set_index(column)
    return set_utc(df, locale)




   item_id  sale_amount                      sale_date  sale_id  store_id  \
0       16         16.0  Sat, 25 May 2013 00:00:00 GMT   285001         7   
1       16         20.0  Sun, 26 May 2013 00:00:00 GMT   285002         7   
2       16          7.0  Mon, 27 May 2013 00:00:00 GMT   285003         7   
3       16         17.0  Tue, 28 May 2013 00:00:00 GMT   285004         7   
4       16         17.0  Wed, 29 May 2013 00:00:00 GMT   285005         7   
5       16         20.0  Thu, 30 May 2013 00:00:00 GMT   285006         7   
6       16         16.0  Fri, 31 May 2013 00:00:00 GMT   285007         7   
7       16         19.0  Sat, 01 Jun 2013 00:00:00 GMT   285008         7   
8       16         28.0  Sun, 02 Jun 2013 00:00:00 GMT   285009         7   
9       16         10.0  Mon, 03 Jun 2013 00:00:00 GMT   285010         7   

   item_brand                                      item_name  item_price  \
0  Burts Bees  Burts Bees Daily Moisturizing Cream Sensitive        5.17   


### Write a function to convert a date to a datetime data type.

In [2]:
def convert_to_datetime(df, column):
    """
    Converts string object to datetime object.
    """
    return pd.to_datetime(df[column])

### Write a function to change a datetime to UTC.

In [13]:
def set_utc(df, locale):
    """
    Converts to UTC time.
    """
    return df.tz_localize('utc').tz_convert(None)

### Write a function to parse a date column into 6 additional columns (while keeping the original date): year, quarter, month, day of month, day of week, weekend vs. weekday

In [4]:
def add_year(df, column):
    return df[column].dt.year


def add_quarter(df, column):
    return df[column].dt.quarter


def add_month(df, column):
    return df[column].dt.month


def add_day(df, column):
    return df[column].dt.day


def add_hour(df, column):
    return df[column].dt.hour


def add_weekday(df, column):
    return df[column].dt.weekday


def add_date_columns(df, column):
    df.reset_index(inplace=True)
    df['year'] = add_year(df, column)
    df['quarter'] = add_quarter(df, column)
    df['month'] = add_month(df, column)
    df['day'] = add_day(df, column)
    df['hour'] = add_hour(df, column)
    df['weekday'] = add_weekday(df, column)
    return df

### Add a column to your dataframe, sales_total, which is a derived from sale_amount (total items) and item_price.

In [18]:
def add_sum_total(df, column1, column2):
    return df[column1] * df[column2]
    

## Main

In [5]:
df = ac.get_data()

In [6]:
df['sale_date'] = convert_to_datetime(df, 'sale_date')

In [7]:
df = df.set_index('sale_date')

In [14]:
df = set_utc(df, 'America/Chicago')

In [16]:
df = add_date_columns(df, 'sale_date')

In [19]:
df['sales_total'] = add_sum_total(df, 'sale_amount', 'item_price')

## Create a new dataframe that aggregates the sales_total and sale_amount(item count) using sum and median by day of week.

In [24]:
df_agg = pd.DataFrame()

In [26]:
df_agg['sales_total'] = df.groupby('sales_total')['weekday'].sum()

In [27]:
df_agg.head(10)

Unnamed: 0_level_0,sales_total
sales_total,Unnamed: 1_level_1
0.0,2
0.84,6
1.68,1
1.8,1
2.4,4
2.52,6
2.61,3
3.0,8
3.36,25
3.4,1


In [20]:
df.head(10)

Unnamed: 0,sale_date,item_id,sale_amount,sale_id,store_id,item_brand,item_name,item_price,item_upc12,item_upc14,...,store_city,store_state,store_zipcode,year,quarter,month,day,hour,weekday,sales_total
0,2013-05-25,16,16.0,285001,7,Burts Bees,Burts Bees Daily Moisturizing Cream Sensitive,5.17,792850014008,792850014008,...,San Antonio,TX,78217,2013,2,5,25,0,5,82.72
1,2013-05-26,16,20.0,285002,7,Burts Bees,Burts Bees Daily Moisturizing Cream Sensitive,5.17,792850014008,792850014008,...,San Antonio,TX,78217,2013,2,5,26,0,6,103.4
2,2013-05-27,16,7.0,285003,7,Burts Bees,Burts Bees Daily Moisturizing Cream Sensitive,5.17,792850014008,792850014008,...,San Antonio,TX,78217,2013,2,5,27,0,0,36.19
3,2013-05-28,16,17.0,285004,7,Burts Bees,Burts Bees Daily Moisturizing Cream Sensitive,5.17,792850014008,792850014008,...,San Antonio,TX,78217,2013,2,5,28,0,1,87.89
4,2013-05-29,16,17.0,285005,7,Burts Bees,Burts Bees Daily Moisturizing Cream Sensitive,5.17,792850014008,792850014008,...,San Antonio,TX,78217,2013,2,5,29,0,2,87.89
5,2013-05-30,16,20.0,285006,7,Burts Bees,Burts Bees Daily Moisturizing Cream Sensitive,5.17,792850014008,792850014008,...,San Antonio,TX,78217,2013,2,5,30,0,3,103.4
6,2013-05-31,16,16.0,285007,7,Burts Bees,Burts Bees Daily Moisturizing Cream Sensitive,5.17,792850014008,792850014008,...,San Antonio,TX,78217,2013,2,5,31,0,4,82.72
7,2013-06-01,16,19.0,285008,7,Burts Bees,Burts Bees Daily Moisturizing Cream Sensitive,5.17,792850014008,792850014008,...,San Antonio,TX,78217,2013,2,6,1,0,5,98.23
8,2013-06-02,16,28.0,285009,7,Burts Bees,Burts Bees Daily Moisturizing Cream Sensitive,5.17,792850014008,792850014008,...,San Antonio,TX,78217,2013,2,6,2,0,6,144.76
9,2013-06-03,16,10.0,285010,7,Burts Bees,Burts Bees Daily Moisturizing Cream Sensitive,5.17,792850014008,792850014008,...,San Antonio,TX,78217,2013,2,6,3,0,0,51.7
