In [1]:
import pandas as pd

In [2]:
# Excel workbooks that hold the raw traffic data.
DATA_PATHS = [
    '/opt/notebooks/datasets/NB_GA_Data_1.xlsx',
    '/opt/notebooks/datasets/NB_GA_Data_2.xlsx',
]
# Location of the intermediate CSV file for the combined raw data.
RAW_DATA_LOCATION = '/opt/notebooks/intermediate_dataset/raw.csv'

In [3]:
def pull_raw_traffic_data(file_locations, skip_sheets=('Sheet1',)):
    """
    Read the raw .xlsx workbooks containing the traffic timeseries data and combine
    every data worksheet into a single frame ordered by date.

    :param file_locations: list, consisting of: locations on disk that .xlsx is stored.
    :param skip_sheets: worksheet name(s) to leave out of the result; either a single name
        or an iterable of names. Defaults to ('Sheet1',), the data summary worksheet in the
        sample data file. Pass None or an empty iterable to keep every worksheet.
    :return: DataFrame with the remaining worksheets stacked together (in file order, then
        worksheet order), indexed by an unnamed DatetimeIndex built from the 'Date' column
        and sorted by that index in ascending order.
    :raises ValueError: if no worksheet is left to combine.
    :raises KeyError: if the combined data has no 'Date' column.
    """
    if skip_sheets is None:
        skipped = set()
    elif isinstance(skip_sheets, str):
        # a bare string would otherwise be broken up into single characters by set()
        skipped = {skip_sheets}
    else:
        skipped = set(skip_sheets)

    frames = []
    for file_location in file_locations:
        # sheet_name=None makes pd.read_excel return a {worksheet name: DataFrame} dict
        # covering all worksheets.
        # See https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
        workbook = pd.read_excel(file_location, sheet_name=None)
        frames.extend(frame for sheet_name, frame in workbook.items() if sheet_name not in skipped)
    if not frames:
        raise ValueError('No worksheets to load from the given file locations')

    combined = pd.concat(frames)
    combined['Date'] = pd.to_datetime(combined['Date'])
    combined = combined.set_index('Date')
    # Rebuild the index from its raw values so that a frequency is attached whenever pandas
    # can infer one (inferred_freq is None otherwise). The rebuilt index carries no name,
    # so callers writing it out have to supply a label themselves.
    combined.index = pd.DatetimeIndex(combined.index.values, freq=combined.index.inferred_freq)
    return combined.sort_index()

## Load the raw data and write it to CSV

In [4]:
# Load every workbook listed in DATA_PATHS into one date-sorted frame.
raw = pull_raw_traffic_data(DATA_PATHS)

In [5]:
# Display the combined frame; the rendered table is truncated to its first and last rows.
raw

Unnamed: 0,Device Category,Hostname,Default Channel Grouping,Sessions,Pageviews
2016-11-01,mobile,www.joesnewbalanceoutlet.com,Comparison Shopping Engines,130,130
2016-11-01,mobile,www.joesnewbalanceoutlet.com,Affiliate,2081,10924
2016-11-01,desktop,www.joesnewbalanceoutlet.com,Social,520,4031
2016-11-01,desktop,www.joesnewbalanceoutlet.com,Referral,1951,15085
2016-11-01,desktop,www.joesnewbalanceoutlet.com,Paid Search,2341,9103
...,...,...,...,...,...
2022-12-31,mobile,www.newbalance.ie,Referral,17,34
2022-12-31,mobile,www.newbalance.ie,Social,17,86
2022-12-31,mobile,www.newbalance.ie,Social Paid,2337,4314
2022-12-31,mobile,www.newbalance.it,Direct,1530,4743


In [12]:
# Persist the combined frame. Its index has no name of its own, so index_label provides
# the 'Date' header for the index column in the CSV.
raw.to_csv(RAW_DATA_LOCATION, index_label='Date')

## Read back the CSV written above

In [13]:
# Round-trip check: load the CSV again with pandas defaults. No index_col or parse_dates
# is given, so 'Date' is returned as an ordinary column next to a fresh integer index.
test = pd.read_csv(RAW_DATA_LOCATION)

In [14]:
# Display the reloaded frame for comparison with the frame shown before it was written.
test

Unnamed: 0,Date,Device Category,Hostname,Default Channel Grouping,Sessions,Pageviews
0,2016-11-01,mobile,www.joesnewbalanceoutlet.com,Comparison Shopping Engines,130,130
1,2016-11-01,mobile,www.joesnewbalanceoutlet.com,Affiliate,2081,10924
2,2016-11-01,desktop,www.joesnewbalanceoutlet.com,Social,520,4031
3,2016-11-01,desktop,www.joesnewbalanceoutlet.com,Referral,1951,15085
4,2016-11-01,desktop,www.joesnewbalanceoutlet.com,Paid Search,2341,9103
...,...,...,...,...,...,...
453616,2022-12-31,mobile,www.newbalance.ie,Referral,17,34
453617,2022-12-31,mobile,www.newbalance.ie,Social,17,86
453618,2022-12-31,mobile,www.newbalance.ie,Social Paid,2337,4314
453619,2022-12-31,mobile,www.newbalance.it,Direct,1530,4743
