In [1]:
# Built-in libraries
from datetime import datetime, timedelta
import math

# NumPy, SciPy and Pandas
import pandas as pd
import numpy as np


In [4]:
def getContext(datasetName, context, max_missing=7):
    """
    Extract a given context from a processed dataset and save it as a csv file.

    Reads ``../data/processed/<datasetName>_dataset.csv`` (first column used as
    the index, parsed as dates), keeps the rows that belong to the requested
    context, cleans the result and writes it to
    ``../data/processed/<datasetName>_<context>_dataset.csv``.

    Current contexts:
    - "weekday":  rows whose index weekday is not 5 or 6
    - "weekend":  rows whose index weekday is 5 or 6
    - "fullweek": every row

    Parameters
    ----------
    datasetName : str
        Prefix of the csv file to read and of the csv file to write.
    context : str
        One of "weekday", "weekend" or "fullweek".
    max_missing : int, optional
        A column is kept only if it has at most this many NaN values after
        the row filtering. Defaults to 7, the value previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The filtered and cleaned dataframe (the same data written to disk).

    Raises
    ------
    ValueError
        If ``context`` is not one of the supported contexts. The check runs
        before any file is read, and replaces the former ``exit()`` call,
        which is not a usable error signal inside a notebook kernel.
    """
    valid_contexts = ("weekday", "weekend", "fullweek")
    if context not in valid_contexts:
        raise ValueError(
            "Please choose a valid context ({}); got {!r}".format(
                ", ".join(valid_contexts), context))

    # `infer_datetime_format` is intentionally not passed: it is deprecated in
    # pandas 2.x, where it has no effect.
    dataframe = pd.read_csv('../data/processed/{}_dataset.csv'.format(datasetName),
                            parse_dates=True, index_col=0)

    # select rows based on context; the weekday lookup is only done when it is
    # actually needed, so "fullweek" does not depend on the index type
    if context == "fullweek":
        df_context = dataframe.copy()
    else:
        is_weekend = dataframe.index.weekday >= 5
        if context == "weekend":
            df_context = dataframe[is_weekend]
        else:
            df_context = dataframe[~is_weekend]

    # delete the dates whose values are all 0 (NaN compares as != 0, so rows
    # that contain NaN are kept here)
    df_context = df_context[(df_context != 0).any(axis=1)]
    # drop columns with all nan values
    df_context = df_context.dropna(axis=1, how='all')
    # drop columns with more than `max_missing` nan values; when the frame has
    # no more than `max_missing` rows the threshold is <= 0 and nothing is dropped
    df_context = df_context.dropna(thresh=len(df_context) - max_missing, axis=1)

    # save the file and return the dataframe
    df_context.to_csv("../data/processed/{}_{}_dataset.csv".format(datasetName, context))
    return df_context


In [6]:
# Build the weekday / weekend / fullweek frames for each dataset.
# getContext is called in the same order as before (BDG first, then DGS;
# weekday, weekend, fullweek within each), and each call also writes its csv.
df_BDG_weekday, df_BDG_weekend, df_BDG_fullweek = (
    getContext('BDG', ctx) for ctx in ('weekday', 'weekend', 'fullweek')
)

df_DGS_weekday, df_DGS_weekend, df_DGS_fullweek = (
    getContext('DGS', ctx) for ctx in ('weekday', 'weekend', 'fullweek')
)
