In [None]:
# Import the necessary Python libraries for data analysis
# Numpy allows you to do math on vectors (columns of data) and matrices (tables of data) without writing loops
import numpy as np
# Pandas allows you to read in data as Series and DataFrame, and calls Numpy to make calculations on these.
import pandas as pd

# Matplotlib allows you to create plots of your data, and seaborn enhances the aesthetics and creates more plot options
import matplotlib.pyplot as plt
import seaborn as sns

# This magic command tells Jupyter to render Matplotlib plots inline, directly below the cell that creates them
%matplotlib inline

In [None]:
# This function takes in a DataFrame whose column names are in all lowercase, and words are split with underscores.
# It then makes the column names human-friendly by replacing the underscores with spaces, and capitalizing every
# word in each column name.
# The argument (input) to this function is a DataFrame, denoted by df. The output is the same DataFrame, with the 
# column names reformatted.

def space_and_caps(df):
    """Rename the columns of *df* from ``snake_case`` to ``Title Words``.

    Each column label is split on underscores, every resulting word is
    capitalized (first letter upper-case, the rest lower-case), and the words
    are re-joined with single spaces.

    The DataFrame is modified in place and also returned, so the call can be
    used either as a statement or inside an expression.
    """
    def prettify(label):
        # "order_date" -> ["order", "date"] -> "Order Date"
        pieces = label.split("_")
        return " ".join(piece.capitalize() for piece in pieces)

    # Assigning a plain list replaces the whole column index in one step.
    df.columns = [prettify(label) for label in df.columns]
    return df

In [None]:
# This function takes in a DataFrame read from a .csv file as its first argument (df).
# The second argument, date_col_list, is a list of columns that the user wants to change into datetime format.
# The result of the function is the same DataFrame, with the required columns changed to datetimes.

def convert_cols_to_datetime(df, date_col_list):
    """Convert the named columns of *df* to pandas datetimes.

    Parameters
    ----------
    df : DataFrame
        The table to update. It is modified in place.
    date_col_list : list of column labels, or a single string label
        The columns to parse with ``pd.to_datetime``. A bare string is
        treated as a one-element list. An empty list leaves *df* untouched.

    Returns
    -------
    DataFrame
        The same *df* object, with the requested columns converted.
    """
    # Accept a single column name as a convenience, so it is not mistaken for
    # an iterable of one-character labels in the loop below.
    if isinstance(date_col_list, str):
        date_col_list = [date_col_list]

    # Parse each column with one vectorized call instead of routing the
    # conversion through DataFrame.apply and a wrapper lambda.
    for col in date_col_list:
        df[col] = pd.to_datetime(df[col])
    return df

In [None]:
# This function takes in a Series with data type datetime64, and returns a Series mapping it to the 
# day in the current year matching its day of week.
# Useful for looking at year-over-year seasonality patterns while also taking into account day-of-week fluctuations.

def map_to_current_yr_match_dow(date_col):
    """Shift each date forward by 364 days per year of distance from today.

    364 is a whole number of weeks (52 * 7), so the shifted date always falls
    on the same day of the week as the original. This lets year-over-year
    series be overlaid without mixing up weekday effects.

    Parameters
    ----------
    date_col : Series
        Dates to map. Values are passed through ``pd.to_datetime`` first, so
        a Series that already has a datetime64 dtype is used as is.

    Returns
    -------
    Series
        Same index and name as *date_col*. Missing dates (NaT) stay NaT.

    Notes
    -----
    A calendar year is 365 or 366 days long, so every year of distance moves
    the result one or two days earlier in the calendar. For dates several
    years back, the result can therefore land late in the year *before* the
    current one.
    The shift is applied as an exact number of 24-hour days.
    """
    # Find the current year
    current_year = pd.to_datetime('today').year

    dates = pd.to_datetime(date_col)
    # Number of years between each date's year and the current year. This is
    # NaN wherever the date is NaT, which to_timedelta turns back into NaT.
    years_elapsed = current_year - dates.dt.year
    # One vectorized shift for the whole column instead of building a
    # DateOffset per row.
    shift = pd.to_timedelta(years_elapsed * 364, unit="D")
    return dates + shift