## Log and Power Transformations in Practice

In [18]:
from scipy.optimize import minimize
import numpy as np
np.random.seed(0)
# Setting the random seed to obtain the same set of random number 

In [19]:
pts = 10000
vals = np.random.lognormal(0, 1.0, pts)
# generate random numbers from a log-normal distribution 

In [20]:
from sklearn.preprocessing import StandardScaler
from scipy.stats import normaltest
scaler = StandardScaler()
vals_ss = scaler.fit_transform(vals.reshape(-1,1))
f, p = normaltest(vals_ss)
p

array([0.])

The null hypothesis of this statistical test is that the sample comes from a normal distribution.
p < threshold, reject the null hypothesis => The distribution after apply normaltest does not follow the normal distribution

In [21]:
from sklearn.preprocessing import minmax_scale
vals_mm = minmax_scale(vals)
_, p = normaltest(vals_mm.squeeze())
print(f'significance: {p:.2f}')

significance: 0.00


log_transformed = np.log(vals)
_,p = normaltest(log_transformed)
print(f'significance: {p:.2f}')

The significance is 0.46, we can't reject the null hypothesis. The distribution is similar to normal

In [23]:
#Apply Box-Cox transformation
from scipy.stats import boxcox
vals_bc = boxcox(vals, 0.0)
_, p = normaltest(vals_bc)

print(f'significance: {p:.2f}')

significance: 0.46


## Imputation

In [26]:
from sklearn.impute import SimpleImputer
imp_mean = SimpleImputer(missing_values = np.nan, strategy = 'mean')
imp_mean.fit([[7,2,3],[4,np.nan,6],[10,5,9]])
# SimpleImputer()
df = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]
print(imp_mean.transform(df))

[[ 7.   2.   3. ]
 [ 4.   3.5  6. ]
 [10.   3.5  9. ]]


## Holiday Features

In [30]:
pip install workalendar

Collecting workalendar
  Downloading workalendar-17.0.0-py3-none-any.whl (210 kB)
Collecting pyluach
  Downloading pyluach-2.2.0-py3-none-any.whl (25 kB)
Collecting lunardate
  Downloading lunardate-0.2.0-py3-none-any.whl (5.6 kB)
Collecting tzdata
  Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)
Installing collected packages: tzdata, pyluach, lunardate, workalendar
Successfully installed lunardate-0.2.0 pyluach-2.2.0 tzdata-2023.3 workalendar-17.0.0
Note: you may need to restart the kernel to use updated packages.


In [37]:
from workalendar.europe.united_kingdom import UnitedKingdom
UnitedKingdom().holidays()

[(datetime.date(2023, 1, 1), 'New year'),
 (datetime.date(2023, 1, 2), 'New Year shift'),
 (datetime.date(2023, 4, 7), 'Good Friday'),
 (datetime.date(2023, 4, 9), 'Easter Sunday'),
 (datetime.date(2023, 4, 10), 'Easter Monday'),
 (datetime.date(2023, 5, 1), 'Early May Bank Holiday'),
 (datetime.date(2023, 5, 29), 'Spring Bank Holiday'),
 (datetime.date(2023, 8, 28), 'Late Summer Bank Holiday'),
 (datetime.date(2023, 12, 25), 'Christmas Day'),
 (datetime.date(2023, 12, 26), 'Boxing Day')]

In [38]:
from typing import List
from dateutil.relativedelta import relativedelta, TH 
import datetime
from workalendar.usa import California
def create_custom_holidays(year: int)-> List:
    custom_holidays = California().holidays()
    custom_holidays.append(((datetime.datetime(year, 11, 1) +
                           relativedelta(weekday=TH(+4))
                           + datetime.timedelta(days=1)).date(),
                          'Black Friday'))
    # TH(+4): represent the fourth occurence of the specified weekday.
    # timedelta(days=1), add one day to the date and time, date() convert to date
    return {k: v for (k,v) in custom_holidays}
custom_holidays = create_custom_holidays(2023)
    
custom_holidays

{datetime.date(2023, 1, 1): 'New year',
 datetime.date(2023, 1, 2): 'New year (Observed)',
 datetime.date(2023, 1, 16): 'Birthday of Martin Luther King, Jr.',
 datetime.date(2023, 2, 20): "Washington's Birthday",
 datetime.date(2023, 3, 31): 'Cesar Chavez Day',
 datetime.date(2023, 5, 29): 'Memorial Day',
 datetime.date(2023, 7, 4): 'Independence Day',
 datetime.date(2023, 9, 4): 'Labor Day',
 datetime.date(2023, 11, 10): 'Veterans Day (Observed)',
 datetime.date(2023, 11, 11): 'Veterans Day',
 datetime.date(2023, 11, 23): 'Thanksgiving Day',
 datetime.date(2023, 11, 24): 'Black Friday',
 datetime.date(2023, 12, 25): 'Christmas Day'}

In [39]:
def is_holiday(current_date: datetime.date):
    """Determine if we have a holiday"""
    return custom_holidays.get(current_date, False)
today = datetime.date(2021,4,11)
is_holiday(today)

False

## Date Annotation

In [42]:
import calendar
calendar.monthrange(2021,1)
#This means the first weekday of 2021 was a Friday, January 2021 had 31 days

(4, 31)

In [43]:
from datetime import date
def year_anchor(current_date: datetime.date):
    return (
    (current_date - date(current_date.year, 1, 1)).days,
    (date(current_date.year, 12,31) - current_date).days)
year_anchor(today)

(100, 264)

In [45]:
def month_anchor(current_date: datetime.date):
    last_day = calendar.monthrange(current_date.year, current_date.month)[0]
    return (
    (current_date - datetime.date(current_date.year, current_date.month, 1)).days,
        (current_date - datetime.date(current_date.year, current_date.month, last_day)).days)
month_anchor(today)

(10, 8)

In [56]:
month_anchor(datetime.date(2017,1,10))

(9, 4)

In [46]:
#Extract the number of days from the first of the month and to the end of the month

## Paydays 

In [57]:
def get_last_friday(current_date: datetime.date, weekday = calendar.FRIDAY):
    return max(week[weekday]
              for week in  calendar.monthcalendar(current_date.year, current_date.month))
get_last_friday(today)

30