### Date Handling

This notebook also demonstrates performing a LEFT JOIN of dated records onto a generated weekly calendar.

In [1]:
# Consolidate all imports here
import pandas as pd
import numpy as np
import datetime as dt
from collections import Counter

In [2]:
# Build a weekly calendar with one row per week, from the week containing
# 2021-12-01 up to the week containing the current date (newest week first).
week_periods = (
    pd.date_range(start='2021-12-01', end=dt.datetime.now())
    .to_period('W')
    .unique()
)

# A weekly period renders as 'start/end'; split that text to recover both bounds.
week_bounds = pd.Series(week_periods).astype(str).str.split('/')

calendar = pd.DataFrame({
    'Week Range': week_periods,
    'Week Start': pd.to_datetime(week_bounds.str[0]),
    'Week End': pd.to_datetime(week_bounds.str[1]),
    # 1-based, counted from the earliest week
    'Week Number': range(1, len(week_periods) + 1),
})

# Note: a previous variant renamed 'Week Start' to 'Date Pulled' and added a
# 'Date Pulled Year' column; it is not used here.
calendar = calendar.sort_values(by='Week Start', ascending=False).reset_index(drop=True)
calendar

Unnamed: 0,Week Range,Week Start,Week End,Week Number
0,2022-03-14/2022-03-20,2022-03-14,2022-03-20,16
1,2022-03-07/2022-03-13,2022-03-07,2022-03-13,15
2,2022-02-28/2022-03-06,2022-02-28,2022-03-06,14
3,2022-02-21/2022-02-27,2022-02-21,2022-02-27,13
4,2022-02-14/2022-02-20,2022-02-14,2022-02-20,12
5,2022-02-07/2022-02-13,2022-02-07,2022-02-13,11
6,2022-01-31/2022-02-06,2022-01-31,2022-02-06,10
7,2022-01-24/2022-01-30,2022-01-24,2022-01-30,9
8,2022-01-17/2022-01-23,2022-01-17,2022-01-23,8
9,2022-01-10/2022-01-16,2022-01-10,2022-01-16,7


In [3]:
# Sample records to be joined onto the weekly calendar: one pull date per row
# plus a placeholder payload column.
pull_dates = [
    '2021-11-29', '2021-12-07', '2021-12-13', '2021-12-20',
    '2021-12-27', '2022-01-03', '2022-01-10', '2022-01-17',
    '2022-01-25', '2022-01-31', '2022-02-07', '2022-02-15',
]

df = pd.DataFrame({
    # parsed up front so the column is datetime rather than text
    'Date Pulled': pd.to_datetime(pull_dates),
    'Data': ['*'] * len(pull_dates),
})
df

Unnamed: 0,Date Pulled,Data
0,2021-11-29,*
1,2021-12-07,*
2,2021-12-13,*
3,2021-12-20,*
4,2021-12-27,*
5,2022-01-03,*
6,2022-01-10,*
7,2022-01-17,*
8,2022-01-25,*
9,2022-01-31,*


In [4]:
# Disabled earlier attempt: left join on the exact 'Date Pulled' value.
# NOTE(review): this needs `calendar` to expose a 'Date Pulled' column (for
# example 'Week Start' renamed); presumably pull dates that are not a week
# start would then come back with a null 'Week Number' -- confirm before
# re-enabling.
# # Left join df and calendar
# df_merge = df.merge(calendar, how='left', on='Date Pulled')
# df_merge['Date Pulled Year'] = df_merge['Date Pulled'].dt.year
# df_merge = df_merge[['Date Pulled', 'Date Pulled Year', 'Week End', 'Week Number']]
# df_merge['Week End'] = pd.to_datetime(df_merge['Week End'])
# display(df_merge)
# # display(df_merge[df_merge['Week Number'].isnull()])

In [5]:
# Tag every record with the weekly period that contains its 'Date Pulled'.
# The dtypes are printed first to confirm 'Date Pulled' is datetime, which the
# .dt accessor below relies on.
print(df.dtypes)
df2 = df.assign(**{'Week Range': df['Date Pulled'].dt.to_period('W')})
df2

Date Pulled    datetime64[ns]
Data                   object
dtype: object


Unnamed: 0,Date Pulled,Data,Week Range
0,2021-11-29,*,2021-11-29/2021-12-05
1,2021-12-07,*,2021-12-06/2021-12-12
2,2021-12-13,*,2021-12-13/2021-12-19
3,2021-12-20,*,2021-12-20/2021-12-26
4,2021-12-27,*,2021-12-27/2022-01-02
5,2022-01-03,*,2022-01-03/2022-01-09
6,2022-01-10,*,2022-01-10/2022-01-16
7,2022-01-17,*,2022-01-17/2022-01-23
8,2022-01-25,*,2022-01-24/2022-01-30
9,2022-01-31,*,2022-01-31/2022-02-06


In [6]:
# Left join on the shared 'Week Range' period: every row of df2 is kept and
# picks up the calendar columns of the week it falls in.
df_merge2 = pd.merge(df2, calendar, how='left', on='Week Range')
df_merge2

Unnamed: 0,Date Pulled,Data,Week Range,Week Start,Week End,Week Number
0,2021-11-29,*,2021-11-29/2021-12-05,2021-11-29,2021-12-05,1
1,2021-12-07,*,2021-12-06/2021-12-12,2021-12-06,2021-12-12,2
2,2021-12-13,*,2021-12-13/2021-12-19,2021-12-13,2021-12-19,3
3,2021-12-20,*,2021-12-20/2021-12-26,2021-12-20,2021-12-26,4
4,2021-12-27,*,2021-12-27/2022-01-02,2021-12-27,2022-01-02,5
5,2022-01-03,*,2022-01-03/2022-01-09,2022-01-03,2022-01-09,6
6,2022-01-10,*,2022-01-10/2022-01-16,2022-01-10,2022-01-16,7
7,2022-01-17,*,2022-01-17/2022-01-23,2022-01-17,2022-01-23,8
8,2022-01-25,*,2022-01-24/2022-01-30,2022-01-24,2022-01-30,9
9,2022-01-31,*,2022-01-31/2022-02-06,2022-01-31,2022-02-06,10


In [7]:
# Sanity check of to_period('W'): show the current timestamp next to the
# weekly period it is assigned to.
date_now = pd.to_datetime(dt.datetime.now())
week_now = date_now.to_period('W')
display(date_now, week_now)

Timestamp('2022-03-20 20:06:02.537692')

Period('2022-03-14/2022-03-20', 'W-SUN')