# Encoding dates

In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
from torch import nn

ROOT_DIR = os.path.dirname(os.path.abspath(""))

file = os.path.join(ROOT_DIR, "data/sitges_access.csv")
df = pd.read_csv(file)
df.head()

Unnamed: 0.1,Unnamed: 0,server_name,IP,logname,authenticate,date,petition,URL,status,bytes,referer,user-agent,level
0,0,sitgesanytime.com,47.76.35.19,-,-,2024-01-22 00:00:00+01:00,HEAD,/fr/pag492/explora-platges-i-ports-2/id12/les-...,301,4840,-,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,0
1,1,www.sitgesanytime.com,47.76.35.19,-,-,2024-01-22 00:00:01+01:00,HEAD,/fr/pag492/explora-platges-i-ports-2/id12/les-...,200,5223,-,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,0
2,2,www.sitgesanytime.com,40.77.167.53,-,-,2024-01-22 00:00:06+01:00,GET,/ca/noticias/84/sitges-obt%C3%A9-el-certificat...,404,2509,-,"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Ge...",5
3,3,sitgesanytime.com,47.76.35.19,-,-,2024-01-22 00:00:06+01:00,HEAD,/fr/pag492/explora-platges-i-ports-2/id12/les-...,301,4840,-,Mozilla/5.0 (Windows NT 6.1; Win64; x64) Apple...,0
4,4,www.sitgesanytime.com,47.76.35.19,-,-,2024-01-22 00:00:07+01:00,HEAD,/fr/pag492/explora-platges-i-ports-2/id12/les-...,200,5260,-,Mozilla/5.0 (Windows NT 6.1; Win64; x64) Apple...,0


### Timestamp conversion

In [2]:
datetime_str = str(df["date"][513]).split("+")[0]
time = pd.to_datetime(datetime_str).timestamp()
datetime_str, time

('2024-01-22 00:10:41', 1705882241.0)

In [3]:
date = "2150-01-22 00:10:41"
pd.to_datetime(date).timestamp()

5682096641.0

In [4]:
date = "2080-01-22 23:59:59"
hour = pd.to_datetime(date).hour

# Convert hours to cyclical features
sin_hour = np.sin(2 * np.pi * hour / 24.0)
cos_hour = np.cos(2 * np.pi * hour / 24.0)

print(sin_hour, cos_hour)

-0.25881904510252157 0.9659258262890681


### Cyclical encoding

In [33]:
def sin_transform(x, period=24):
	return np.sin(2 * np.pi * x / period)

def cos_transform(x, period=24):
	return np.cos(2 * np.pi * x / period)

df_temp = df.copy().sample(frac=1, random_state=42)

In [34]:
df_temp['date'] = pd.to_datetime(df_temp['date'])
df_temp['month'] = df_temp['date'].dt.month
df_temp['day'] = df_temp['date'].dt.day
df_temp['weekday'] = df_temp['date'].dt.weekday
df_temp['hour'] = df_temp['date'].dt.hour
df_temp['minute'] = df_temp['date'].dt.minute

In [36]:
df_temp["month_sin"] = sin_transform(df_temp["month"], 12)
df_temp["month_cos"] = cos_transform(df_temp["month"], 12)
df_temp["day_sin"] = sin_transform(df_temp["day"], 31)
df_temp["day_cos"] = cos_transform(df_temp["day"], 31)
df_temp["weekday_sin"] = sin_transform(df_temp["weekday"], 7)
df_temp["weekday_cos"] = cos_transform(df_temp["weekday"], 7)
df_temp["hour_sin"] = sin_transform(df_temp["hour"], 24)
df_temp["hour_cos"] = cos_transform(df_temp["hour"], 24)
df_temp["minute_sin"] = sin_transform(df_temp["minute"], 60)
df_temp["minute_cos"] = cos_transform(df_temp["minute"], 60)

df_temp.drop(["date", 'month', 'day', 'weekday', 'hour', 'minute'], axis=1, inplace=True)
df_temp.head()

Unnamed: 0.1,Unnamed: 0,server_name,IP,logname,authenticate,petition,URL,status,bytes,referer,...,month_sin,month_cos,day_sin,day_cos,weekday_sin,weekday_cos,hour_sin,hour_cos,minute_sin,minute_cos
294460,111926,www.sitgesanytime.com,86.4.35.245,-,-,GET,/plantilles/turisme/js/responsivemenu.js?v=3 H...,200,1913,https://www.sitgesanytime.com/en/pl409/blog/id...,...,0.5,0.866025,-0.998717,-0.050649,0.781831,0.62349,-0.965926,0.258819,0.994522,0.104528
716149,119842,www.sitgesanytime.com,168.119.65.47,-,-,GET,/plantilles/turisme/css/estils-mw800.css?v=11 ...,200,3914,https://www.sitgesanytime.com/es/planifica-el-...,...,0.5,0.866025,-0.848644,0.528964,-0.433884,-0.900969,-0.866025,0.5,0.743145,-0.669131
238657,56123,www.sitgesanytime.com,64.124.8.65,-,-,GET,/ca/pl349/actualitat/noticies/id189/l-exposici...,200,13883,-,...,0.5,0.866025,-0.998717,-0.050649,0.781831,0.62349,0.5,-0.8660254,-0.104528,0.994522
602556,6249,www.sitgesanytime.com,216.244.66.199,-,-,GET,/ca/noticias/20/la-gay-pride-reivindica-els-dr...,404,7269,-,...,0.5,0.866025,-0.848644,0.528964,-0.433884,-0.900969,0.258819,0.9659258,0.743145,0.669131
347541,31977,www.sitgesanytime.com,168.119.65.45,-,-,GET,/plantilles/turisme/css/estils-mw500.css?v=14 ...,200,1233,https://www.sitgesanytime.com/fr/pl50/planifie...,...,0.5,0.866025,-0.988468,0.151428,0.974928,-0.222521,1.0,6.123234000000001e-17,-0.978148,0.207912
