In [28]:
import numpy as np
import pandas as pd
import datetime

## Extracting date and time parts from a datetime variable
Datetime variables can hold a date, a time, or both. They are rarely used in their raw format to train machine
learning models. Instead, we derive additional features from them; extracting information from the date and time
parts can enrich the dataset considerably.

In [29]:
# Read the records CSV into a DataFrame and preview its first five rows.
lastfm_csv = '../../../Last_fm/lastfm_records.csv'
data = pd.read_csv(lastfm_csv)
data.head()

Unnamed: 0,Artist,Album,Track,Day,Month,Date,Year,Time,Duration,Listeners,PlayCount,Tags
0,James Bay,Electric Light,Wild Love,Saturday,Dec,19,2020,13:06,197000.0,53277.0,277241.0,"british,2010s,snl,2018,2018 single"
1,James Bay,Electric Light,Pink Lemonade,Saturday,Dec,19,2020,13:02,255000.0,51547.0,356515.0,"2018,rock,british,indie rock,pop rock"
2,James Bay,Electric Light,Wasted on Each Other,Saturday,Dec,19,2020,12:58,0.0,15399.0,81110.0,
3,James Bay,Electric Light,Intro,Saturday,Dec,19,2020,12:54,59000.0,10021.0,30385.0,
4,Hozier,Hozier (Deluxe Version),Cherry Wine (Live),Saturday,Dec,19,2020,12:53,240000.0,76909.0,341694.0,"soul,indie rock,acoustic,Awesome,irish"


In [30]:
# Check how each column was typed on load before doing any datetime work.
data.dtypes

Artist        object
Album         object
Track         object
Day           object
Month         object
Date           int64
Year           int64
Time          object
Duration     float64
Listeners    float64
PlayCount    float64
Tags          object
dtype: object

In [31]:
# Assemble one "<day>-<month>-<year> <time>" string per row from the separate
# date-part columns. Columns are addressed by label and concatenated as whole
# Series: integer keys on a label-indexed row (x[0], x[1], ...) rely on a
# deprecated positional fallback, and a row-wise apply is needlessly slow.
data['Listened_at'] = (
    data['Date'].astype(str)
    + '-' + data['Month'].astype(str)
    + '-' + data['Year'].astype(str)
    + ' ' + data['Time'].astype(str)
)

# The individual parts are now redundant; rebind instead of mutating in place.
data = data.drop(columns=['Month', 'Date', 'Year', 'Time'])
data

Unnamed: 0,Artist,Album,Track,Day,Duration,Listeners,PlayCount,Tags,Listened_at
0,James Bay,Electric Light,Wild Love,Saturday,197000.0,53277.0,277241.0,"british,2010s,snl,2018,2018 single",19-Dec-2020 13:06
1,James Bay,Electric Light,Pink Lemonade,Saturday,255000.0,51547.0,356515.0,"2018,rock,british,indie rock,pop rock",19-Dec-2020 13:02
2,James Bay,Electric Light,Wasted on Each Other,Saturday,0.0,15399.0,81110.0,,19-Dec-2020 12:58
3,James Bay,Electric Light,Intro,Saturday,59000.0,10021.0,30385.0,,19-Dec-2020 12:54
4,Hozier,Hozier (Deluxe Version),Cherry Wine (Live),Saturday,240000.0,76909.0,341694.0,"soul,indie rock,acoustic,Awesome,irish",19-Dec-2020 12:53
...,...,...,...,...,...,...,...,...,...
50130,One Direction,Made in the A.M. (Deluxe Edition),I Want To Write You A Song,Thursday,179000.0,53009.0,400721.0,"pop,british,UK,acoustic,beautiful",3-Dec-2015 02:02
50131,One Direction,Made in the A.M. (Deluxe Edition),Love You Goodbye,Thursday,196000.0,73592.0,551937.0,"love at first listen,pop,british,sad,UK",3-Dec-2015 01:58
50132,One Direction,Made in the A.M. (Deluxe Edition),What A Feeling,Thursday,200000.0,79500.0,806809.0,"pop,british,easy listening,fav,love at first l...",3-Dec-2015 01:55
50133,One Direction,Made in the A.M. (Deluxe Edition),Love You Goodbye,Thursday,196000.0,73592.0,551937.0,"love at first listen,pop,british,sad,UK",3-Dec-2015 01:52


In [32]:
# Turn the assembled text column into real datetime values so the `.dt`
# accessor becomes available in the following cells.
listened_at = data['Listened_at'].pipe(pd.to_datetime)
listened_at

0       2020-12-19 13:06:00
1       2020-12-19 13:02:00
2       2020-12-19 12:58:00
3       2020-12-19 12:54:00
4       2020-12-19 12:53:00
                ...        
50130   2015-12-03 02:02:00
50131   2015-12-03 01:58:00
50132   2015-12-03 01:55:00
50133   2015-12-03 01:52:00
50134   2015-12-01 20:48:00
Name: Listened_at, Length: 50135, dtype: datetime64[ns]

In [33]:
# Split every timestamp into its calendar-date part and its clock-time part.
date, time = listened_at.dt.date, listened_at.dt.time

In [34]:
# Display the date part and the time part together as a tuple of two Series.
date,time

(0        2020-12-19
 1        2020-12-19
 2        2020-12-19
 3        2020-12-19
 4        2020-12-19
             ...    
 50130    2015-12-03
 50131    2015-12-03
 50132    2015-12-03
 50133    2015-12-03
 50134    2015-12-01
 Name: Listened_at, Length: 50135, dtype: object,
 0        13:06:00
 1        13:02:00
 2        12:58:00
 3        12:54:00
 4        12:53:00
            ...   
 50130    02:02:00
 50131    01:58:00
 50132    01:55:00
 50133    01:52:00
 50134    20:48:00
 Name: Listened_at, Length: 50135, dtype: object)

## Deriving representations of the year and month

In [41]:
# Start a fresh feature frame from the raw timestamp values, then attach the
# coarse calendar features (year, month, quarter, ISO week number) in one go.
stamp_parts = listened_at.dt
df = pd.DataFrame({'listened_at': listened_at.values}).assign(
    year=stamp_parts.year,
    month=stamp_parts.month,
    quarter=stamp_parts.quarter,
    week=stamp_parts.isocalendar().week,
)

df

Unnamed: 0,listened_at,year,month,quarter,week
0,2020-12-19 13:06:00,2020,12,4,51
1,2020-12-19 13:02:00,2020,12,4,51
2,2020-12-19 12:58:00,2020,12,4,51
3,2020-12-19 12:54:00,2020,12,4,51
4,2020-12-19 12:53:00,2020,12,4,51
...,...,...,...,...,...
50130,2015-12-03 02:02:00,2015,12,4,49
50131,2015-12-03 01:58:00,2015,12,4,49
50132,2015-12-03 01:55:00,2015,12,4,49
50133,2015-12-03 01:52:00,2015,12,4,49


## Creating representations of day and week

In [47]:
# Add day-level features: day of the month, weekday number, and weekday name.
df = df.assign(
    day=listened_at.dt.day,
    day_of_week=listened_at.dt.dayofweek,
    week_day=listened_at.dt.day_name(),
)
df

Unnamed: 0,listened_at,year,month,quarter,week,day,day_of_week,week_day
0,2020-12-19 13:06:00,2020,12,4,51,19,5,Saturday
1,2020-12-19 13:02:00,2020,12,4,51,19,5,Saturday
2,2020-12-19 12:58:00,2020,12,4,51,19,5,Saturday
3,2020-12-19 12:54:00,2020,12,4,51,19,5,Saturday
4,2020-12-19 12:53:00,2020,12,4,51,19,5,Saturday
...,...,...,...,...,...,...,...,...
50130,2015-12-03 02:02:00,2015,12,4,49,3,3,Thursday
50131,2015-12-03 01:58:00,2015,12,4,49,3,3,Thursday
50132,2015-12-03 01:55:00,2015,12,4,49,3,3,Thursday
50133,2015-12-03 01:52:00,2015,12,4,49,3,3,Thursday


## Extracting time parts from a time variable

In [49]:
# Add the clock components of each timestamp as separate integer columns.
df = df.assign(
    hour=listened_at.dt.hour,
    minute=listened_at.dt.minute,
    seconds=listened_at.dt.second,
)
df

Unnamed: 0,listened_at,year,month,quarter,week,day,day_of_week,week_day,hour,minute,seconds
0,2020-12-19 13:06:00,2020,12,4,51,19,5,Saturday,13,6,0
1,2020-12-19 13:02:00,2020,12,4,51,19,5,Saturday,13,2,0
2,2020-12-19 12:58:00,2020,12,4,51,19,5,Saturday,12,58,0
3,2020-12-19 12:54:00,2020,12,4,51,19,5,Saturday,12,54,0
4,2020-12-19 12:53:00,2020,12,4,51,19,5,Saturday,12,53,0
...,...,...,...,...,...,...,...,...,...,...,...
50130,2015-12-03 02:02:00,2015,12,4,49,3,3,Thursday,2,2,0
50131,2015-12-03 01:58:00,2015,12,4,49,3,3,Thursday,1,58,0
50132,2015-12-03 01:55:00,2015,12,4,49,3,3,Thursday,1,55,0
50133,2015-12-03 01:52:00,2015,12,4,49,3,3,Thursday,1,52,0


## Capturing the elapsed time between datetime variables
Datetime variables are useful on their own, and they become even more valuable when combined with other datetime
variables to derive further insights, such as the time elapsed between two points in time.

In [50]:
# Whole days elapsed between today and each listening date.
# Both sides are midnight-normalised datetime64 values, so the subtraction is
# vectorised and yields a timedelta64 column directly, rather than looping over
# Python `date` objects held in an object-dtype Series.
# NOTE: the result depends on the day the notebook is run.
today = pd.Timestamp(datetime.date.today())
df['from_today'] = today - listened_at.dt.normalize()
df

Unnamed: 0,listened_at,year,month,quarter,week,day,day_of_week,week_day,hour,minute,seconds,from_today
0,2020-12-19 13:06:00,2020,12,4,51,19,5,Saturday,13,6,0,122 days
1,2020-12-19 13:02:00,2020,12,4,51,19,5,Saturday,13,2,0,122 days
2,2020-12-19 12:58:00,2020,12,4,51,19,5,Saturday,12,58,0,122 days
3,2020-12-19 12:54:00,2020,12,4,51,19,5,Saturday,12,54,0,122 days
4,2020-12-19 12:53:00,2020,12,4,51,19,5,Saturday,12,53,0,122 days
...,...,...,...,...,...,...,...,...,...,...,...,...
50130,2015-12-03 02:02:00,2015,12,4,49,3,3,Thursday,2,2,0,1965 days
50131,2015-12-03 01:58:00,2015,12,4,49,3,3,Thursday,1,58,0,1965 days
50132,2015-12-03 01:55:00,2015,12,4,49,3,3,Thursday,1,55,0,1965 days
50133,2015-12-03 01:52:00,2015,12,4,49,3,3,Thursday,1,52,0,1965 days


In [51]:
# Express the elapsed time in (fractional) months.
# A calendar month has no fixed length, so np.timedelta64(1, 'M') is an
# ambiguous divisor that recent pandas versions refuse to convert. Divide by an
# explicit average month instead: 365.2425 / 12 = 30.436875 days, which
# reproduces the values the 'M' unit used to give (122 days -> 4.008296).
AVG_DAYS_PER_MONTH = 365.2425 / 12
df['ft_by_month'] = pd.to_timedelta(df['from_today']) / pd.Timedelta(days=AVG_DAYS_PER_MONTH)
df['ft_by_month']

0         4.008296
1         4.008296
2         4.008296
3         4.008296
4         4.008296
           ...    
50130    64.559847
50131    64.559847
50132    64.559847
50133    64.559847
50134    64.625557
Name: ft_by_month, Length: 50135, dtype: float64

In [52]:
# Express the elapsed time in (fractional) years.
# A calendar year has no fixed length, so np.timedelta64(1, 'Y') is an
# ambiguous divisor that recent pandas versions refuse to convert. Divide by an
# explicit average Gregorian year of 365.2425 days instead, which reproduces
# the values the 'Y' unit used to give (122 days -> 0.334025).
AVG_DAYS_PER_YEAR = 365.2425
df['ft_by_year'] = pd.to_timedelta(df['from_today']) / pd.Timedelta(days=AVG_DAYS_PER_YEAR)
df['ft_by_year']

0        0.334025
1        0.334025
2        0.334025
3        0.334025
4        0.334025
           ...   
50130    5.379987
50131    5.379987
50132    5.379987
50133    5.379987
50134    5.385463
Name: ft_by_year, Length: 50135, dtype: float64

## Working with time in different time zones

In [63]:
# Label the naive timestamps as UTC, then express them in London local time.
# NOTE(review): this treats the recorded times as UTC -- confirm against the data source.
utc_stamps = df['listened_at'].dt.tz_localize('UTC')
df['london_time'] = utc_stamps.dt.tz_convert('Europe/London')
df[['listened_at', 'london_time']]

Unnamed: 0,listened_at,london_time
0,2020-12-19 13:06:00,2020-12-19 13:06:00+00:00
1,2020-12-19 13:02:00,2020-12-19 13:02:00+00:00
2,2020-12-19 12:58:00,2020-12-19 12:58:00+00:00
3,2020-12-19 12:54:00,2020-12-19 12:54:00+00:00
4,2020-12-19 12:53:00,2020-12-19 12:53:00+00:00
...,...,...
50130,2015-12-03 02:02:00,2015-12-03 02:02:00+00:00
50131,2015-12-03 01:58:00,2015-12-03 01:58:00+00:00
50132,2015-12-03 01:55:00,2015-12-03 01:55:00+00:00
50133,2015-12-03 01:52:00,2015-12-03 01:52:00+00:00
