## Initialize

In [2]:
# Python Built-in 
from datetime import date
from datetime import datetime

# Python datetime

## Creation

### Date Creation

In [8]:
# Build two calendar dates; keyword arguments make the field order explicit.
dt1 = date(year=1992, month=8, day=24)
dt2 = date(year=2008, month=9, day=21)
# weekday() numbers the days Monday=0 ... Sunday=6.
dt2.weekday()

6

### Time Creation

In [62]:
# Positional order is year, month, day, hour, minute, second;
# spelled out as keywords here so each field is labelled.
t1 = datetime(year=2017, month=10, day=1, hour=15, minute=23, second=25)
print(t1)

2017-10-01 15:23:25


### Create a new datetime/date object from an existing one

In [66]:
# replace() returns a new object with only the named fields changed;
# dt1 and t1 themselves are left untouched.
dt3 = dt1.replace(year=2005)
t2 = t1.replace(day = 12, second = 5)
print(dt3)
# isoformat() renders the datetime as an ISO 8601 string.
t2.isoformat()

2005-08-24


'2017-10-12T15:23:05'

### Create datetime object from string


In [82]:
# strptime() raises ValueError unless the text matches the format exactly.
dt3 = datetime.strptime(
    "12/30/17 12:00:23",
    "%m/%d/%y %H:%M:%S",
)
print(dt3)

2017-12-30 12:00:23


### Create datetime object from a Unix timestamp

In [84]:
# Seconds since the Unix epoch (1970-01-01 00:00:00 UTC).
ts = 312312333
# With no tz argument, fromtimestamp() returns a naive datetime in the machine's
# local timezone, so the value displayed depends on where the notebook runs.
# Passing tz=timezone.utc would give the same result everywhere.
dt3 = datetime.fromtimestamp(ts)
dt3

datetime.datetime(1979, 11, 24, 12, 25, 33)

## Math with Date

In [31]:
# Subtracting one date from another yields a timedelta.
delta = dt2-dt1
# .days is the whole-day component of the difference.
delta.days

5872

In [85]:
# A bare name on the last line of a cell displays its repr.
delta

datetime.timedelta(days=5872)

In [40]:
# Adding a timedelta to a date shifts it forward and yields another date.
dt3 = dt2 + delta
dt3

datetime.date(2024, 10, 19)

## Math with Datetime

In [94]:
from datetime import timedelta

# Two days plus one second, built by adding two simpler durations.
tdelta = timedelta(days=2) + timedelta(seconds=1)

print(tdelta)
# The whole duration expressed in seconds, as a float.
tdelta.total_seconds()

2 days, 0:00:01


172801.0

## Dates to String

In [41]:
# print() uses str(): a date renders in ISO format, yyyy-mm-dd
print(dt3)

# display() of a plain object shows its repr()
display(dt3)

2024-10-19


datetime.date(2024, 10, 19)

In [51]:
# %y is the two-digit year; use %Y for the full four-digit year.
dt3.strftime("%y-%m-%d")

'24-10-19'

In [56]:
# %A is the full weekday name, %a the abbreviated one
# %B is the full month name, %b the abbreviated one
# %j is the day of the year, zero-padded to three digits (001-366)
# %-j drops the padding, but it is a platform extension (glibc/macOS) that is
# not portable to Windows, so the padding is stripped with int() instead.
day_of_year = int(dt3.strftime("%j"))

# English ordinals: 1st, 2nd, 3rd, 4th ... but 11th, 12th, 13th (and 111th, ...).
# A hard-coded "th" would produce "293th" or "001th".
if 11 <= day_of_year % 100 <= 13:
    suffix = "th"
else:
    suffix = {1: "st", 2: "nd", 3: "rd"}.get(day_of_year % 10, "th")

# The interpolated number and suffix contain no '%', so they pass through strftime unchanged.
dt3.strftime(f"%A, %B %d, %Y; the {day_of_year}{suffix} day of the year")

'Saturday, October 19, 2024; the 293th day of the year'

## Timezone

In [151]:
from datetime import datetime, timedelta, timezone

# Fixed UTC-5 offset. It never switches to daylight time, so it is really EST
# rather than a full "Eastern Time" zone.
ET = timezone(timedelta(hours = -5))

# .replace(tzinfo=...) only attaches a timezone label: the wall-clock reading is kept
# as-is (naive 15:23:25 becomes 15:23:25+00:00); no conversion takes place.
original = t1.replace(tzinfo=timezone.utc)
print('original: ', original)

# .astimezone() keeps the instant and recomputes the wall-clock reading for the
# target zone (15:23:25+00:00 becomes 10:23:25-05:00).
original_et = original.astimezone(ET)
print('ET:', original_et)

# Same instant in both zones, so both lines print the same POSIX timestamp.
print(original.timestamp())
print(original_et.timestamp())

# Converting back to UTC recovers the original reading.
original_et.astimezone(timezone.utc).isoformat()

original:  2017-10-01 15:23:25+00:00
1506885805.0
ET: 2017-10-01 10:23:25-05:00
1506871405.0
1506871405.0


'2017-10-01T15:23:25+00:00'

In [144]:
from dateutil import tz

# IANA zone with daylight-saving rules, unlike a fixed-offset timezone.
et = tz.gettz('America/New_York')
# replace(): same wall-clock reading, relabelled as New York time.
original_et = original.replace(tzinfo=et)
print(original_et)
# astimezone(): same instant, converted to New York wall-clock time.
print(original.astimezone(et))

2017-10-01 15:23:25-04:00
2017-10-01 11:23:25-04:00


## Pandas and Datetime


In [236]:
import pandas as pd

# parse_dates converts the listed columns to datetime64 while reading the CSV.
df = pd.read_csv('../dataset/capital-onebike.csv', parse_dates = ['Start date', 'End date'])
df.head(3)

Unnamed: 0,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type
0,2017-10-01 15:23:25,2017-10-01 15:26:26,31038,Glebe Rd & 11th St N,31036,George Mason Dr & Wilson Blvd,W20529,Member
1,2017-10-01 15:42:57,2017-10-01 17:49:59,31036,George Mason Dr & Wilson Blvd,31036,George Mason Dr & Wilson Blvd,W20529,Casual
2,2017-10-02 06:37:10,2017-10-02 06:42:53,31036,George Mason Dr & Wilson Blvd,31037,Ballston Metro / N Stuart & 9th St N,W20529,Member


In [237]:
# Column dtypes and non-null counts; shows whether both date columns parsed as datetime64.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 290 entries, 0 to 289
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Start date            290 non-null    datetime64[ns]
 1   End date              290 non-null    datetime64[ns]
 2   Start station number  290 non-null    int64         
 3   Start station         290 non-null    object        
 4   End station number    290 non-null    int64         
 5   End station           290 non-null    object        
 6   Bike number           290 non-null    object        
 7   Member type           290 non-null    object        
dtypes: datetime64[ns](2), int64(2), object(4)
memory usage: 18.2+ KB


In [238]:
# Subtracting two datetime columns gives a timedelta column.
df['Duration'] = df['End date'] - df['Start date']
# The .dt accessor applies timedelta methods element-wise; the result is float seconds.
df['Duration seconds'] = df['Duration'].dt.total_seconds()
df

Unnamed: 0,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type,Duration,Duration seconds
0,2017-10-01 15:23:25,2017-10-01 15:26:26,31038,Glebe Rd & 11th St N,31036,George Mason Dr & Wilson Blvd,W20529,Member,0 days 00:03:01,181.0
1,2017-10-01 15:42:57,2017-10-01 17:49:59,31036,George Mason Dr & Wilson Blvd,31036,George Mason Dr & Wilson Blvd,W20529,Casual,0 days 02:07:02,7622.0
2,2017-10-02 06:37:10,2017-10-02 06:42:53,31036,George Mason Dr & Wilson Blvd,31037,Ballston Metro / N Stuart & 9th St N,W20529,Member,0 days 00:05:43,343.0
3,2017-10-02 08:56:45,2017-10-02 09:18:03,31037,Ballston Metro / N Stuart & 9th St N,31295,Potomac & M St NW,W20529,Member,0 days 00:21:18,1278.0
4,2017-10-02 18:23:48,2017-10-02 18:45:05,31295,Potomac & M St NW,31230,Metro Center / 12th & G St NW,W20529,Member,0 days 00:21:17,1277.0
...,...,...,...,...,...,...,...,...,...,...
285,2017-12-29 14:32:55,2017-12-29 14:43:46,31242,18th St & Pennsylvania Ave NW,31265,5th St & Massachusetts Ave NW,W20529,Member,0 days 00:10:51,651.0
286,2017-12-29 15:08:26,2017-12-29 15:18:51,31265,5th St & Massachusetts Ave NW,31613,Eastern Market Metro / Pennsylvania Ave & 7th ...,W20529,Casual,0 days 00:10:25,625.0
287,2017-12-29 20:33:34,2017-12-29 20:38:13,31613,Eastern Market Metro / Pennsylvania Ave & 7th ...,31618,4th & East Capitol St NE,W20529,Member,0 days 00:04:39,279.0
288,2017-12-30 13:51:03,2017-12-30 13:54:33,31618,4th & East Capitol St NE,31610,Eastern Market / 7th & North Carolina Ave SE,W20529,Member,0 days 00:03:30,210.0


In [239]:
# Group rides into calendar months by 'Start date' and count the rides in each.
# Alternative aggregation, mean ride length per month:
# df.resample('M', on = 'Start date')['Duration seconds'].mean()
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME' — confirm the pandas version in use.

df.resample('M', on = 'Start date').size()


Start date
2017-10-31    108
2017-11-30    103
2017-12-31     79
Freq: M, dtype: int64

In [240]:
# Number of rides per member type, ordered by group key.
df.groupby('Member type').size()

Member type
Casual     54
Member    236
dtype: int64

In [180]:
# Same counts as groupby(...).size(), but ordered by frequency, most common first.
df['Member type'].value_counts()

Member    236
Casual     54
Name: Member type, dtype: int64

In [241]:
# Interpret the naive ride times as New York wall-clock time, then express them
# in London time.
# ambiguous='NaT': wall-clock times that occur twice when daylight saving ends
# cannot be resolved, so they become NaT instead of raising an error.
for col in ['Start date', 'End date']:
    # Localize only while the column is still naive, so re-running this cell
    # does not raise TypeError on an already tz-aware column.
    if df[col].dt.tz is None:
        df[col] = df[col].dt.tz_localize('America/New_York', ambiguous = 'NaT')
    df[col] = df[col].dt.tz_convert('Europe/London')


# Rows whose start time was ambiguous (now NaT) can be inspected with:
# df[df['Start date'].isna()]
df

Unnamed: 0,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type,Duration,Duration seconds
0,2017-10-01 20:23:25+01:00,2017-10-01 20:26:26+01:00,31038,Glebe Rd & 11th St N,31036,George Mason Dr & Wilson Blvd,W20529,Member,0 days 00:03:01,181.0
1,2017-10-01 20:42:57+01:00,2017-10-01 22:49:59+01:00,31036,George Mason Dr & Wilson Blvd,31036,George Mason Dr & Wilson Blvd,W20529,Casual,0 days 02:07:02,7622.0
2,2017-10-02 11:37:10+01:00,2017-10-02 11:42:53+01:00,31036,George Mason Dr & Wilson Blvd,31037,Ballston Metro / N Stuart & 9th St N,W20529,Member,0 days 00:05:43,343.0
3,2017-10-02 13:56:45+01:00,2017-10-02 14:18:03+01:00,31037,Ballston Metro / N Stuart & 9th St N,31295,Potomac & M St NW,W20529,Member,0 days 00:21:18,1278.0
4,2017-10-02 23:23:48+01:00,2017-10-02 23:45:05+01:00,31295,Potomac & M St NW,31230,Metro Center / 12th & G St NW,W20529,Member,0 days 00:21:17,1277.0
...,...,...,...,...,...,...,...,...,...,...
285,2017-12-29 19:32:55+00:00,2017-12-29 19:43:46+00:00,31242,18th St & Pennsylvania Ave NW,31265,5th St & Massachusetts Ave NW,W20529,Member,0 days 00:10:51,651.0
286,2017-12-29 20:08:26+00:00,2017-12-29 20:18:51+00:00,31265,5th St & Massachusetts Ave NW,31613,Eastern Market Metro / Pennsylvania Ave & 7th ...,W20529,Casual,0 days 00:10:25,625.0
287,2017-12-30 01:33:34+00:00,2017-12-30 01:38:13+00:00,31613,Eastern Market Metro / Pennsylvania Ave & 7th ...,31618,4th & East Capitol St NE,W20529,Member,0 days 00:04:39,279.0
288,2017-12-30 18:51:03+00:00,2017-12-30 18:54:33+00:00,31618,4th & East Capitol St NE,31610,Eastern Market / 7th & North Carolina Ave SE,W20529,Member,0 days 00:03:30,210.0


In [226]:
# Weekday name of each ride's start time.
df['Start date'].dt.day_name()

0        Sunday
1        Sunday
2        Monday
3        Monday
4        Monday
         ...   
285      Friday
286      Friday
287      Friday
288    Saturday
289    Saturday
Name: Start date, Length: 290, dtype: object

# Pandas Timestamp

In [37]:
import pandas as pd

# A Timestamp is a single point in time.
print(pd.Timestamp('2017-01-01'))
# A Period is a span (here the month of January 2017); adding 1 moves to the next month.
print(pd.Period('2017-01-01', freq='M')+1)
# Twelve consecutive month-end dates, beginning with the first month end on or after 2017-01-01.
print(pd.date_range('2017-01-01', periods=12, freq='M'))

2017-01-01 00:00:00
2017-02
DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30',
               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
               '2017-09-30', '2017-10-31', '2017-11-30', '2017-12-31'],
              dtype='datetime64[ns]', freq='M')


In [72]:
# Three month-end dates used as the index of a small price series.
# Note: this rebinds df, replacing the bike-share frame loaded earlier.
dates = pd.date_range('2017-01-01', periods=3, freq='M')
df = pd.DataFrame([300,400,500], index=dates, columns=['Price'])
df

Unnamed: 0,Price
2017-01-31,300
2017-02-28,400
2017-03-31,500


In [80]:
# Upsampling (monthly -> daily) creates new rows that have no data;
# method='ffill' fills each one with the last known value.

df.asfreq('D', method='ffill').head()

Unnamed: 0,Price,diff,pct_change
2017-01-31,300,,
2017-02-01,300,,
2017-02-02,300,,
2017-02-03,300,,
2017-02-04,300,,


In [81]:
# method='bfill' fills each newly created row with the next known value.
df.asfreq('D', method='bfill').head()

Unnamed: 0,Price,diff,pct_change
2017-01-31,300,,
2017-02-01,400,,0.333333
2017-02-02,400,,0.333333
2017-02-03,400,,0.333333
2017-02-04,400,,0.333333


In [84]:
# fill_value puts a constant into the newly created rows; NaNs already present are kept.
df.asfreq('D', fill_value=0).head()

Unnamed: 0,Price,diff,pct_change
2017-01-31,300,,
2017-02-01,0,0.0,0.0
2017-02-02,0,0.0,0.0
2017-02-03,0,0.0,0.0
2017-02-04,0,0.0,0.0


In [89]:
# Partial string indexing on a DatetimeIndex: every row from the start of 2017 onwards.
df.loc['2017':]

Unnamed: 0,Price,diff,pct_change
2017-01-31,300,,
2017-02-28,400,,0.333333
2017-03-31,500,200.0,0.25


In [75]:
# Move values one row down; the first row has no predecessor and becomes NaN.
df['Price'].shift(periods=1)

2017-01-31      NaN
2017-02-28    300.0
2017-03-31    400.0
Freq: M, Name: Price, dtype: float64

In [79]:
# Difference between each value and the one two rows earlier.
df['diff'] = df['Price'].diff(periods=2)
df

Unnamed: 0,Price,diff,pct_change
2017-01-31,300,,
2017-02-28,400,,0.333333
2017-03-31,500,200.0,0.25


In [76]:
# Fractional change relative to the previous row: (current - previous) / previous.
df['pct_change'] = df['Price'].pct_change(periods=1)
df

Unnamed: 0,Price,diff,pct_change
2017-01-31,300,,
2017-02-28,400,100.0,0.333333
2017-03-31,500,100.0,0.25


In [109]:
# Downsample to yearly frequency, keeping the first non-null value of each column per year.
df.resample('Y').first().head()

Unnamed: 0,Price,diff,pct_change
2017-12-31,300,200.0,0.333333


In [None]:
# Downsample to yearly frequency, keeping the last value in each year.
# last must be called; without the parentheses the cell only shows the bound method.
df.resample('Y').last()

# fill methods (chain onto an upsampled result to fill the gaps):
#   .interpolate()
#   .ffill()
#   .bfill()

In [94]:
# Latest timestamp in the index.
df.index.max()

Timestamp('2017-03-31 00:00:00', freq='M')