# Date and time data types and tools

In [31]:
from datetime import datetime
import pandas as pd

In [2]:
# Capture the current date and time; no tz argument is passed, so the result is naive.
now = datetime.now()

now

datetime.datetime(2023, 12, 16, 20, 24, 5, 127059)

In [3]:
# A bare tuple expression: displays the year, month, and day fields of `now`.
now.year, now.month, now.day

(2023, 12, 16)

In [4]:
# Subtracting one datetime from another yields a timedelta.
delta = (
    datetime(year=2011, month=1, day=7)
    - datetime(year=2008, month=6, day=24, hour=8, minute=15)
)

In [5]:
# Display the timedelta produced by the subtraction.
delta

datetime.timedelta(days=926, seconds=56700)

In [6]:
# Whole-day component of the difference.
delta.days

926

In [7]:
# Leftover seconds beyond the whole days (56700 s = 15 h 45 min),
# not the total duration expressed in seconds.
delta.seconds

56700

### We can add or subtract a `timedelta` (or a multiple of one) to or from a `datetime`

In [8]:
from datetime import timedelta

In [9]:
# Reference date used in the timedelta arithmetic examples.
start = datetime(year=2011, month=1, day=7)

In [10]:
# timedelta's first positional argument is days, so this moves 12 days forward.
start + timedelta(days=12)

datetime.datetime(2011, 1, 19, 0, 0)

In [11]:
# A timedelta can be scaled by an integer: this moves 2 * 12 = 24 days back.
start - 2 * timedelta(days=12)

datetime.datetime(2010, 12, 14, 0, 0)

## Converting between strings and datetime

In [13]:
# Sample datetime used in the string-formatting examples.
stamp = datetime(year=2011, month=1, day=3)

In [14]:
# str() on a datetime gives the date and time separated by a space.
str(stamp)

'2011-01-03 00:00:00'

In [18]:
# strftime formats with an explicit pattern:
# %Y = 4-digit year, %m = 2-digit month, %d = 2-digit day.
stamp.strftime('%Y-%m-%d')

'2011-01-03'

In [19]:
# A year-month-day date string to be parsed back into a datetime.
value = '2014-01-05'

In [21]:
# strptime is the inverse of strftime: it parses the string using the same format codes.
datetime.strptime(value, '%Y-%m-%d')

datetime.datetime(2014, 1, 5, 0, 0)

In [22]:
# Date strings written as month/day/year.
datestrs = ['7/6/2001', '8/2/2004']

In [25]:
# Parse every string with one known format; strptime raises ValueError
# for a string that does not match the pattern.
[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]

[datetime.datetime(2001, 7, 6, 0, 0), datetime.datetime(2004, 8, 2, 0, 0)]

In [26]:
from dateutil.parser import parse

In [27]:
# dateutil's parse works out the format itself; no format string is needed.
parse('2005-10-03')

datetime.datetime(2005, 10, 3, 0, 0)

In [28]:
# It also understands month names and 12-hour clock times (08:31 PM -> 20:31).
parse('Jan 23, 1987 08:31 PM')

datetime.datetime(1987, 1, 23, 20, 31)

In [29]:
# dayfirst=True reads the ambiguous '04/05/2001' as 4 May 2001 (day before month).
parse('04/05/2001', dayfirst=True)

datetime.datetime(2001, 5, 4, 0, 0)

In [30]:
# Rebinds datestrs (previously the month/day/year strings) to full timestamps.
datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']

In [32]:
# to_datetime converts the whole list at once and returns a DatetimeIndex.
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [33]:
# Appending None shows how a missing value is handled: it becomes NaT in the result.
idx = pd.to_datetime(datestrs + [None])

In [34]:
# Display the index, including the missing entry.
idx

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [35]:
# The third element is the missing entry, shown as NaT (Not a Time).
idx[2]

NaT

In [36]:
# Boolean mask marking the missing (NaT) entries of the index.
pd.isna(idx)

array([False, False,  True])

# Time series basics

In [41]:
from datetime import datetime
import numpy as np

In [44]:
# Six irregularly spaced days in January 2011, used as the index of the sample series.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

In [45]:
# Display the plain Python list of datetime objects.
dates

[datetime.datetime(2011, 1, 2, 0, 0),
 datetime.datetime(2011, 1, 5, 0, 0),
 datetime.datetime(2011, 1, 7, 0, 0),
 datetime.datetime(2011, 1, 8, 0, 0),
 datetime.datetime(2011, 1, 10, 0, 0),
 datetime.datetime(2011, 1, 12, 0, 0)]

In [46]:
# Series of six random values indexed by the dates above.
# NOTE(review): no RNG seed is set in the visible cells, so these values
# (and every output derived from them) change on each run.
ts = pd.Series(np.random.randn(6), index=dates)

In [47]:
# Display the time series.
ts

2011-01-02   -0.701957
2011-01-05    0.146683
2011-01-07    0.049474
2011-01-08    0.116163
2011-01-10    0.967597
2011-01-12    0.549301
dtype: float64

In [48]:
# The list of datetime objects has been converted to a DatetimeIndex.
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [49]:
# ts[::2] keeps every second row; arithmetic aligns on the dates,
# so dates missing from the slice produce NaN.
ts + ts[::2]

2011-01-02   -1.403915
2011-01-05         NaN
2011-01-07    0.098949
2011-01-08         NaN
2011-01-10    1.935195
2011-01-12         NaN
dtype: float64

In [51]:
# Index values are stored as NumPy datetime64 at nanosecond resolution.
ts.index.dtype

dtype('<M8[ns]')

In [52]:
# Rebinds stamp: a scalar taken from a DatetimeIndex is a pandas Timestamp.
stamp = ts.index[0]

In [53]:
# Display the Timestamp taken from the index.
stamp

Timestamp('2011-01-02 00:00:00')

# Indexing, selection, and subsetting

In [54]:
# Take the third index label (2011-01-07) to use as a lookup key.
stamp = ts.index[2]

In [56]:
# Label-based lookup with a Timestamp returns the value stored at that date.
ts[stamp]

0.049474428249271805

In [57]:
# A string that can be interpreted as a date also works as a label
# (read here as 10 January 2011).
ts['1/10/2011']

0.9675974645537238

In [58]:
# Same lookup using a compact YYYYMMDD string.
ts['20110110']

0.9675974645537238

In [60]:
# 1000 consecutive daily observations starting on 2000-01-01, filled with random values.
longer_ts = pd.Series(
    data=np.random.randn(1000),
    index=pd.date_range(start='1/1/2000', periods=1000),
)

In [61]:
# Display the series; pandas abbreviates the middle rows of long output.
longer_ts

2000-01-01    1.686428
2000-01-02    2.344407
2000-01-03    0.561269
2000-01-04    0.745338
2000-01-05   -1.712539
                ...   
2002-09-22    1.381219
2002-09-23   -0.888535
2002-09-24   -2.315309
2002-09-25    0.129366
2002-09-26   -1.728088
Freq: D, Length: 1000, dtype: float64

In [62]:
# Passing only a year selects every row in that year (365 rows here).
longer_ts['2001']

2001-01-01   -0.074962
2001-01-02   -0.045572
2001-01-03   -0.997197
2001-01-04    0.692548
2001-01-05   -1.471048
                ...   
2001-12-27   -0.191306
2001-12-28    0.166633
2001-12-29    1.870571
2001-12-30    0.963967
2001-12-31   -1.676432
Freq: D, Length: 365, dtype: float64

In [65]:
# A year-month string selects every row in that month.
longer_ts['2001-05']

2001-05-01    0.030358
2001-05-02   -0.486717
2001-05-03    0.430115
2001-05-04    0.172492
2001-05-05   -2.397621
2001-05-06   -0.181273
2001-05-07    0.340808
2001-05-08    1.302046
2001-05-09   -1.200169
2001-05-10   -0.079993
2001-05-11    0.010712
2001-05-12    0.189204
2001-05-13    0.649148
2001-05-14   -0.249260
2001-05-15    2.691187
2001-05-16    0.426688
2001-05-17   -0.114991
2001-05-18    0.229189
2001-05-19    0.673246
2001-05-20   -2.256392
2001-05-21   -0.512525
2001-05-22   -0.420850
2001-05-23   -0.926045
2001-05-24    1.259494
2001-05-25    0.219852
2001-05-26    0.395901
2001-05-27    0.548964
2001-05-28    0.775034
2001-05-29    0.730190
2001-05-30    0.695832
2001-05-31    0.122485
Freq: D, dtype: float64

In [66]:
# Slicing with a datetime object; the start label 2011-01-07 is included in the result.
ts[datetime(2011, 1, 7):]

2011-01-07    0.049474
2011-01-08    0.116163
2011-01-10    0.967597
2011-01-12    0.549301
dtype: float64

In [68]:
# Re-display the full series for comparison with the slices.
ts

2011-01-02   -0.701957
2011-01-05    0.146683
2011-01-07    0.049474
2011-01-08    0.116163
2011-01-10    0.967597
2011-01-12    0.549301
dtype: float64

In [70]:
# String slice bounds work too, and both endpoints are included
# (2011-01-08 appears in the result).
ts['1/2/2011':'1/8/2011']

2011-01-02   -0.701957
2011-01-05    0.146683
2011-01-07    0.049474
2011-01-08    0.116163
dtype: float64

In [72]:
# truncate(after=...) drops every row later than the given date;
# the date itself (1/9/2011) does not have to be present in the index.
ts.truncate(after='1/9/2011')

2011-01-02   -0.701957
2011-01-05    0.146683
2011-01-07    0.049474
2011-01-08    0.116163
dtype: float64

## The same indexing works on DataFrames as well

In [73]:
# Rebinds dates (previously a list of datetime objects) to 100 weekly
# timestamps that fall on Wednesdays, starting from the first one on/after 2000-01-01.
dates = pd.date_range(start='1/1/2000', periods=100, freq='W-WED')

In [74]:
# Display the weekly DatetimeIndex.
dates

DatetimeIndex(['2000-01-05', '2000-01-12', '2000-01-19', '2000-01-26',
               '2000-02-02', '2000-02-09', '2000-02-16', '2000-02-23',
               '2000-03-01', '2000-03-08', '2000-03-15', '2000-03-22',
               '2000-03-29', '2000-04-05', '2000-04-12', '2000-04-19',
               '2000-04-26', '2000-05-03', '2000-05-10', '2000-05-17',
               '2000-05-24', '2000-05-31', '2000-06-07', '2000-06-14',
               '2000-06-21', '2000-06-28', '2000-07-05', '2000-07-12',
               '2000-07-19', '2000-07-26', '2000-08-02', '2000-08-09',
               '2000-08-16', '2000-08-23', '2000-08-30', '2000-09-06',
               '2000-09-13', '2000-09-20', '2000-09-27', '2000-10-04',
               '2000-10-11', '2000-10-18', '2000-10-25', '2000-11-01',
               '2000-11-08', '2000-11-15', '2000-11-22', '2000-11-29',
               '2000-12-06', '2000-12-13', '2000-12-20', '2000-12-27',
               '2001-01-03', '2001-01-10', '2001-01-17', '2001-01-24',
      

In [75]:
# 100 x 4 frame of random values: one row per weekly date, four labelled columns.
long_df = pd.DataFrame(
    data=np.random.randn(100, 4),
    index=dates,
    columns=['SP', 'PE', 'BA', 'RJ'],
)

In [76]:
# Display the frame; pandas abbreviates the middle rows of long output.
long_df

Unnamed: 0,SP,PE,BA,RJ
2000-01-05,-1.887537,0.020472,0.330545,-0.904534
2000-01-12,-1.742825,-0.393343,-0.268843,2.099011
2000-01-19,0.637088,-1.070478,-0.718135,-0.382335
2000-01-26,0.624847,1.118048,1.661265,-1.411199
2000-02-02,-0.832674,2.453654,-0.169162,-0.460395
...,...,...,...,...
2001-10-31,-1.366952,0.582982,-0.096415,-0.560480
2001-11-07,0.348122,1.016142,1.195730,-0.591810
2001-11-14,-0.431549,1.487987,-0.332073,-1.410208
2001-11-21,-0.718840,-1.288410,-1.265072,1.774499


In [79]:
# .loc with a month-year string selects all rows that fall in May 2001.
long_df.loc['5-2001']

Unnamed: 0,SP,PE,BA,RJ
2001-05-02,-1.093345,0.030116,-0.482647,1.293262
2001-05-09,-1.327504,0.245911,-0.2226,-0.413712
2001-05-16,0.401895,0.123514,-1.20436,-0.547278
2001-05-23,-1.383113,-0.448157,0.494512,-0.478459
2001-05-30,0.120219,-0.439205,0.192256,-1.064298
