# Working with dates

In [1]:
import pandas as pd
import numpy as np
import datetime as t
from datetime import datetime

In [11]:
dates = pd.to_datetime(['4th of July, 2015',
                        '2015-Jul-6', '07-07-2015', '20150708'])
dates

DatetimeIndex(['2015-07-04', '2015-07-06', '2015-07-07', '2015-07-08'], dtype='datetime64[ns]', freq=None)

In [12]:
# Convert the timestamp index into a PeriodIndex with daily ('D') frequency.
dates.to_period('D')

PeriodIndex(['2015-07-04', '2015-07-06', '2015-07-07', '2015-07-08'], dtype='period[D]', freq='D')

In [2]:
# Capture the current date and time (naive, no timezone attached);
# the displayed value will differ on every run.
now = datetime.now()
now

datetime.datetime(2020, 8, 3, 10, 30, 26, 891593)

In [5]:
# Day-of-month component of the captured timestamp.
now.day

3

In [8]:
# A fixed calendar date; the time-of-day fields are left at their
# defaults, so the result is midnight on 3 August 2020.
stamp = datetime(year=2020, month=8, day=3)
stamp

datetime.datetime(2020, 8, 3, 0, 0)

In [9]:
# Default string form of a datetime: date and time separated by a space.
str(stamp)

'2020-08-03 00:00:00'

In [11]:
# Format as an ISO-style date (YYYY-MM-DD).
# The explicit '%Y-%m-%d' is used instead of the '%F' shortcut because '%F'
# is a C-library extension that is not available on every platform.
stamp.strftime('%Y-%m-%d')

'2020-08-03'

In [14]:
# 100 consecutive Wednesdays starting from the first one on/after 1 Jan 2000.
dates = pd.date_range(start='1/1/2000', periods=100, freq='W-WED')

# One column of random normal draws per state, indexed by those Wednesdays.
long_df = pd.DataFrame(
    data=np.random.randn(100, 4),
    index=dates,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)

# Partial-string indexing: a 'YYYY-MM' label selects every row in that month.
long_df.loc['2001-05']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,-1.82132,-1.414852,0.729292,-0.471447
2001-05-09,1.319595,0.134077,0.1562,-1.127393
2001-05-16,0.063953,-0.370857,-0.755636,2.048942
2001-05-23,-0.618738,-1.360911,0.033984,-0.033279
2001-05-30,-0.362817,-1.834112,-0.912128,1.470157


In [15]:
# An index in which 2 Jan 2000 appears three times, to demonstrate
# a time series with duplicate timestamps.
dates = pd.DatetimeIndex(
    ['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000', '1/3/2000']
)
dup_ts = pd.Series(data=np.arange(5), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int64

In [17]:
# Group on the index itself (level 0) so rows that share a timestamp
# end up in the same group.
grouped = dup_ts.groupby(level=0)

In [18]:
# Average of the values within each timestamp group.
grouped.mean()

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int64

In [19]:
# Number of observations recorded for each distinct timestamp.
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

In [20]:
# Six irregularly spaced days in January 2011.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

# Random normal values indexed by those dates.
ts = pd.Series(data=np.random.randn(len(dates)), index=dates)
ts

2011-01-02    0.785300
2011-01-05    0.123942
2011-01-07    0.480383
2011-01-08    0.979059
2011-01-10    0.056218
2011-01-12    0.648262
dtype: float64

In [25]:
# Build a resampler at daily ('D') frequency; no aggregation is applied
# in this cell, the object is only stored.
resampler = ts.resample('D')

In [26]:
# Date offset classes; Hour() with no argument represents a single hour.
from pandas.tseries.offsets import Hour, Minute
hour = Hour()
hour

<Hour>

In [27]:
# Four month-end observations starting January 2000.
# The frequency is given as an offset object rather than the 'M' string
# alias: recent pandas versions deprecate 'M' for month-end offsets, while
# pd.offsets.MonthEnd() is accepted by old and new versions alike.
ts = pd.Series(np.random.randn(4),
               index=pd.date_range('1/1/2000', periods=4,
                                   freq=pd.offsets.MonthEnd()))
ts

2000-01-31   -0.850258
2000-02-29    0.736118
2000-03-31   -1.351801
2000-04-30    0.587307
Freq: M, dtype: float64

In [36]:
# Third observation, selected by position.
# .iloc is used because the index holds dates: plain ts[2] depends on the
# deprecated fallback that treats an integer key as a position.
ts.iloc[2]

-1.3518010537793712

In [38]:
# After shifting the values down by one row, position 2 holds the
# observation that was originally at position 1 (the previous period).
# .iloc makes the positional lookup explicit on a date-indexed Series.
ts.shift(1).iloc[2]

0.7361178982136486

In [39]:
# Ratio of the third observation to the one immediately before it.
# Both lookups are positional, so .iloc is used instead of integer [].
ts.iloc[2] / ts.shift(1).iloc[2]

-1.8363920467900763

In [43]:
# Passing a frequency makes shift move the index labels (two month-ends
# forward) while leaving the values where they are.
# An offset object replaces the 'M' alias, which recent pandas versions
# deprecate for month-end offsets.
ts.shift(2, freq=pd.offsets.MonthEnd())

2000-03-31   -0.850258
2000-04-30    0.736118
2000-05-31   -1.351801
2000-06-30    0.587307
Freq: M, dtype: float64

In [44]:
import pytz

In [46]:
# Look up a timezone object by its IANA zone name.
tz = pytz.timezone('Europe/Helsinki')
tz

<DstTzInfo 'Europe/Helsinki' LMT+1:40:00 STD>

In [47]:
# Annual period for 2007 with the year ending in December.
# The frequency is passed as an offset object because the 'A-DEC' string
# alias is deprecated in recent pandas versions; YearEnd(month=12) denotes
# the same December-anchored annual frequency on old and new versions.
p = pd.Period(2007, freq=pd.offsets.YearEnd(month=12))
p

Period('2007', 'A-DEC')

In [51]:
# Integer arithmetic on a Period moves it by that many units of its own
# frequency (here, five years forward).
p + 5

Period('2012', 'A-DEC')

In [49]:
# Convert the annual period to monthly frequency, taking the month at the
# start of the year.
p.asfreq('M', how='start')

Period('2007-01', 'M')