In [10]:
%matplotlib inline
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn; seaborn.set()


In [11]:
# Record the NumPy version that produced the outputs shown in this notebook.
np.__version__

'1.19.1'

In [12]:
# Record the pandas version that produced the outputs shown in this notebook.
pd.__version__

'1.1.3'

## Dates and Times in Python

### Native Python dates and times: datetime and dateutil

In [13]:
# Build a native datetime from (year, month, day); the time part defaults to midnight.
datetime(2020, 7, 4)

datetime.datetime(2020, 7, 4, 0, 0)

In [14]:
# dateutil's parser accepts free-form date strings; here the year, day and
# month are still recognised although they appear in an unusual order.
from dateutil import parser
date = parser.parse("2015 of 15  may ")
date

datetime.datetime(2015, 5, 15, 0, 0)

In [15]:
# Format the parsed date as its full weekday name ('%A' directive).
date.strftime('%A')

'Friday'

### Typed arrays of times: NumPy's datetime64

In [16]:
# Zero-dimensional NumPy array holding one date; with the generic 'datetime64'
# dtype the unit is inferred from the string (day resolution here).
date = np.array('2015-07-04', dtype='datetime64')
date

array('2015-07-04', dtype='datetime64[D]')

In [17]:
# Vectorized operations: adding an integer range to the single day-resolution
# date yields twelve consecutive days.
# The result is bound to a new name instead of overwriting `date`, so that
# re-running this cell always starts from the same scalar and gives the same
# output (the previous `date = date + ...` form changed `date` on every run).
date_seq = date + np.arange(12)

print(date_seq.shape)
date_seq


(12,)


array(['2015-07-04', '2015-07-05', '2015-07-06', '2015-07-07',
       '2015-07-08', '2015-07-09', '2015-07-10', '2015-07-11',
       '2015-07-12', '2015-07-13', '2015-07-14', '2015-07-15'],
      dtype='datetime64[D]')

In [18]:
# With no unit given, NumPy infers one from the input string;
# a date-only string is shown back at day resolution.
np.datetime64('2015-07-04')

numpy.datetime64('2015-07-04')

In [19]:
# A date-and-time string is stored at minute resolution. datetime64 is
# timezone-naive (NumPy >= 1.11), so the value is kept exactly as written:
# no conversion from or to the computer's local time zone takes place.
np.datetime64('2015-07-04 12:00')

numpy.datetime64('2015-07-04T12:00')

In [20]:
# Pass an explicit unit code ('ns') to force nanosecond resolution,
# regardless of how many fractional digits the input string carries.
np.datetime64('2015-07-04 12:59:59.50', 'ns')

numpy.datetime64('2015-07-04T12:59:59.500000000')

### Dates and times in pandas: best of both worlds

In [21]:
# pd.to_datetime parses a free-form date string into a pandas Timestamp.
date = pd.to_datetime("4th of July, 2015")
date

Timestamp('2015-07-04 00:00:00')

In [22]:
# Timestamp supports strftime just like a native datetime; '%A' gives the weekday name.
date.strftime('%A')

'Saturday'

In [23]:
# NumPy-style vectorized operations can be applied directly to this same Timestamp object:

In [24]:
# Twelve consecutive one-day offsets ('D' is the timedelta unit: days).
day_offsets = pd.to_timedelta(np.arange(12), unit='D')
# Adding them to the single Timestamp broadcasts into a DatetimeIndex.
date + day_offsets

DatetimeIndex(['2015-07-04', '2015-07-05', '2015-07-06', '2015-07-07',
               '2015-07-08', '2015-07-09', '2015-07-10', '2015-07-11',
               '2015-07-12', '2015-07-13', '2015-07-14', '2015-07-15'],
              dtype='datetime64[ns]', freq=None)

### Pandas Time Series: Indexing by Time
Where the Pandas time series tools really become useful is when you begin to index data by timestamps. 

In [25]:
# Four timestamps spread over two years serve as the index of a small Series.
stamps = ['2014-07-04', '2014-08-04', '2015-07-04', '2015-08-04']
timeIndex = pd.DatetimeIndex(stamps)

# Values 0..3, one per timestamp.
data = pd.Series(range(4), index=timeIndex)
data

2014-07-04    0
2014-08-04    1
2015-07-04    2
2015-08-04    3
dtype: int64

In [26]:
# Slice the Series by date strings; the recorded output shows that the end
# date is included in the result.
# Plain [] and .loc are both demonstrated (they should select the same rows);
# only the last expression is displayed as the cell output.
data['2014-07-04':'2015-07-04']
data.loc['2014-07-04':'2015-07-04']
 

2014-07-04    0
2014-08-04    1
2015-07-04    2
dtype: int64

In [27]:
# Special date-only indexing: passing just a year selects every entry
# whose timestamp falls in that year.
# Only the final (.loc) expression is displayed as the cell output.
data['2015']
data.loc['2015']

2015-07-04    2
2015-08-04    3
dtype: int64

### Pandas Time Series Data Structures

In [28]:
# pd.to_datetime() accepts a list mixing a native datetime with date strings
# written in several different formats, and returns a single DatetimeIndex.
mixed_inputs = [
    datetime(2015, 7, 3),
    '4th of July, 2015',
    '2015-Jul-6',
    '07-07-2015',
    '20150708',
]
dates = pd.to_datetime(mixed_inputs)
dates

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-06', '2015-07-07',
               '2015-07-08'],
              dtype='datetime64[ns]', freq=None)

In [29]:
# Time intervals: convert the timestamps to daily periods (a PeriodIndex with freq='D').
dates.to_period('D')

PeriodIndex(['2015-07-03', '2015-07-04', '2015-07-06', '2015-07-07',
             '2015-07-08'],
            dtype='period[D]', freq='D')

In [30]:
# Durations: subtracting one date from the whole index yields a TimedeltaIndex.
dates - dates[0]

TimedeltaIndex(['0 days', '1 days', '3 days', '4 days', '5 days'], dtype='timedelta64[ns]', freq=None)

### Regular sequences: pd.date_range()
`pd.date_range()` creates regular date sequences, much as Python's `range()` and NumPy's `np.arange()` create regular numeric sequences.

In [31]:
# Give both endpoints explicitly; the spacing defaults to one day.
pd.date_range(start='2015-07-03', end='2015-07-10')

DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-05', '2015-07-06',
               '2015-07-07', '2015-07-08', '2015-07-09', '2015-07-10'],
              dtype='datetime64[ns]', freq='D')

In [32]:
# A start point plus a number of periods also defines a range.
# Only the last call is displayed; its range runs across 2020-02-29 (a leap day).
pd.date_range('2015-07-03', periods=8)
pd.date_range('2015-07-30', periods=3)
pd.date_range('2020-02-27', periods=4)

DatetimeIndex(['2020-02-27', '2020-02-28', '2020-02-29', '2020-03-01'], dtype='datetime64[ns]', freq='D')

In [33]:
# freq='H' changes the spacing from the default daily steps to hourly steps.
pd.date_range('2015-07-03', periods=8, freq='H')

DatetimeIndex(['2015-07-03 00:00:00', '2015-07-03 01:00:00',
               '2015-07-03 02:00:00', '2015-07-03 03:00:00',
               '2015-07-03 04:00:00', '2015-07-03 05:00:00',
               '2015-07-03 06:00:00', '2015-07-03 07:00:00'],
              dtype='datetime64[ns]', freq='H')

In [34]:
# period_range builds a regular sequence of periods; here eight monthly periods.
pd.period_range('2015-07', periods=8, freq='M')

PeriodIndex(['2015-07', '2015-08', '2015-09', '2015-10', '2015-11', '2015-12',
             '2016-01', '2016-02'],
            dtype='period[M]', freq='M')

In [35]:
# timedelta_range builds a regular sequence of durations starting at zero.
# Fractional multiples are accepted: '1.5D' is reported back as freq='36H'.
# Only the last call is displayed as the cell output.
pd.timedelta_range(0, periods=4, freq='H')
pd.timedelta_range(0, periods=4, freq='1.5D')

TimedeltaIndex(['0 days 00:00:00', '1 days 12:00:00', '3 days 00:00:00',
                '4 days 12:00:00'],
               dtype='timedelta64[ns]', freq='36H')

### Frequencies and Offsets

In [36]:
# Frequency codes can be combined or scaled: '2H30T' (2 hours + 30 minutes)
# and '2.5H' should both describe the same 150-minute step.
# Only the last call is displayed; it is reported back as freq='150T'.
pd.timedelta_range(0, periods=9, freq="2H30T")
pd.timedelta_range(0, periods=9, freq="2.5H")

TimedeltaIndex(['0 days 00:00:00', '0 days 02:30:00', '0 days 05:00:00',
                '0 days 07:30:00', '0 days 10:00:00', '0 days 12:30:00',
                '0 days 15:00:00', '0 days 17:30:00', '0 days 20:00:00'],
               dtype='timedelta64[ns]', freq='150T')

In [37]:
# BDay is the business-day offset: the resulting range jumps from 2015-07-03
# straight to 2015-07-06, skipping the two days in between.
from pandas.tseries.offsets import BDay
pd.date_range('2015-07-01', periods=5, freq=BDay())

DatetimeIndex(['2015-07-01', '2015-07-02', '2015-07-03', '2015-07-06',
               '2015-07-07'],
              dtype='datetime64[ns]', freq='B')

## Resampling, Shifting, and Windowing

In [38]:
# pandas_datareader is an optional third-party package; in the recorded run it
# was not installed, which is why this cell ended in ModuleNotFoundError.
# The module is imported under the alias `web` so that it does not rebind the
# global name `data`, which earlier cells use for a Series.
from pandas_datareader import data as web

# Download the VTI price history for 2016-2021 from Yahoo.
# Note: data_source='google' raises NotImplementedError ("not implemented").
vti = web.DataReader('VTI', start='2016', end='2021',
                     data_source='yahoo')

# Preview the downloaded frame. The 'Close' column is selected in the next
# cell; doing it here as well would make that cell fail on an already-reduced
# Series, and would keep this preview from being the displayed output.
vti.head()

ModuleNotFoundError: No module named 'pandas_datareader'

In [None]:
# For simplicity, we'll use just the closing price.
# The isinstance guard keeps this cell idempotent: once `vti` has already been
# reduced to the 'Close' Series, looking up 'Close' again would fail.
if isinstance(vti, pd.DataFrame):
    vti = vti['Close']

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn; seaborn.set()


In [None]:
# Quick look at the price series; the trailing ';' suppresses the text repr
# of the object returned by plot().
vti.plot();

### Resampling and converting frequencies

In [None]:
# Draw the input series first and keep its axes so the overlays below are
# explicitly attached to the same plot.
price_ax = vti.plot(alpha=0.5, style='-', figsize=(12, 4))

# resample('BA') aggregates within each business year; here: the yearly mean.
# BA: business year end
yearly_mean = vti.resample('BA').mean()
yearly_mean.plot(style=':', ax=price_ax)

# asfreq('BA') instead picks the single value at each business year end.
year_end_value = vti.asfreq('BA')
year_end_value.plot(style='--', ax=price_ax)

price_ax.legend(['input', 'resample', 'asfreq'], loc='upper left');

In [None]:
# Two stacked panels sharing the x axis, showing the last 15 observations.
fig, ax = plt.subplots(2, sharex=True)
data = vti.iloc[-15:]

# Top panel: up-sample to daily frequency without filling, so the inserted
# non-business days are NA values and do not appear on the plot.
data.asfreq('D').plot(ax=ax[0], marker='o')
ax[0].legend(["default:NA values"])

# Bottom panel: the same up-sampling with the gaps filled,
# first backward-filling, then forward-filling.
for fill_method, line_style in (('bfill', ':o'), ('ffill', '--o')):
    data.asfreq('D', method=fill_method).plot(ax=ax[1], style=line_style)
ax[1].legend(["back-fill", "forward-fill"]);

### Time-shifts

In [None]:
# Three stacked panels comparing the input series with the two kinds of shift.
fig, ax = plt.subplots(3, sharey=True, figsize=(15, 6))

# apply a daily frequency to the data, padding (forward-filling) the gaps
vti = vti.asfreq('D', method='pad')

vti.plot(ax=ax[0])
# shift(n) moves the values by n rows and leaves the index untouched
vti.shift(-365).plot(ax=ax[1])
# shift(n, freq=...) moves the index instead of the values; it replaces the
# deprecated Series.tshift(n) used here before
vti.shift(-365, freq='D').plot(ax=ax[2])

# legends and annotations
local_min = pd.to_datetime('2020-03-22')
offset = pd.Timedelta(-365, 'D')

ax[0].legend(['input'], loc=2)
ax[0].get_xticklabels()[2].set(weight='heavy', color='red')
ax[0].axvline(local_min, alpha=0.3, color='red')

# the legend labels carry the sign actually passed to shift() above
ax[1].legend(['shift(-365)'], loc=2)
ax[1].get_xticklabels()[2].set(weight='heavy', color='red')
ax[1].axvline(local_min + offset, alpha=0.3, color='red')

ax[2].legend(["shift(-365, freq='D')"], loc=2)
ax[2].get_xticklabels()[1].set(weight='heavy', color='red')
ax[2].axvline(local_min + offset, alpha=0.3, color='red');

In [None]:
# Re-plot the series and mark 2020-03-22 with a faint red vertical line.
marker_ax = vti.plot()
marker_ax.axvline('2020-03-22', alpha=0.3, color='red');

In [None]:
# One-year return on investment: the value 365 rows ahead relative to the
# current value, expressed as a percentage change.
growth_factor = vti.shift(-365) / vti
ROI = 100 * (growth_factor - 1)

roi_ax = ROI.plot()
roi_ax.set_ylabel('% Return on Investment');

In [None]:
# One-month return on investment: the value 30 rows ahead relative to the
# current value, expressed as a percentage change.
growth_factor = vti.shift(-30) / vti
ROI = 100 * (growth_factor - 1)

roi_ax = ROI.plot()
roi_ax.set_ylabel('% Return on Investment');

In [None]:
# One-month return on investment, computed by shifting the index rather than
# the values. Series.tshift is deprecated (and absent from later pandas
# releases); shift(periods, freq=...) is its replacement and moves the index
# by 30 days in the same way. The division then aligns the two series by date.
ROI = 100 * (vti.shift(-30, freq='D') / vti - 1)
ROI.plot()
plt.ylabel('% Return on Investment');

### Rolling windows

In [None]:
# Centered rolling window spanning 365 rows of the series.
rolling = vti.rolling(window=365, center=True)

# Collect the input together with its rolling mean and standard deviation.
rolling_columns = {
    'input': vti,
    'one-year rolling_mean': rolling.mean(),
    'one-year rolling_std': rolling.std(),
}
data = pd.DataFrame(rolling_columns)

# One line style per column; fade the raw input so the statistics stand out.
ax = data.plot(style=['-', '--', ':'])
ax.get_lines()[0].set_alpha(0.3)