# Time Series Basics with Pandas

## What is a time series?

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Five observation dates in January 2020, spaced five days apart (5th through 25th).
date = [datetime(2020, 1, day) for day in range(5, 26, 5)]

In [3]:
# Pair five standard-normal draws with the dates in `date`; passing datetime
# objects as the index is what makes this a time series.
# NOTE(review): no seed is set in this cell, so the values presumably differ on every run.
ts = pd.Series(data=np.random.randn(5), index=date)
ts

2020-01-05    0.708354
2020-01-10   -0.269455
2020-01-15    0.530274
2020-01-20   -0.376605
2020-01-25    0.583312
dtype: float64

In [4]:
# Inspect the index that was built from the datetime objects passed to pd.Series.
ts.index 

DatetimeIndex(['2020-01-05', '2020-01-10', '2020-01-15', '2020-01-20',
               '2020-01-25'],
              dtype='datetime64[ns]', freq=None)

## Time Series Data Structures

In [15]:
# Parse one slash-separated date string. The first field is read as the month
# (dayfirst=False is the pandas default, spelled out here for clarity).
pd.to_datetime("05/09/2020", dayfirst=False)

Timestamp('2020-05-09 00:00:00')

In [18]:
# Each entry spells a date differently: a datetime object, free text,
# an abbreviated month name, and compact digits.
dates = pd.to_datetime([
    datetime(2020, 7, 5),
    "6th of July, 2020",
    "2020-Jul-7",
    "20200708",
])

dates

DatetimeIndex(['2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08'], dtype='datetime64[ns]', freq=None)

In [21]:
# Rebuilds `dates` from the same four mixed-format inputs used in the earlier cell.
dates = pd.to_datetime([
    datetime(2020, 7, 5),
    "6th of July, 2020",
    "2020-Jul-7",
    "20200708",
])

dates

# Optional follow-up, left disabled: dates.to_period("D")

DatetimeIndex(['2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08'], dtype='datetime64[ns]', freq=None)

In [22]:
# Positional access on the index returns a single element (a Timestamp, per the recorded output).
dates[0]

Timestamp('2020-07-05 00:00:00')

In [23]:
# Subtract the first date from every entry to get each date's distance from it
# (a TimedeltaIndex, per the recorded output).
dates-dates[0]

TimedeltaIndex(['0 days', '1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None)

## Creating a Time Series

In [24]:
# Daily stamps from the start date through the end date; both endpoints are included.
pd.date_range(start="2020-08-15", end="2020-09-01")

DatetimeIndex(['2020-08-15', '2020-08-16', '2020-08-17', '2020-08-18',
               '2020-08-19', '2020-08-20', '2020-08-21', '2020-08-22',
               '2020-08-23', '2020-08-24', '2020-08-25', '2020-08-26',
               '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30',
               '2020-08-31', '2020-09-01'],
              dtype='datetime64[ns]', freq='D')

In [25]:
# Give a start date and a count instead of an end date: ten consecutive days.
pd.date_range(start="2020-07-15", periods=10)

DatetimeIndex(['2020-07-15', '2020-07-16', '2020-07-17', '2020-07-18',
               '2020-07-19', '2020-07-20', '2020-07-21', '2020-07-22',
               '2020-07-23', '2020-07-24'],
              dtype='datetime64[ns]', freq='D')

In [26]:
# Ten hourly stamps starting at midnight on 15 Jul 2020.
# The lowercase "h" alias is used because uppercase "H" is deprecated in
# recent pandas releases; "h" is accepted by older releases as well.
pd.date_range(start="2020-07-15", periods=10, freq="h")

DatetimeIndex(['2020-07-15 00:00:00', '2020-07-15 01:00:00',
               '2020-07-15 02:00:00', '2020-07-15 03:00:00',
               '2020-07-15 04:00:00', '2020-07-15 05:00:00',
               '2020-07-15 06:00:00', '2020-07-15 07:00:00',
               '2020-07-15 08:00:00', '2020-07-15 09:00:00'],
              dtype='datetime64[ns]', freq='H')

In [27]:
# Ten consecutive monthly periods beginning with October 2020.
pd.period_range(start="2020-10", periods=10, freq="M")

PeriodIndex(['2020-10', '2020-11', '2020-12', '2021-01', '2021-02', '2021-03',
             '2021-04', '2021-05', '2021-06', '2021-07'],
            dtype='period[M]')

In [28]:
# Eight durations one hour apart, starting from zero.
# The lowercase "h" alias is used because uppercase "H" is deprecated in
# recent pandas releases; "h" is accepted by older releases as well.
pd.timedelta_range(start=0, periods=8, freq="h")

TimedeltaIndex(['0 days 00:00:00', '0 days 01:00:00', '0 days 02:00:00',
                '0 days 03:00:00', '0 days 04:00:00', '0 days 05:00:00',
                '0 days 06:00:00', '0 days 07:00:00'],
               dtype='timedelta64[ns]', freq='H')

In [30]:
# A longer series: 1000 random normal draws on consecutive days starting 1 Jan 2020.
long_ts = pd.Series(
    data=np.random.randn(1000),
    index=pd.date_range(start="1/1/2020", periods=1000),
)
long_ts.head()

2020-01-01    0.675645
2020-01-02    0.658096
2020-01-03    1.148538
2020-01-04   -1.549556
2020-01-05    2.665875
Freq: D, dtype: float64

In [39]:
# A year-month string selects every observation in that month; show the first five.
long_ts.loc["2021-01"].head()

2021-01-01   -0.040264
2021-01-02   -0.521694
2021-01-03   -0.354232
2021-01-04   -1.467126
2021-01-05   -0.760368
Freq: D, dtype: float64

In [32]:
# Same month-level selection for October 2020, showing the first fifteen days.
long_ts.loc["2020-10"].head(15)

2020-10-01    0.160529
2020-10-02   -0.128764
2020-10-03    1.365966
2020-10-04    0.173414
2020-10-05   -1.693958
2020-10-06   -2.524092
2020-10-07    0.682938
2020-10-08    0.086161
2020-10-09   -0.653755
2020-10-10    0.613755
2020-10-11    1.886422
2020-10-12   -0.342289
2020-10-13    0.579203
2020-10-14   -2.516670
2020-10-15    1.476417
Freq: D, dtype: float64

In [33]:
# Open-ended label slice: everything from 20 Sep 2022 to the end of the series.
long_ts.loc[datetime(2022, 9, 20):]

2022-09-20    0.294222
2022-09-21   -0.086294
2022-09-22    0.812461
2022-09-23    0.180221
2022-09-24    1.521097
2022-09-25   -0.142935
2022-09-26   -1.655461
Freq: D, dtype: float64

## Important Methods for Time Series

In [40]:
# Show the short five-point series again as a reference for the method calls that follow.
ts

2020-01-05    0.708354
2020-01-10   -0.269455
2020-01-15    0.530274
2020-01-20   -0.376605
2020-01-25    0.583312
dtype: float64

In [42]:
# Drop every observation dated before 15 Jan 2020; the boundary date itself is
# kept, as the recorded output shows.
ts.truncate(before="1/15/2020")

2020-01-15    0.530274
2020-01-20   -0.376605
2020-01-25    0.583312
dtype: float64

In [None]:
# 100 weekly stamps anchored on Sundays, starting from the first Sunday on or after 1 Jan 2020.
# Note: this rebinds `date`, which earlier in the notebook held a plain list of datetimes.
date = pd.date_range(start="1/1/2020", periods=100, freq="W-SUN")

In [None]:
# A 100-row, four-column frame of random normal draws indexed by `date`.
# Assumes `date` is the 100-entry weekly index built in the preceding cell.
long_df = pd.DataFrame(
    data=np.random.randn(100, 4),
    index=date,
    columns=["A", "B", "C", "D"],
)
long_df.head()

In [None]:
# Select the rows dated October 2020.
# Row selection by a partial date string must go through .loc: on a DataFrame,
# plain long_df["2020-10"] is treated as a column lookup in pandas >= 2.0 and
# raises KeyError (the row-selecting behaviour was deprecated in pandas 1.2).
long_df.loc["2020-10"]