## Time Series Analysis

In [23]:
import pandas as pd
import numpy as np
from datetime import datetime

In [24]:
date=[datetime(2020,1,5),
         datetime(2020,1,10),
         datetime(2020,1,15),
         datetime(2020,1,20),
          datetime(2020,1,25)]

In [25]:
date

[datetime.datetime(2020, 1, 5, 0, 0),
 datetime.datetime(2020, 1, 10, 0, 0),
 datetime.datetime(2020, 1, 15, 0, 0),
 datetime.datetime(2020, 1, 20, 0, 0),
 datetime.datetime(2020, 1, 25, 0, 0)]

In [26]:
ts=pd.Series(np.random.randn(5))

In [27]:
ts

0    0.965983
1   -0.631948
2   -0.377389
3    0.960195
4   -0.069956
dtype: float64

In [28]:
ts=pd.Series(np.random.randn(5),index=date)

In [29]:
ts

2020-01-05    2.080772
2020-01-10   -0.497447
2020-01-15    1.446559
2020-01-20    0.098586
2020-01-25   -0.293919
dtype: float64

In [30]:
ts.index

DatetimeIndex(['2020-01-05', '2020-01-10', '2020-01-15', '2020-01-20',
               '2020-01-25'],
              dtype='datetime64[ns]', freq=None)

**Time Series Data Structures**

In [31]:
pd.to_datetime("01/01/2020")

Timestamp('2020-01-01 00:00:00')

In [32]:
dates=pd.to_datetime(
    [datetime(2020,7,5),
     "6th of July, 2020",
     "2020-Jul-7",
     "20200708"])
dates

DatetimeIndex(['2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08'], dtype='datetime64[ns]', freq=None)

In [34]:
dates.to_period('D')

PeriodIndex(['2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08'], dtype='period[D]')

In [35]:
dates[0]

Timestamp('2020-07-05 00:00:00')

In [36]:
dates-dates[0]

TimedeltaIndex(['0 days', '1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None)

## Creating a Time Series

In [38]:
pd.date_range("12/2/2019","12/25/2019")

DatetimeIndex(['2019-12-02', '2019-12-03', '2019-12-04', '2019-12-05',
               '2019-12-06', '2019-12-07', '2019-12-08', '2019-12-09',
               '2019-12-10', '2019-12-11', '2019-12-12', '2019-12-13',
               '2019-12-14', '2019-12-15', '2019-12-16', '2019-12-17',
               '2019-12-18', '2019-12-19', '2019-12-20', '2019-12-21',
               '2019-12-22', '2019-12-23', '2019-12-24', '2019-12-25'],
              dtype='datetime64[ns]', freq='D')

In [39]:
# Specify dates with dayfirst=True for DD/MM/YYYY format
dates1 = pd.date_range(start=pd.to_datetime("2/12/2019", dayfirst=True),
                      end=pd.to_datetime("25/12/2019", dayfirst=True))

dates

DatetimeIndex(['2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08'], dtype='datetime64[ns]', freq=None)

In [40]:
pd.date_range('2020-07-15', periods=10)

DatetimeIndex(['2020-07-15', '2020-07-16', '2020-07-17', '2020-07-18',
               '2020-07-19', '2020-07-20', '2020-07-21', '2020-07-22',
               '2020-07-23', '2020-07-24'],
              dtype='datetime64[ns]', freq='D')

In [56]:
pd.date_range("2020-07-15",
              periods=10,
              freq="h")

DatetimeIndex(['2020-07-15 00:00:00', '2020-07-15 01:00:00',
               '2020-07-15 02:00:00', '2020-07-15 03:00:00',
               '2020-07-15 04:00:00', '2020-07-15 05:00:00',
               '2020-07-15 06:00:00', '2020-07-15 07:00:00',
               '2020-07-15 08:00:00', '2020-07-15 09:00:00'],
              dtype='datetime64[ns]', freq='h')

In [43]:
pd.period_range("2020-10", 
                periods=10,
                freq="M")

PeriodIndex(['2020-10', '2020-11', '2020-12', '2021-01', '2021-02', '2021-03',
             '2021-04', '2021-05', '2021-06', '2021-07'],
            dtype='period[M]')

In [52]:
pd.timedelta_range(0,periods=8,freq="h")

TimedeltaIndex(['0 days 00:00:00', '0 days 01:00:00', '0 days 02:00:00',
                '0 days 03:00:00', '0 days 04:00:00', '0 days 05:00:00',
                '0 days 06:00:00', '0 days 07:00:00'],
               dtype='timedelta64[ns]', freq='h')

In [53]:
stamp=ts.index[1]
stamp

Timestamp('2020-01-10 00:00:00')

In [54]:
ts[stamp]

-0.4974467290562969

In [58]:
ts["25.1.2020"]

-0.2939191101043038

In [59]:
ts["20200125"]

-0.2939191101043038

In [61]:
long_ts=pd.Series(
    np.random.randn(1000),
    index=pd.date_range("1/1/2020",
                        periods=1000))
long_ts.head()

2020-01-01   -1.001817
2020-01-02    0.488434
2020-01-03   -0.194605
2020-01-04   -0.089838
2020-01-05    0.478526
Freq: D, dtype: float64

In [62]:
long_ts["2020"].head()

2020-01-01   -1.001817
2020-01-02    0.488434
2020-01-03   -0.194605
2020-01-04   -0.089838
2020-01-05    0.478526
Freq: D, dtype: float64

In [63]:
long_ts["2020-10"].head(15)

2020-10-01    0.203914
2020-10-02   -0.486768
2020-10-03    2.026267
2020-10-04    1.277859
2020-10-05   -1.193348
2020-10-06    0.397402
2020-10-07    1.613501
2020-10-08   -1.521845
2020-10-09    0.491278
2020-10-10   -1.142572
2020-10-11    1.338760
2020-10-12   -2.007578
2020-10-13   -0.396028
2020-10-14   -0.150378
2020-10-15    0.533437
Freq: D, dtype: float64

**Important Methods in Time Series**

In [64]:
ts

2020-01-05    2.080772
2020-01-10   -0.497447
2020-01-15    1.446559
2020-01-20    0.098586
2020-01-25   -0.293919
dtype: float64

In [65]:
ts.truncate(after="1/15/2020")

2020-01-05    2.080772
2020-01-10   -0.497447
2020-01-15    1.446559
dtype: float64

In [66]:
date=pd.date_range("1/1/2020",
                   periods=100,
                   freq="W-SUN")

In [67]:
long_df=pd.DataFrame(np.random.randn(100,4),
                    index=date,
                    columns=list("ABCD"))
long_df.head()

Unnamed: 0,A,B,C,D
2020-01-05,0.484097,0.365644,2.393421,0.039768
2020-01-12,0.618473,1.01835,-0.339003,-2.552301
2020-01-19,0.810572,-1.564491,1.080764,-1.530435
2020-01-26,0.77964,0.498951,1.205404,0.883679
2020-02-02,-0.770789,-3.189544,-0.370595,-0.47171


In [71]:
long_df.loc["2020-10"]

Unnamed: 0,A,B,C,D
2020-10-04,-1.036029,-0.82333,-0.120775,2.700567
2020-10-11,0.036441,-0.078198,1.512189,0.02555
2020-10-18,0.869602,0.209151,-0.910959,-0.055384
2020-10-25,-0.854653,-1.529559,0.339011,0.756713


In [72]:
date=pd.DatetimeIndex(
    ["1/1/2020","1/2/2020","1/2/2020",
     "1/2/2020","1/3/2020"])
ts1=pd.Series(np.arange(5),index=date)
ts1

2020-01-01    0
2020-01-02    1
2020-01-02    2
2020-01-02    3
2020-01-03    4
dtype: int32

In [73]:
ts1.index.is_unique 

False

In [74]:
group=ts1.groupby(level=0) 

In [75]:
group.count()

2020-01-01    1
2020-01-02    3
2020-01-03    1
dtype: int64

In [76]:
group.mean()

2020-01-01    0.0
2020-01-02    2.0
2020-01-03    4.0
dtype: float64