# Time Series Basics

## Importing Time Series Data from csv-Files

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv("temp.csv", parse_dates = ["datetime"], index_col= "datetime")

In [None]:
temp.head()

In [None]:
temp

In [None]:
temp.loc['2014-12-15 10']

In [None]:
temp.index

In [None]:
temp.index[0]

## Converting strings to datetime objects with pd.to_datetime()

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv("temp.csv")

In [None]:
temp.head()

In [None]:
temp.info()

In [None]:
temp.datetime[0]

In [None]:
pd.to_datetime(temp.datetime)

In [None]:
temp = temp.set_index(pd.to_datetime(temp.datetime)).drop("datetime", axis = 1)

In [None]:
temp.head()

In [None]:
temp.info()

In [None]:
temp.iloc[0,0]

In [None]:
pd.to_datetime("2015-05-20 10:30:20")

In [None]:
pd.to_datetime("20150520")

In [None]:
pd.to_datetime("2015/05/20")

In [None]:
pd.to_datetime("2015 05 20")

In [None]:
pd.to_datetime("2015-20-05")

In [None]:
pd.to_datetime("2015 May 20")

In [None]:
pd.to_datetime("May 2015 20")

In [None]:
pd.to_datetime("2015 20th may")

In [None]:
pd.to_datetime(["2015-05-20", "Feb 20 2015"])

In [None]:
pd.to_datetime(["2015-05-20", "Feb 20 2015", "Elephant"], errors="coerce")

## Indexing and Slicing Time Series

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv("temp.csv", parse_dates= ["datetime"], index_col= "datetime")

In [None]:
temp.head()

In [None]:
temp.info()

In [None]:
temp.loc["2013-01-01 01:00:00"]

In [None]:
temp.loc["2015"]

In [None]:
temp.loc["2015-05"]

In [None]:
temp.loc["2015-05-20"]

In [None]:
temp.loc["2015-05-20 10:00:00"]

In [None]:
#temp.loc["2015-05-20 10:30:00"]

In [None]:
temp.loc["2015-01-01" : "2015-12-31"]

In [None]:
temp.loc["2015-01-01" : "2015-12-31"].equals(temp.loc["2015"])

In [None]:
temp.loc["2015-04-15" : "2016-02-23"]

In [None]:
temp.loc["2015-05-20":]

In [None]:
temp.loc[:"2015-05-20"]

In [None]:
temp.loc["20FEBRUARY2015"]

In [None]:
#temp.loc[["2015-05-20 10:00:00", "2015-05-20 12:00:00"]]

In [None]:
two_timestamps = pd.to_datetime(["2015-05-20 10:00:00", "2015-05-20 12:00:00"])
two_timestamps

In [None]:
temp.loc[two_timestamps]

## Downsampling Time Series with resample()

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use("seaborn")

In [2]:
temp = pd.read_csv("temp.csv", parse_dates= ["datetime"], index_col = "datetime")

In [3]:
temp.head()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3


In [4]:
temp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [11]:
list(temp.resample("D"))[0][1]

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3
2013-01-01 05:00:00,8.7,-2.5
2013-01-01 06:00:00,6.9,-3.2
2013-01-01 07:00:00,7.8,-3.4
2013-01-01 08:00:00,6.7,-3.0
2013-01-01 09:00:00,6.6,-1.8


In [12]:
temp.head(25)

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3
2013-01-01 05:00:00,8.7,-2.5
2013-01-01 06:00:00,6.9,-3.2
2013-01-01 07:00:00,7.8,-3.4
2013-01-01 08:00:00,6.7,-3.0
2013-01-01 09:00:00,6.6,-1.8


In [13]:
temp.resample("D").sum()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,212.6,-9.7
2013-01-02,222.8,77.0
2013-01-03,247.3,-58.2
2013-01-04,276.3,-49.7
2013-01-05,266.0,19.6
...,...,...
2016-12-27,291.7,253.9
2016-12-28,346.4,96.4
2016-12-29,385.1,31.5
2016-12-30,382.4,52.9


In [14]:
temp.resample("2H").first()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 04:00:00,8.8,-2.3
2013-01-01 06:00:00,6.9,-3.2
2013-01-01 08:00:00,6.7,-3.0
...,...,...
2016-12-31 14:00:00,12.7,-1.3
2016-12-31 16:00:00,12.6,1.1
2016-12-31 18:00:00,13.2,3.4
2016-12-31 20:00:00,13.2,5.7


In [15]:
temp.resample("W").mean()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-06,10.214583,0.222917
2013-01-13,10.494048,4.245238
2013-01-20,11.119643,3.924405
2013-01-27,15.586905,-4.660714
2013-02-03,11.569643,1.502381
...,...,...
2016-12-04,12.975595,8.024405
2016-12-11,13.738095,1.922619
2016-12-18,13.466667,0.052381
2016-12-25,13.007143,1.149405


In [16]:
temp.resample("W-Wed").mean()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-02,9.070833,1.402083
2013-01-09,11.033333,1.033929
2013-01-16,8.870238,6.001190
2013-01-23,14.678571,1.010714
2013-01-30,12.554762,-4.382738
...,...,...
2016-12-07,13.205357,5.964286
2016-12-14,14.490476,1.228571
2016-12-21,13.209524,-2.248810
2016-12-28,11.930357,4.688095


In [19]:
temp.resample("M").mean()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-31,11.596237,1.12957
2013-02-28,12.587202,0.617857
2013-03-31,15.069946,3.71922
2013-04-30,16.487361,10.699306
2013-05-31,19.00578,15.824328
2013-06-30,19.905417,22.225694
2013-07-31,22.093952,26.329704
2013-08-31,21.513172,22.480376
2013-09-30,22.404861,18.291806
2013-10-31,16.62043,14.335215


In [20]:
temp.resample("MS").mean()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,11.596237,1.12957
2013-02-01,12.587202,0.617857
2013-03-01,15.069946,3.71922
2013-04-01,16.487361,10.699306
2013-05-01,19.00578,15.824328
2013-06-01,19.905417,22.225694
2013-07-01,22.093952,26.329704
2013-08-01,21.513172,22.480376
2013-09-01,22.404861,18.291806
2013-10-01,16.62043,14.335215


In [21]:
temp.resample("MS", loffset="14D").mean()


>>> df.resample(freq="3s", loffset="8H")

becomes:

>>> from pandas.tseries.frequencies import to_offset
>>> df = df.resample(freq="3s").mean()
>>> df.index = df.index.to_timestamp() + to_offset("8H")

  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-15,11.596237,1.12957
2013-02-15,12.587202,0.617857
2013-03-15,15.069946,3.71922
2013-04-15,16.487361,10.699306
2013-05-15,19.00578,15.824328
2013-06-15,19.905417,22.225694
2013-07-15,22.093952,26.329704
2013-08-15,21.513172,22.480376
2013-09-15,22.404861,18.291806
2013-10-15,16.62043,14.335215


In [22]:
temp.resample("Q").mean()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-03-31,13.099212,1.862361
2013-06-30,18.472115,16.245101
2013-09-30,21.999638,22.411594
2013-12-31,15.047781,8.084918
2014-03-31,15.508287,-0.57088
2014-06-30,18.891255,16.253938
2014-09-30,22.281295,21.738315
2014-12-31,12.281748,7.855933
2015-03-31,11.869306,-3.340509
2015-06-30,16.334386,16.445238


In [23]:
temp.resample("Q-Feb").mean()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-02-28,12.066525,0.886723
2013-05-31,16.859973,10.07423
2013-08-31,21.184601,23.694384
2013-11-30,18.028755,13.258288
2014-02-28,14.675,-0.301713
2014-05-31,17.562047,9.645652
2014-08-31,21.691168,22.559284
2014-11-30,16.640522,13.09707
2015-02-28,9.822593,-2.481574
2015-05-31,14.185054,9.975181


In [24]:
temp.resample("Y").mean()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-12-31,17.174229,12.196153
2014-12-31,17.245616,11.370959
2015-12-31,17.19153,11.795194
2016-12-31,18.330305,12.90847


In [25]:
temp.resample("YS").mean()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,17.174229,12.196153
2014-01-01,17.245616,11.370959
2015-01-01,17.19153,11.795194
2016-01-01,18.330305,12.90847
