# **Time series basics**

## **Importing time series data from csv-file**

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv('temp.csv', parse_dates = ['datetime'], index_col = 'datetime')

In [None]:
temp.head(24)

In [None]:
temp.info()

In [None]:
temp.iloc[0, 0]

In [None]:
temp.index

In [None]:
temp.index[0]

## **Converting strings to datetime objects with pd.to_datetime()**

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv('temp.csv')

In [None]:
temp.info()

In [None]:
temp.head()

In [None]:
temp.datetime

In [None]:
pd.to_datetime(temp.datetime)

In [None]:
temp = temp.set_index(pd.to_datetime(temp.datetime)).drop(labels = 'datetime', axis = 1)

In [None]:
temp.info()

In [None]:
pd.to_datetime('2007-05-19')

In [None]:
pd.to_datetime('2007 05 19')

In [None]:
pd.to_datetime('2007-19-05', format = '%Y-%d-%m')

## **Initial analysis/Visual inspection**

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv('temp.csv', parse_dates = ['datetime'], index_col = 'datetime')

In [None]:
temp.info()

In [None]:
temp.describe()

In [None]:
temp.head()

In [None]:
# Look at the last rows as well; the first rows are already shown in the previous cell.
temp.tail()

In [None]:
temp.LA.value_counts()

In [None]:
import matplotlib.pyplot as plt

In [None]:
temp.plot(figsize = (15, 7))
plt.show()

In [None]:
temp.plot(figsize = (15, 7), subplots = True, layout = (1, 2), sharey = True)
plt.show()

## **Indexing and slicing**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
temp = pd.read_csv('temp.csv', parse_dates = ['datetime'], index_col = 'datetime')

In [None]:
temp.info()

In [None]:
temp.loc['2013-01-01 01:00:00']

In [None]:
temp.loc['2015']

In [None]:
temp.loc['2015', 'NY']

In [None]:
plt.figure(figsize = (12, 7))
sns.lineplot(data = temp.loc['2015'])
plt.show()

In [None]:
temp.loc['2015-05']

In [None]:
temp.loc['2015-05-01':'2015-05-03']

In [None]:
temp.loc['2015-05-01':'2016-02-03']

In [None]:
temp.loc['2015-06-01':]

In [None]:
temp.loc[:'2015-06-01']

In [None]:
two_timestamps = pd.to_datetime(['2015-05-01 10:00:00', '2015-05-01 12:00:00'])

In [None]:
temp.loc[two_timestamps]

## **Customizing datetime index using pd.date_range()**

In [None]:
import pandas as pd

In [None]:
# format='mixed' lets the format of each string be inferred individually.
pd.to_datetime(['2015-05-20', 'Feb 20 2015'], format = 'mixed')

In [None]:
# Range defined by its two endpoints; the default step is one calendar day.
pd.date_range(start = '2024-05-01', end = '2024-05-31')

In [None]:
# Same dates, defined by a start, a number of periods and a daily step.
pd.date_range(start = '2024-05-01', periods = 31, freq = 'D')

In [None]:
# Hourly steps.
pd.date_range(start = '2024-05-01', periods = 31, freq = 'h')

In [None]:
# Hourly steps again, written with the lowercase alias: the uppercase 'H'
# spelling is deprecated since pandas 2.2 in favour of 'h'.
pd.date_range(start = '2024-05-01', periods = 31, freq = 'h')

In [None]:
# A number in front of the alias multiplies the step: every two hours.
pd.date_range(start = '2024-05-01', periods = 31, freq = '2h')

In [None]:
# Business hours only.
pd.date_range(start = '2024-05-01', periods = 31, freq = 'bh')

In [None]:
# Weekly steps (anchored on Sunday when no weekday is given).
pd.date_range(start = '2024-05-01', periods = 5, freq = 'W')

In [None]:
# Weekly steps anchored on Monday.
pd.date_range(start = '2024-05-01', periods = 5, freq = 'W-Mon')

In [None]:
# Month ends.
pd.date_range(start = '2024-05-01', periods = 5, freq = 'ME')

In [None]:
# A DateOffset object as the step: two calendar months at a time.
pd.date_range(start = '2024-05-01', periods = 5, freq = pd.DateOffset(months = 2))

In [None]:
# Quarter starts.
pd.date_range(start = '2024-05-01', periods = 5, freq = 'QS')

In [None]:
# Year starts. 'YS' replaces the 'AS' alias, which is deprecated since pandas 2.2.
pd.date_range(start = '2024-05-01', periods = 5, freq = 'YS')

In [None]:
# Year ends.
pd.date_range(start = '2024-05-01', periods = 31, freq = 'YE')

In [None]:
# A DateOffset object as the step: one calendar year at a time, counted from the start date.
pd.date_range(start = '2024-05-01', periods = 10, freq = pd.DateOffset(years = 1))

In [None]:
# Compound step: two days plus two hours.
pd.date_range(start = '2024-05-01', periods = 10, freq = '2d2h')

## **Downsampling time series with resample()**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
temp = pd.read_csv('temp.csv', parse_dates = ['datetime'], index_col = 'datetime')

In [None]:
temp.info()

In [None]:
temp.head()

In [None]:
temp.tail()

In [None]:
temp.resample(rule = 'D')

In [None]:
list(temp.resample(rule = 'D'))

In [None]:
list(temp.resample(rule = 'D'))[0]

In [None]:
list(temp.resample(rule = 'D'))[0][1]

In [None]:
temp.resample(rule = 'D').first()

In [None]:
temp.resample(rule = 'D').last()

In [None]:
temp.resample(rule = 'D').mean()

In [None]:
list(temp.resample(rule = 'D'))[0][1].loc[:, 'LA'].mean()

In [None]:
temp.resample(rule = '2h').first()

In [None]:
list(temp.resample(rule = '2h'))[1][1]

In [None]:
temp.resample(rule = 'W-Mon').first()

In [None]:
list(temp.resample(rule = 'W-Mon'))[0][1]

In [None]:
temp.resample(rule = 'W-Mon').mean()

In [None]:
list(temp.resample(rule = 'W-Mon'))[0][1].mean()

In [None]:
# Monthly means labelled by monthly periods. The `kind` keyword of resample()
# is deprecated since pandas 2.2, so the result is converted with to_period().
temp.resample(rule = 'ME').mean().to_period(freq = 'M')

In [None]:
# Same result as the cell above; here the month-end DatetimeIndex is replaced
# by a PeriodIndex through an explicit assignment to the index.
temp_1 = temp.resample(rule = 'ME').mean()
# Alternative spelling of the conversion, kept for reference:
# temp_1.index = temp_1.index.astype('period[M]')
temp_1.index = temp_1.index.to_period()

In [None]:
temp_1

In [None]:
temp_1.plot(kind = 'line', figsize = (10, 5))
plt.show()

In [None]:
temp_2 = temp.resample(rule = 'W').mean()
temp_2.index = temp_2.index.to_period()

In [None]:
temp_2

In [None]:
temp_3 = temp.resample(rule = 'QS').mean()
temp_3.index = temp_3.index.to_period()
temp_3

## **The PeriodIndex object**

In [None]:
import pandas as pd

In [None]:
temp = pd.read_csv('temp.csv', parse_dates = ['datetime'], index_col = 'datetime')

In [None]:
temp.info()

In [None]:
temp.head()

In [None]:
temp.tail()

In [None]:
# Month-end bins. 'ME' replaces the 'M' alias, which is deprecated since
# pandas 2.2; it also matches the month-end resampling used elsewhere in this notebook.
temp_m1 = temp.resample(rule = 'ME').mean()

In [None]:
temp_m1

In [None]:
temp_m1.loc['2013']

In [None]:
temp_m1.loc['2013-01']

In [None]:
temp_m1.loc['2013-01':'2013-08']

In [None]:
temp_m2 = temp.resample(rule = 'MS').mean()
temp_m2.index = temp_m2.index.to_period()

In [None]:
temp_m2

In [None]:
temp_m2.loc['2013']

In [None]:
temp_m2.loc['2013-01']

In [None]:
temp_m2.loc['2013-01':'2014-02']

## **Advanced indexing with reindex()**

In [1]:
import pandas as pd

In [4]:
temp = pd.read_csv('temp.csv', parse_dates = ['datetime'], index_col = 'datetime')

In [5]:
temp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [6]:
# First rows of the frame.
temp.head()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3


In [7]:
# Last rows of the frame.
temp.tail()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-12-31 19:00:00,13.5,4.6
2016-12-31 20:00:00,13.2,5.7
2016-12-31 21:00:00,12.8,5.8
2016-12-31 22:00:00,12.3,5.7
2016-12-31 23:00:00,11.9,5.5


In [8]:
temp_D = temp.resample(rule = 'D').mean()

In [9]:
temp_D

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,8.858333,-0.404167
2013-01-02,9.283333,3.208333
2013-01-03,10.304167,-2.425000
2013-01-04,11.512500,-2.070833
2013-01-05,11.083333,0.816667
...,...,...
2016-12-27,12.154167,10.579167
2016-12-28,14.433333,4.016667
2016-12-29,16.045833,1.312500
2016-12-30,15.933333,2.204167


In [15]:
# Ten dates one calendar year apart, the last one being 24 December 2016.
one_year = pd.DateOffset(years=1)
xmas = pd.date_range(end='2016-12-24', periods=10, freq=one_year)
xmas

DatetimeIndex(['2007-12-24', '2008-12-24', '2009-12-24', '2010-12-24',
               '2011-12-24', '2012-12-24', '2013-12-24', '2014-12-24',
               '2015-12-24', '2016-12-24'],
              dtype='datetime64[ns]', freq='<DateOffset: years=1>')

In [18]:
# Look up the daily means for the dates in xmas; dates that are not in
# temp_D's index come back as rows of NaN.
temp_D.reindex(xmas)

Unnamed: 0,LA,NY
2007-12-24,,
2008-12-24,,
2009-12-24,,
2010-12-24,,
2011-12-24,,
2012-12-24,,
2013-12-24,16.158333,3.4875
2014-12-24,10.7125,8.045833
2015-12-24,10.716667,17.4625
2016-12-24,11.820833,4.045833
