# Statistics Introduction Applied to Data Science
## Bonus : Two - Time Series
### Pandas Time Series - Date and Time

In [1]:
# Libraries needs
import numpy    as np
import pandas   as pd
import datetime as dt

In [2]:
# Using the pandas library and python's builtin datetime class, you can create a pandas Timestamp.
time_stamp = pd.Timestamp(dt.datetime(2021,1,1))
time_stamp

Timestamp('2021-01-01 00:00:00')

In [3]:
# We can use a date string instead of a datetime object, both produce the same result.
time_stamp = pd.Timestamp('2021-01-01')
time_stamp

Timestamp('2021-01-01 00:00:00')

In [4]:
# The pandas TimeStamp has attributes. So you can access various time aspects of your data
print(time_stamp.year)
print(time_stamp.month)
print(time_stamp.day)

2021
1
1


In [5]:
# Retrieve the name of the weekday
time_stamp.day_name()

'Friday'

In [6]:
# Pandas also has a data type for time periods.
# The period object always has a frequency, with months as the default.
period_m = pd.Period('2021-01')
period_m

Period('2021-01', 'M')

In [7]:
# Depending of set value, so will the frequency
period_d = pd.Period('2021-02-14')
period_d

Period('2021-02-14', 'D')

In [8]:
# You can convert a period to a timestamp object
period2 = period_d.to_timestamp()
print(type(period2))
print(period2)

<class 'pandas._libs.tslibs.timestamps.Timestamp'>
2021-02-14 00:00:00


In [9]:
# You can convert a timestamp to period object
period3 = period2.to_period('M')
print(type(period3))
print(period3)

<class 'pandas._libs.tslibs.period.Period'>
2021-02


In [10]:
# If you use dates which start with the day first (i.e. European style), you can pass the dayfirst flag
pd.to_datetime(['02-01-2017', '01-03-2018'], dayfirst=True)

DatetimeIndex(['2017-01-02', '2018-03-01'], dtype='datetime64[ns]', freq=None)

## Basic date arithmetic

In [11]:
# Add to a period object. Depending of frequency, it will affect the period object
period_d += 2
period_d

Period('2021-02-16', 'D')

In [12]:
period_m += 3
period_m

Period('2021-04', 'M')

In [13]:
# You can create a sequences of dates and times
idx = pd.date_range(start='2021-01', periods=12, freq='M')
print(type(idx))
print(idx)

<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
DatetimeIndex(['2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31',
               '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31'],
              dtype='datetime64[ns]', freq='M')


In [14]:
# Generate sequences of fixed-frequency dates and time spans
# Frequency is set to daily
idx2 = pd.date_range('2021-01-01', periods=7, freq='D')
idx2

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
               '2021-01-05', '2021-01-06', '2021-01-07'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# You can access to index element by index
idx[11]

Timestamp('2021-12-31 00:00:00', freq='M')

In [16]:
# You can convert the index to PeriodIndex
pidx = idx.to_period()
print(type(pidx))
print(idx)

<class 'pandas.core.indexes.period.PeriodIndex'>
DatetimeIndex(['2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31',
               '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31'],
              dtype='datetime64[ns]', freq='M')


In [17]:
# Create a time series by setting the DateTimeIndex as the index of your data frame.
pd.DataFrame({'data': idx}).info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   data    12 non-null     datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 224.0 bytes


In [18]:
# Create 12 rows with two columns of random data to match the DateTimeIndex
data = np.random.random(size=(12,2))
data

array([[0.89897964, 0.32498041],
       [0.80771292, 0.71992236],
       [0.934777  , 0.21336454],
       [0.35488966, 0.27521662],
       [0.81890663, 0.4828202 ],
       [0.66229194, 0.39581727],
       [0.74099546, 0.09226887],
       [0.12717119, 0.52172807],
       [0.72669452, 0.06296931],
       [0.00595929, 0.4024398 ],
       [0.68145289, 0.1798756 ],
       [0.40132486, 0.76364475]])

In [19]:
# Provide the dates to the DataFrame constructor, and you have created your first time series with
# 12 monthly timestamps.
df = pd.DataFrame(data=data, index=idx)

# rename columns dataframe
df.rename(columns = {0:'value1', 1:'value2'}, inplace = True)

print(df.info())
df.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 12 entries, 2021-01-31 to 2021-12-31
Freq: M
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   value1  12 non-null     float64
 1   value2  12 non-null     float64
dtypes: float64(2)
memory usage: 288.0 bytes
None


Unnamed: 0,value1,value2
2021-01-31,0.89898,0.32498
2021-02-28,0.807713,0.719922
2021-03-31,0.934777,0.213365
2021-04-30,0.35489,0.275217
2021-05-31,0.818907,0.48282


In [20]:
# End Notebook