### This notebook demonstrates working with Dates and Datetimes in `pandas`

In [82]:
import pandas as pd
import numpy as np

# A date written as a plain string
adate = '01-01-2000'

# pd.to_datetime() parses the string into a pandas Timestamp
adatetime = pd.to_datetime(adate)

# Show the parsed value and its type, one per line
print(adatetime, type(adatetime), sep='\n')

2000-01-01 00:00:00
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


#### Dates in a `DataFrame`
Convert date column to Datetime

In [87]:
# Raw records: prices paired with month-day-year date strings.
# The dict is left untouched so the cell can be re-run safely.
data = {'price': [10.24, 11.34, 14.72, 13.90, 15.03],
       'date': ['11-01-2020', '11-02-2021', '11-03-2021', '11-04-2021', '11-05-2021']}

# Build the frame first, then convert the 'date' column itself to Datetime.
# The explicit format pins the month-first reading instead of leaving the
# ambiguous 'NN-NN-YYYY' strings to format inference.
sample_df = pd.DataFrame(data)
sample_df['date'] = pd.to_datetime(sample_df['date'], format='%m-%d-%Y')
sample_df

Unnamed: 0,price,date
0,10.24,2020-11-01
1,11.34,2021-11-02
2,14.72,2021-11-03
3,13.9,2021-11-04
4,15.03,2021-11-05



#### Extracting information from `datetime` objects

In [92]:
# Load the oil data and show the raw contents of the date column
# (printed before any conversion so the before/after can be compared).
oil = pd.read_csv('data/oil.csv')
print(oil['date'].values)

# Convert the column to Datetime so the .dt accessor becomes available,
# then store each row's month number in a new 'month' column.
oil['date'] = pd.to_datetime(oil['date'])
oil['month'] = oil['date'].dt.month


# Answer check
print('Counts of months in data\n==================')
print(oil['month'].value_counts())
oil

['2013-01-01' '2013-01-02' '2013-01-03' ... '2017-08-29' '2017-08-30'
 '2017-08-31']
Counts of months in data
month
1     111
5     111
7     111
3     110
8     110
4     107
6     107
2     101
12     90
10     89
9      87
11     84
Name: count, dtype: int64


Unnamed: 0,date,dcoilwtico,month
0,2013-01-01,,1
1,2013-01-02,93.14,1
2,2013-01-03,92.97,1
3,2013-01-04,93.12,1
4,2013-01-07,93.20,1
...,...,...,...
1213,2017-08-25,47.65,8
1214,2017-08-28,46.40,8
1215,2017-08-29,46.46,8
1216,2017-08-30,45.96,8


In [106]:
# Show what the date column currently holds
print(oil['date'].values)

# Make sure the column is Datetime, then derive the month via the .dt accessor
date_col = pd.to_datetime(oil['date'])
oil['date'] = date_col
oil['month'] = date_col.dt.month

# Tally how many rows fall in each month
month_counts = oil['month'].value_counts()
print('Counts of months in data\n==================')
print(month_counts)
oil.head()

['2013-01-01T00:00:00.000000000' '2013-01-02T00:00:00.000000000'
 '2013-01-03T00:00:00.000000000' ... '2017-08-29T00:00:00.000000000'
 '2017-08-30T00:00:00.000000000' '2017-08-31T00:00:00.000000000']
Counts of months in data
month
1     111
5     111
7     111
3     110
8     110
4     107
6     107
2     101
12     90
10     89
9      87
11     84
Name: count, dtype: int64


Unnamed: 0,date,dcoilwtico,month
0,2013-01-01,,1
1,2013-01-02,93.14,1
2,2013-01-03,92.97,1
3,2013-01-04,93.12,1
4,2013-01-07,93.2,1


#### Slicing data with `datetime` index

Using the `msft` data, change the `Date` column to `datetime` and set it as the index of the DataFrame. Use the datetime index to select the days between June 21st and September 22nd, 2020. Assign the slice to the variable `msft_summer` below.

In [109]:
# Load the msft data, then list its columns, non-null counts and dtypes
msft = pd.read_csv(filepath_or_buffer='data/msft.csv')
msft.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 253 entries, 0 to 252
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       253 non-null    object 
 1   High       253 non-null    float64
 2   Low        253 non-null    float64
 3   Open       253 non-null    float64
 4   Close      253 non-null    float64
 5   Volume     253 non-null    float64
 6   Adj Close  253 non-null    float64
dtypes: float64(6), object(1)
memory usage: 14.0+ KB


In [113]:
# Index the rows by the parsed dates. set_index receives a Series rather than
# a column label, so the original 'Date' column stays in the frame next to
# the new DatetimeIndex. Rebinding is used instead of inplace=True.
msft = msft.set_index(pd.to_datetime(msft['Date']))

# Explicit label-based slice on the DatetimeIndex; both endpoints are inclusive.
msft_summer = msft.loc['2020-06-21':'2020-09-22']
msft_summer.head()

Unnamed: 0_level_0,Date,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-06-22,2020-06-22,200.759995,195.229996,195.789993,200.570007,32818900.0,197.904968
2020-06-23,2020-06-23,203.949997,201.429993,202.089996,201.910004,30917400.0,199.227173
2020-06-24,2020-06-24,203.25,196.559998,201.600006,197.839996,36740600.0,195.211227
2020-06-25,2020-06-25,200.610001,195.470001,197.800003,200.339996,27803900.0,197.678024
2020-06-26,2020-06-26,199.889999,194.880005,199.729996,196.330002,54675800.0,193.721298
