## Pandas Datetime

In [1]:
import pandas as pd

In [2]:
dates = ['2017-04-02','April 2, 2017', '2 April 2017', '02/04/2017', '2017.04.02', '20170402']
pd.to_datetime(dates)

DatetimeIndex(['2017-04-02', '2017-04-02', '2017-04-02', '2017-02-04',
               '2017-04-02', '2017-04-02'],
              dtype='datetime64[ns]', freq=None)

In [3]:
dt = ['2017-01-05 2:30:00 PM', 'Jan 5, 2017 14:30:00', '01/05/2016', '17.01.05', '2017/01/05','20170105']
pd.to_datetime(dt)
# when no time is mentioned, it considers 00 as time

DatetimeIndex(['2017-01-05 14:30:00', '2017-01-05 14:30:00',
               '2016-01-05 00:00:00', '2005-01-17 00:00:00',
               '2017-01-05 00:00:00', '2017-01-05 00:00:00'],
              dtype='datetime64[ns]', freq=None)

In [4]:
# For European style dates the system may read date in American style.
# Therefore, we have to explicitly mention date using 'dayfirst'

print(pd.to_datetime('03-11-2020'), '- This is NOT what we want')  # this is European style with dd/mm/yyyy
print(pd.to_datetime('03-11-2020', dayfirst = True), '- This is what we want')

2020-03-11 00:00:00 - This is NOT what we want
2020-11-03 00:00:00 - This is what we want


## Custom date time format

In [5]:
# The following date format is unknown to python. Therefore, use format parameter.
pd.to_datetime('2017$03$24', format = '%Y$%m$%d')

Timestamp('2017-03-24 00:00:00')

In [6]:
pd.to_datetime('2017#03#24', format = '%Y#%m#%d')

Timestamp('2017-03-24 00:00:00')

## Handling invalid dates

In [7]:
pd.to_datetime(['2017-01-06', 'Jan 6, 2017', 'abc'], errors='ignore')
# This takes the non-date string as it is.

Index(['2017-01-06', 'Jan 6, 2017', 'abc'], dtype='object')

In [8]:
pd.to_datetime(['2017-01-06', 'Jan 6, 2017', 'abc'], errors ='coerce')
# This does not take non-date strings and takes only date formats

DatetimeIndex(['2017-01-06', '2017-01-06', 'NaT'], dtype='datetime64[ns]', freq=None)

## Epoch

#### Epoch or Unix time means number of seconds that have passed since Jan 1, 1970 00:00:00 UTC time

In [9]:
# Python by default presents time including nano seconds
# Whereas UNIX considers by default time from seconds

In [10]:
check_epoch = 144256120120
print(pd.to_datetime(check_epoch))
print('Here it has included nanoseconds. The last 9 digits form nanoseconds')

1970-01-01 00:02:24.256120120
Here it has included nanoseconds. The last 9 digits form nanoseconds


In [11]:
check_epoch = 1442561201
pd.to_datetime(check_epoch, unit = 's')

Timestamp('2015-09-18 07:26:41')

In [12]:
check_epoch = 1442561201000
pd.to_datetime(check_epoch, unit = 'ms')

Timestamp('2015-09-18 07:26:41')

In [13]:
check_epoch_1 = 1442561201
t = pd.to_datetime(check_epoch_1, unit ='s')
type(t)

pandas._libs.tslibs.timestamps.Timestamp

In [14]:
t = pd.to_datetime([check_epoch_1], unit ='s')
type(t)

pandas.core.indexes.datetimes.DatetimeIndex

In [15]:
t

DatetimeIndex(['2015-09-18 07:26:41'], dtype='datetime64[ns]', freq=None)

In [16]:
# We can convert datetime  into its integer form by:
t.view('int64')

array([1442561201000000000], dtype=int64)

In [17]:
pd.to_datetime('19/02', format = '%Y/%m', yearfirst=True)   
# Here 19/02 can be 19th Feb or it can be year 2019, Feb 02. So, to remove the confusion give desired format.

ValueError: time data 19/02 doesn't match format specified

In [None]:
dft = pd.read_csv()

# Pandas Date Range

In [None]:
df = pd.read_csv('C:/Users/hp/Downloads/Python Resources and learning Manoj Sir/aapl_no_dates.csv')
df.shape

In [None]:
df.head(7)

In [None]:
range1 = pd.date_range(start ='10/03/2017', end='11/1/2017', freq='B')
# Here we know the range so we directly entered start & end, otherwise use 'period'(which is used later..)
# B referes to Business Days.
range1

In [None]:
df.set_index(range1, inplace=True)

In [None]:
df.head()

## Finding missing dates from date time index

In [None]:
daily_index = pd.date_range(start="10/03/2017",end="11/1/2017",freq='D')
# D refers to Daily date time. It includes all days of the week.
daily_index

In [None]:
daily_index.difference(df.index)
# finding difference between two date indices

## Benefits of having DatetimeIndex

In [None]:
%matplotlib inline
df.Close.plot()

In [None]:
df.head(3)

In [None]:
df['2017-10-05':'2017-10-25']

In [None]:
# We can find mean of any value in any given date range
df['2017-10-05':'2017-10-25']['Close'].mean()

### Pandas dataframe.asfreq() function is used to convert TimeSeries to specified frequency. 

In [None]:
df.asfreq('D', method='pad')
# D is for daily
# pad is to copy or pad the exact data to next date time if the next date is not mentioned in the original tableb

In [None]:
df.asfreq('W', method = 'pad')  
# Present data in Weekly frequency

In [None]:
df.asfreq('H', method='pad')
# Present data in Hourly frequency

## Generating DatetimeIndex with periods argument

In [None]:
rng = pd.date_range('05/09/2012', periods=48, freq='H')
rng
# date_range genrates datetime index unlike period_range which only generates 

In [None]:
import numpy as np
ts = pd.Series(np.random.randint(0,10, len(rng)), index=rng)
ts.head(20)

# Pandas Time Series Tutorial: DateTime Index

In [None]:
file1 = 'C:/Users/hp/Downloads/Python Resources and learning Manoj Sir/aapl.csv'
dfa = pd.read_csv(file1, parse_dates=['Date'], index_col='Date')
# parse_dates makes the Date column to be read as datetime
dfa.head()

In [None]:
dfa.index

## What is DatetimeIndex? Benefits of it

### (1) Partial Date Index: Select Specific Months Data

In [None]:
dfa['2017-06-01':'2017-06-06']

In [None]:
dfa['2017-06'].tail()   # We can show particular month data

### Average price of aapl's stock in June, 2017

In [None]:
dfa['2017-6']['Close'].mean()

In [None]:
# We can also show data by Year
dfa['2017'].head(3)

In [None]:
dfa['2017'].head()

# Pandas Period:

# Pandas Time Series Analysis: Period and PeriodIndex

## Year Period

In [None]:
import pandas as pd

In [None]:
pd.Period?

In [None]:
y = pd.Period('2016')
y
# In the  output you can see A-DEC. 
# A-DEC refers that the year ends at December.This is by default month if not mentioned.

In [None]:
y.start_time
# If not mentioned in the time, system shows date as Jan 01 and time from 00.00.00

In [None]:
y.end_time

In [None]:
y.is_leap_year

In [None]:
y1 = pd.Period('2017', 'A-JUN')
y1

## Monthly period

In [None]:
m = pd.Period('2016-09')
m

In [None]:
m.start_time

In [None]:
m.end_time

In [None]:
m+1

## Daily Period

In [None]:
d = pd.Period('2016-07-12')
d
# Here D refers to D Daily

In [None]:
d.start_time

In [None]:
d.end_time

In [None]:
d+1

## Hourly Period

In [None]:
h = pd.Period('2016-07-13 21:00:05', freq= 'H')
h
# We mention the frequency as H, otherwise by default system takes 'S' freq.

In [None]:
h.start_time
# It starts from given hour

In [None]:
h.end_time
# It ends at given hour

In [None]:
h+1

## Quarterly Period

In [None]:
q1 = pd.Period('2018Q1', freq = 'Q-JUN')
q1
# Here Q1 refers to Quarter 1. freq  Q JUN means Quarter ends at JUNE

In [None]:
q1.start_time
# Observe carefully: The in o/p shows that Quarter starts from 2017 July.
# Since we gave Quarter ending as June 2018, this means the quarter started at July 2017

In [None]:
q1.end_time
# This shows the end time of Quarter 1

## Weekly Period

In [None]:
w = pd.Period('2018-04-26', freq = 'W')
w
# This shows the week. It also shows W-SUN which means week ends on Sunday

In [None]:
w-1

In [None]:
w2 = pd.Period('2018-05-10', freq = 'W')
w2

In [None]:
w2-w

## Period Index and Period Range continued in II Date Time Learning file...

In [None]:
r = pd.period_range('2017', '2019', freq = 'q')
r
# If you observe the result, the freq is of the Quarter DEC.
# It means that Quarter is ending at Dcember

In [None]:
r[0].start_time

In [None]:
r[0].end_time

### Walmart's fiscal year ends in Jan, below is how you generate walmart's fiscal quarters between 2011 and 2017

In [None]:
wal = pd.period_range('2011', '2017', freq = 'q-jan')
wal
# Here we gave 'q-jan', that means Quarter ends in January.
# This means Quarter starts in Feb 2010.
# Since we gave a range of 2011 to 2017, it shows o/p wherever there is 2011 to 2017 in the quarters

In [None]:
wal[0].start_time
# this shows o/p of Quarter 4 of 2010-11 fiscal year
# Why does it show the start time from Q4?
# Its because we gave period range from 2011. And Q4 has 3 months i.e. 
# Nov-2010, Dec-2010, Jan-2011

In [None]:
wal[0].end_time

In [None]:
print(len(wal))
wal[22].start_time

##### Other uses/functions of period

In [None]:
import numpy as np

In [None]:
ps = pd.Series(np.random.randn(len(wal)), wal)
ps

In [None]:
import pandas as pd

In [None]:
pd.period_range('2011', '2017', freq = 'A-DEC')

In [None]:
r = pd.period_range('2017', '2019', freq = 'B')
r

In [None]:
pd.date_range('2011', '2017', freq = 'A-DEC')