In [8]:
# Converting Strings to Dates
import numpy as np
import pandas as pd

date_strings = np.array(['03-04-2005 11:35 PM',
                         '23-05-2010 12:01 AM',
                         '04-09-2009 09:09 PM'])

[pd.to_datetime(date, format='%d-%m-%Y %I:%M %p') for date in date_strings]

[pd.to_datetime(date, format='%d-%m-%Y %I:%M %p', errors='coerce') for date in date_strings]

[Timestamp('2005-04-03 23:35:00'),
 Timestamp('2010-05-23 00:01:00'),
 Timestamp('2009-09-04 21:09:00')]

In [17]:
# Handling Time Zones
import pandas as pd

pd.Timestamp('2017-05-01 06:00:00', tz='Europe/London')

date = pd.Timestamp('2017-05-01 06:00:00')
date_in_london = date.tz_localize('Europe/London')
date_in_london

date_in_london.tz_convert('Asia/Manila')

Timestamp('2017-05-01 13:00:00+0800', tz='Asia/Manila')

In [21]:
dates = pd.Series(pd.date_range('2/2/2002', periods=3, freq='M'))
dates

dates.dt.tz_localize('Asia/Manila')

  dates = pd.Series(pd.date_range('2/2/2002', periods=3, freq='M'))


0   2002-02-28 00:00:00+08:00
1   2002-03-31 00:00:00+08:00
2   2002-04-30 00:00:00+08:00
dtype: datetime64[ns, Asia/Manila]

In [24]:
from pytz import all_timezones

all_timezones

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau',
 'Africa/Blantyre',
 'Africa/Brazzaville',
 'Africa/Bujumbura',
 'Africa/Cairo',
 'Africa/Casablanca',
 'Africa/Ceuta',
 'Africa/Conakry',
 'Africa/Dakar',
 'Africa/Dar_es_Salaam',
 'Africa/Djibouti',
 'Africa/Douala',
 'Africa/El_Aaiun',
 'Africa/Freetown',
 'Africa/Gaborone',
 'Africa/Harare',
 'Africa/Johannesburg',
 'Africa/Juba',
 'Africa/Kampala',
 'Africa/Khartoum',
 'Africa/Kigali',
 'Africa/Kinshasa',
 'Africa/Lagos',
 'Africa/Libreville',
 'Africa/Lome',
 'Africa/Luanda',
 'Africa/Lubumbashi',
 'Africa/Lusaka',
 'Africa/Malabo',
 'Africa/Maputo',
 'Africa/Maseru',
 'Africa/Mbabane',
 'Africa/Mogadishu',
 'Africa/Monrovia',
 'Africa/Nairobi',
 'Africa/Ndjamena',
 'Africa/Niamey',
 'Africa/Nouakchott',
 'Africa/Ouagadougou',
 'Africa/Porto-Novo',
 'Africa/Sao_Tome',
 'Africa/Timbuktu',
 'Africa/

In [2]:
# Selecting Dates and Times
import pandas as pd

dateframe = pd.DataFrame()

dateframe['date'] = pd.date_range('1/1/2001', periods=100000, freq='H')

dateframe[(dateframe['date'] > '2002-1-1 01:00:00') &
          (dateframe['date'] <= '2002-1-1 04:00:00')]

  dateframe['date'] = pd.date_range('1/1/2001', periods=100000, freq='H')


Unnamed: 0,date
8762,2002-01-01 02:00:00
8763,2002-01-01 03:00:00
8764,2002-01-01 04:00:00


In [3]:
dateframe = dateframe.set_index(dateframe['date'])

dateframe.loc['2002-1-1 01:00:00':'2002-1-1 04:00:00']

Unnamed: 0_level_0,date
date,Unnamed: 1_level_1
2002-01-01 01:00:00,2002-01-01 01:00:00
2002-01-01 02:00:00,2002-01-01 02:00:00
2002-01-01 03:00:00,2002-01-01 03:00:00
2002-01-01 04:00:00,2002-01-01 04:00:00


In [None]:
# Breaking Up Date Data into Multiple Features
import pandas as pd

dataframe = pd.DataFrame()

dataframe['date'] = pd.date_range('1/1/2001', periods=150, freq='W')

dataframe['year'] = dataframe['date'].dt.year
dataframe['month'] = dataframe['date'].dt.month
dataframe['day'] = dataframe['date'].dt.day
dataframe['hour'] = dataframe['date'].dt.hour
dataframe['minute'] = dataframe['date'].dt.minute

dataframe

Unnamed: 0,date,year,month,day,hour,minute
0,2001-01-07,2001,1,7,0,0
1,2001-01-14,2001,1,14,0,0
2,2001-01-21,2001,1,21,0,0
3,2001-01-28,2001,1,28,0,0
4,2001-02-04,2001,2,4,0,0
...,...,...,...,...,...,...
145,2003-10-19,2003,10,19,0,0
146,2003-10-26,2003,10,26,0,0
147,2003-11-02,2003,11,2,0,0
148,2003-11-09,2003,11,9,0,0


In [10]:
# Calculating the Difference Between Dates
import pandas as pd

dataframe = pd.DataFrame()

dataframe['Arrived'] = [pd.Timestamp('01-01-2017'), pd.Timestamp('01-04-2017')]
dataframe['Left'] = [pd.Timestamp('01-01-2017'), pd.Timestamp('01-06-2017')]

dataframe['Left'] - dataframe['Arrived']

0   0 days
1   2 days
dtype: timedelta64[ns]

In [11]:
pd.Series(delta.days for delta in (dataframe['Left'] - dataframe['Arrived']))

0    0
1    2
dtype: int64

In [13]:
# Encoding Days of the Week
import pandas as pd

dates = pd.Series(pd.date_range("2/2/2002", periods=3, freq="M"))

dates.dt.day_name()


  dates = pd.Series(pd.date_range("2/2/2002", periods=3, freq="M"))


0    Thursday
1      Sunday
2     Tuesday
dtype: object

In [15]:
dates.dt.weekday

0    3
1    6
2    1
dtype: int32

In [18]:
# Creating a Lagged Feature
import pandas as pd

dataframe = pd.DataFrame()

dataframe["dates"] = pd.date_range("1/1/2001", periods=5, freq="D")
dataframe["stock_price"] = [1.1, 2.2, 3.3, 4.4, 5.5]

dataframe["previous_days_stock_price"] = dataframe["stock_price"].shift(1)

dataframe

Unnamed: 0,dates,stock_price,previous_days_stock_price
0,2001-01-01,1.1,
1,2001-01-02,2.2,1.1
2,2001-01-03,3.3,2.2
3,2001-01-04,4.4,3.3
4,2001-01-05,5.5,4.4


In [25]:
# Using Rolling Time Windows
import pandas as pd

time_index = pd.date_range("01/01/2010", periods=5, freq="M")

dataframe = pd.DataFrame(index=time_index)

dataframe["Stock_Price"] = [1,2,3,4,5]

dataframe.rolling(window=2).mean()

  time_index = pd.date_range("01/01/2010", periods=5, freq="M")


Unnamed: 0,Stock_Price
2010-01-31,
2010-02-28,1.5
2010-03-31,2.5
2010-04-30,3.5
2010-05-31,4.5


In [30]:
# Handling Missing Data in Time Series
import pandas as pd
import numpy as np

time_index = pd.date_range('01/01/2010', periods=5, freq="M")

dataframe = pd.DataFrame(index=time_index)

dataframe["Sales"] = [1.0,2.0,np.nan,np.nan,5.0]

dataframe.interpolate()

  time_index = pd.date_range('01/01/2010', periods=5, freq="M")


Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.0
2010-04-30,4.0
2010-05-31,5.0


In [32]:
dataframe.ffill()

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,2.0
2010-04-30,2.0
2010-05-31,5.0


In [34]:
dataframe.bfill()

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,5.0
2010-04-30,5.0
2010-05-31,5.0


In [37]:
dataframe.interpolate(method="quadratic")

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.059808
2010-04-30,4.038069
2010-05-31,5.0


In [40]:
dataframe.interpolate(limit=1, limit_direction="forward")

Unnamed: 0,Sales
2010-01-31,1.0
2010-02-28,2.0
2010-03-31,3.0
2010-04-30,
2010-05-31,5.0
