In [1]:
%matplotlib inline
import numpy as np
import numpy.ma as ma
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
import timeit
import line_profiler

import datetime

from io import StringIO
from pandas.api.types import CategoricalDtype

import pandas._testing as tm

# 2.19 Time series / date functionality

In [10]:
dti = pd.to_datetime(
    ['1.1.2018', np.datetime64('2018-01-01'), datetime.datetime(2018,1,1)]
)
dti[1]

Timestamp('2018-01-01 00:00:00')

In [11]:
dti = pd.date_range('2018-01-01', periods=3, freq='4H')
dti

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 04:00:00',
               '2018-01-01 08:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [12]:
dti = dti.tz_localize('Europe/Berlin')


In [13]:
dti.tz_convert('US/Central')

DatetimeIndex(['2017-12-31 17:00:00-06:00', '2017-12-31 21:00:00-06:00',
               '2018-01-01 01:00:00-06:00'],
              dtype='datetime64[ns, US/Central]', freq=None)

In [14]:
idx = pd.date_range("2018-01-01", periods=5, freq="H")
ts = pd.Series(range(len(idx)), index=idx)
print(ts)
ts.resample("2H").count()
print(ts.resample("2H").mean())
ts.resample("2H").median()

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
2018-01-01 04:00:00    4
Freq: H, dtype: int64
2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.0
Freq: 2H, dtype: float64


2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.0
Freq: 2H, dtype: float64

In [15]:
friday = pd.Timestamp("2018-01-05")
friday.day_name()
saturday = friday + pd.Timedelta("1 day")
saturday.day_name() # 'Saturday'

# Add 1 business day (Friday --> Monday)
monday = friday + pd.offsets.BDay()
monday.day_name()

'Monday'

In [16]:
pd.Series(range(3), index=pd.date_range('2020', freq='D', periods=3))


2020-01-01    0
2020-01-02    1
2020-01-03    2
Freq: D, dtype: int64

In [17]:
df = pd.DataFrame({"type A":[15], "type B": [20], "date": ["2012-03-01"], "station": ["s1"]})

In [18]:
df

Unnamed: 0,type A,type B,date,station
0,15,20,2012-03-01,s1


In [19]:
friday = pd.Timestamp("2018-01-05")
friday.day_name()
friday + pd.Timedelta('1 Day')
(friday + pd.offsets.Day()).day_name()

'Saturday'

In [20]:
s = pd.Series(pd.date_range('2020', periods=3))
df = s.to_frame()

In [21]:
df = pd.Series(pd.date_range('1/13/2021', periods=6)).to_frame()
df.columns=["dates"]
df['day'] = df.dates.dt.day_name()
df['weekday'] = df.dates.dt.weekday
df

Unnamed: 0,dates,day,weekday
0,2021-01-13,Wednesday,2
1,2021-01-14,Thursday,3
2,2021-01-15,Friday,4
3,2021-01-16,Saturday,5
4,2021-01-17,Sunday,6
5,2021-01-18,Monday,0


In [22]:
pd.Timestamp(pd.NaT)
pd.Timedelta(pd.NaT)
pd.Period(pd.NaT)
pd.DateOffset()

<DateOffset>

In [23]:
pd.Timedelta('14 days')//pd.Timedelta(1, 'W')+1

3

In [24]:
pd.Timestamp("2012-05-01")
pd.Timestamp(datetime.datetime(2012, 5, 1))
pd.Timestamp(2012, 5, 1)

Timestamp('2012-05-01 00:00:00')

In [25]:
pd.Period("2011-01")
pd.Period("2011.01", 'D')
pd.Period("2011.01", freq='D')

Period('2011-01-01', 'D')

In [26]:
dates = pd.date_range('2020/01/01', periods=3)
ts = pd.Series(np.arange(3), dates)
type(ts.index) # pandas.core.indexes.datetimes.DatetimeIndex

pandas.core.indexes.datetimes.DatetimeIndex

In [27]:
periods = pd.date_range('2021-01', periods=3, freq='M')
periods = periods.to_period()
type(periods) # pandas.core.indexes.period.PeriodIndex
ts = pd.Series(np.arange(3), periods)
ts.index # PeriodIndex(['2021-01', '2021-02', '2021-03'], dtype='period[M]', freq='M')
type(ts.index) #pandas.core.indexes.period.PeriodIndex

pandas.core.indexes.period.PeriodIndex

In [28]:
pd.date_range('2018-01', periods=3, freq='M')
pd.period_range('2018-01', periods=3, freq='M')

PeriodIndex(['2018-01', '2018-02', '2018-03'], dtype='period[M]', freq='M')

In [29]:
pd.Period("2011-01", 'D')
pd.Timestamp("2011-01", 'D')

Timestamp('2011-01-01 00:00:00', freq='D')

In [31]:
pd.date_range('2021-01', periods=3, freq='M').to_period()

PeriodIndex(['2021-01', '2021-02', '2021-03'], dtype='period[M]', freq='M')

In [40]:
print(pd.Timestamp('2020.01.01')) # 2020-01-01 00:00:00
pd.to_datetime(['2020.01.01', '2020.01.02'])
pd.date_range('2020.01.01', periods=3, freq='D')

pd.Period('2020.01.01')

2020-01-01 00:00:00


Period('2020-01-01', 'D')

In [50]:
pd.to_datetime(pd.Series(["Jul 31, 2020","2010-01-10", None])) 
pd.to_datetime(["Jul 31, 2020","2010-01-10", None]) # DatetimeIndex(['2020-07-31', '2010-01-10', 'NaT'], dtype='datetime64[ns]', freq=None)

DatetimeIndex(['2020-07-31', '2010-01-10', 'NaT'], dtype='datetime64[ns]', freq=None)

In [55]:
pd.to_datetime('04-01-2021',dayfirst=True)
pd.to_datetime('04-18-2021',dayfirst=True) # Timestamp('2021-04-18 00:00:00')

Timestamp('2021-04-18 00:00:00')

In [61]:
pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer") # DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq='2D')
pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-06"], freq="infer") # DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-06'], dtype='datetime64[ns]', freq=None)

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-06'], dtype='datetime64[ns]', freq=None)

In [63]:
pd.to_datetime("2020/11/12", format="%Y/%m/%d")
pd.to_datetime("12-11-2010 00:00", format="%d-%m-%Y %H:%M")

Timestamp('2010-11-12 00:00:00')

In [2]:
pd.Timestamp.max

Timestamp('2262-04-11 23:47:16.854775807')

In [11]:
pd.Timestamp('2021.04.30') + pd.offsets.BQuarterEnd()

Timestamp('2021-06-30 00:00:00')

In [16]:
periods = pd.date_range('2021-01', periods=3, freq='M')
periods.dtype

dtype('<M8[ns]')

In [19]:
pd.Timestamp.max

Timestamp('2262-04-11 23:47:16.854775807')

In [27]:
df = DataFrame({
    'year': [2021, 2021],
    'month': [4,5],
    'day':[30, 1],
    'hours':[23, 1]
})
pd.to_datetime(df[['year', 'month', 'day']]) # ValueError: to assemble mappings requires at least that [year, month, day] be specified: [day] is missing

0   2021-04-30
1   2021-05-01
dtype: datetime64[ns]

In [37]:
pd.to_datetime(['2020/01/01', '2021.01.02', 'asd']) # ParserError: Unknown string format: asd
pd.to_datetime(['2020/01/01', '2021.01.02', 'asd'], errors='raise') # ParserError: Unknown string format: asd
pd.to_datetime(['2020/01/01', '2021.01.02', 'asd'], errors='ignore')
pd.to_datetime(['2020/01/01', '2021.01.02', 'asd'], errors='coerce')

DatetimeIndex(['2020-01-01', '2021-01-02', 'NaT'], dtype='datetime64[ns]', freq=None)

In [46]:
pd.to_datetime([1349720105, 1349806505, 1349892905, 
                1349979305, 1350065705], unit="s")
pd.to_datetime([1349720105100, 1349720105200, 1349720105300, 
                1349720105400,1349720105500], unit='ms')
pd.to_datetime([1618866007.161886600, 1618866007161886601])

DatetimeIndex(['1970-01-01 00:00:01.618866007', '2021-04-19 21:00:07.161886601'], dtype='datetime64[ns]', freq=None)

In [45]:
pd.to_datetime(1490195805433502912, unit="ns")

Timestamp('2017-03-22 15:16:45.433502912')

In [53]:
stamps = pd.date_range("2012-10-08 18:15:05", periods=4, freq="D")
(stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")

Int64Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')

In [58]:
pd.to_datetime([0, 1, 2], unit="D", origin=pd.Timestamp("2000-01-01"))

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], dtype='datetime64[ns]', freq=None)

In [59]:
daily = pd.DataFrame({'Date': pd.date_range(start="2021-01-01",end="2021-04-29")})
pc21 = pd.DataFrame({'Date': ["21-01-2021", "11-03-2021", "22-04-2021"]})
pc21['Date'] = pd.to_datetime(pc21['Date'])

In [71]:
daily.head()

Unnamed: 0,Date
0,2021-01-01
1,2021-01-02
2,2021-01-03
3,2021-01-04
4,2021-01-05


In [63]:
pc21 = pd.DataFrame({'Date': ["21-01-2021", "11-03-2021", "22-04-2021"]})
pc21['Date'] = pd.to_datetime(pc21['Date'])
pc21

Unnamed: 0,Date
0,2021-01-21
1,2021-11-03
2,2021-04-22


In [74]:
daily['newcol'] = np.where(daily.Date.isin(pc21.Date),1,0)
daily.head(21)

Unnamed: 0,Date,newcol
0,2021-01-01,0
1,2021-01-02,0
2,2021-01-03,0
3,2021-01-04,0
4,2021-01-05,0
5,2021-01-06,0
6,2021-01-07,0
7,2021-01-08,0
8,2021-01-09,0
9,2021-01-10,0


In [111]:
daily.query('value.eq(1)')

Unnamed: 0,Date,value
20,2021-01-21,1.0
21,2021-01-22,1.0
22,2021-01-23,1.0
23,2021-01-24,1.0
24,2021-01-25,1.0
25,2021-01-26,1.0
26,2021-01-27,1.0
27,2021-01-28,1.0
69,2021-03-11,1.0
70,2021-03-12,1.0


In [82]:
%timeit daily.Date.isin(pc21.Date).astype(int)

326 µs ± 29.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [83]:
%timeit np.where(daily.Date.isin(pc21.Date),1,0)

244 µs ± 18.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [88]:
daily = pd.DataFrame({'Date': pd.date_range(start="2021-01-01",end="2021-04-29")})
pc21 = pd.DataFrame({'Date': ["2021-01-21", "2021-03-11", "2021-04-22"]})
pc21['Date'] = pd.to_datetime(pc21['Date'])

In [85]:
daily.head()

Unnamed: 0,Date
0,2021-01-01
1,2021-01-02
2,2021-01-03
3,2021-01-04
4,2021-01-05


In [92]:
pc21

Unnamed: 0,Date,value
0,2021-01-21,1
1,2021-03-11,1
2,2021-04-22,1


In [123]:
daily['value'] = 0
pc21['value'] = 1
#daily = pd.merge(daily, pc21, on='Date', how='left').rename(
    #columns={'value_y':'value'}).drop('value_x', 1).fillna(method="ffill", limit=7).fillna(0)
#pc21.drop('value',1)

pd.merge_ordered(daily, pc21, on='Date', 
                 how='left', fill_method='ffill', limit=7).head(30)
#daily.head(30)

TypeError: merge_ordered() got an unexpected keyword argument 'limit'

In [124]:
d="""Col1,Col2,Col3
x,a,10
x,b,12
x,c,25
y,a,13
y,b,14
y,c,37"""
df=gdf(d)
df

NameError: name 'gdf' is not defined