In [1]:
# Pandas Web User Guide Follow along
# Time Series and Time Deltas
# https://pandas.pydata.org/docs/user_guide/timeseries.html
# Created 11/29/20

%matplotlib notebook

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from io import StringIO
import datetime

# How interactive you want is discussed:
# https://ipython.readthedocs.io/en/stable/config/options/terminal.html
# Options are: 'all', 'last', 'last_expr', 'none', 'last_expr_or_assign'
# Default is: 'last_expr'

from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity I can only get last_expr_or_assign to work
InteractiveShell.ast_node_interactivity = "last_expr_or_assign"

In [2]:
def diag(*args):
    """Pandas diagnostics"""
    
    for i in args:
        
        if isinstance(i, pd.core.frame.DataFrame):
            print(i.info())
            display(i)
        else:
            print(f'{"-"*40}')
            print(f'Type: {type(i)}')

            try:
                print(f'Length: {len(i)}')
            except:
                pass

            try:
                print(i.info())
            except:
                pass

            try:
                display(i)
            except:
                print(i)
                
z = diag
d = display;

In [3]:
def read_df(text):
    """Create a pandas dataframe from a string of a dataframe
    copied from the pandas website tutorial."""
    lines = text.split('\n')
    cols = lines[0].split()
    index, array = [], []
    for line in lines[1:]:
        vals = line.split()
        index.append(vals[0])
        array.append(vals[1:])
#     print(cols)
#     print(index)
#     print(array)
    df = pd.DataFrame(array, index=index, columns=cols)
    return df

In [5]:
dti = pd.date_range('2018-01-01', periods=3, freq='H')

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00'],
              dtype='datetime64[ns]', freq='H')

In [6]:
dti = dti.tz_localize('UTC')

DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-01 01:00:00+00:00',
               '2018-01-01 02:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='H')

In [7]:
pd.Series(pd.period_range('1/1/2011', freq='M', periods=3))

0    2011-01
1    2011-02
2    2011-03
dtype: period[M]

In [8]:
pd.Series(pd.period_range('1/1/2011', freq='M', periods=3))

0    2011-01
1    2011-02
2    2011-03
dtype: period[M]

In [12]:
pd.Series([pd.DateOffset(1), pd.DateOffset(2), pd.DateOffset(4.0)])

0         <DateOffset>
1    <2 * DateOffsets>
2    <4 * DateOffsets>
dtype: object

In [15]:
pd.Series(pd.date_range(start='1/1/2011', freq='M', periods=8))

0   2011-01-31
1   2011-02-28
2   2011-03-31
3   2011-04-30
4   2011-05-31
5   2011-06-30
6   2011-07-31
7   2011-08-31
dtype: datetime64[ns]

In [16]:
pd.NaT is pd.NaT

True

In [18]:
pd.NaT is pd.NA

False

In [19]:
pd.NaT is pd.NaT

True

In [20]:
pd.NA is np.nan

False

In [21]:
dates = [pd.Timestamp('2012-05-01'),
         pd.Timestamp('2012-05-02'),
         pd.Timestamp('2012-05-03')]

[Timestamp('2012-05-01 00:00:00'),
 Timestamp('2012-05-02 00:00:00'),
 Timestamp('2012-05-03 00:00:00')]

In [23]:
ts = pd.Series(np.random.randn(3), dates)
z(ts.index, ts)

----------------------------------------
Type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
Length: 3


DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

----------------------------------------
Type: <class 'pandas.core.series.Series'>
Length: 3


2012-05-01    0.605450
2012-05-02   -0.245871
2012-05-03    1.631007
dtype: float64

In [136]:
periods = [pd.Period('2012-01'), pd.Period('2012-02'), pd.Period('2012-03')]
ts = pd.Series(np.random.randn(3), periods)
z(ts.index, ts)

----------------------------------------
Type: <class 'pandas.core.indexes.period.PeriodIndex'>
Length: 3


PeriodIndex(['2012-01', '2012-02', '2012-03'], dtype='period[M]', freq='M')

----------------------------------------
Type: <class 'pandas.core.series.Series'>
Length: 3


2012-01   -1.971659
2012-02   -0.659482
2012-03   -1.224457
Freq: M, dtype: float64

In [25]:
pd.to_datetime(pd.Series(['Jul 31, 2009', '2010-01-10', None]))

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]

In [26]:
pd.to_datetime(['2005/11/23', '2010.12.31'])

DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)

In [137]:
df = pd.DataFrame({'year': [2015, 2016],
   ....:                    'month': [2, 3],
   ....:                    'day': [4, 5],
   ....:                    'hour': [2, 3]})

Unnamed: 0,year,month,day,hour
0,2015,2,4,2
1,2016,3,5,3


In [139]:
df2 = pd.to_datetime(df)
z(df2, df2.dtypes, )

----------------------------------------
Type: <class 'pandas.core.series.Series'>
Length: 2


0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]

----------------------------------------
Type: <class 'numpy.dtype'>
Length: 0


dtype('<M8[ns]')

In [29]:
pd.to_datetime([1349720105, 1349806505, 1349892905,
   ....:                 1349979305, 1350065705], unit='s')

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

In [30]:
pd.to_datetime([1349720105, 1349806505, 1349892905,
   ....:                 1349979305, 1350065705], unit='ms')

DatetimeIndex(['1970-01-16 14:55:20.105000', '1970-01-16 14:56:46.505000',
               '1970-01-16 14:58:12.905000', '1970-01-16 14:59:39.305000',
               '1970-01-16 15:01:05.705000'],
              dtype='datetime64[ns]', freq=None)

In [140]:
stamps = pd.date_range('2012-10-08 18:15:05', periods=4, freq='D')

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05'],
              dtype='datetime64[ns]', freq='D')

In [142]:
a = stamps - pd.Timestamp("1970-01-01")
b = a // pd.Timedelta('1s')
z(a, b)

----------------------------------------
Type: <class 'pandas.core.indexes.timedeltas.TimedeltaIndex'>
Length: 4


TimedeltaIndex(['15621 days 18:15:05', '15622 days 18:15:05',
                '15623 days 18:15:05', '15624 days 18:15:05'],
               dtype='timedelta64[ns]', freq=None)

----------------------------------------
Type: <class 'pandas.core.indexes.numeric.Int64Index'>
Length: 4


Int64Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')

In [37]:
pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1700-01-01'))

DatetimeIndex(['1700-01-02', '1700-01-03', '1700-01-04'], dtype='datetime64[ns]', freq=None)

In [143]:
start = datetime.datetime(2011, 1, 1)
end = datetime.datetime(2012, 1, 1)
index = pd.date_range(start, end)

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2011-12-23', '2011-12-24', '2011-12-25', '2011-12-26',
               '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30',
               '2011-12-31', '2012-01-01'],
              dtype='datetime64[ns]', length=366, freq='D')

In [144]:
a = pd.date_range(start, periods=1000, freq='M')
b = pd.date_range(start, periods=1000, freq='MS')
z(a, b)

----------------------------------------
Type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
Length: 1000


DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31',
               ...
               '2093-07-31', '2093-08-31', '2093-09-30', '2093-10-31',
               '2093-11-30', '2093-12-31', '2094-01-31', '2094-02-28',
               '2094-03-31', '2094-04-30'],
              dtype='datetime64[ns]', length=1000, freq='M')

----------------------------------------
Type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
Length: 1000


DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
               '2011-05-01', '2011-06-01', '2011-07-01', '2011-08-01',
               '2011-09-01', '2011-10-01',
               ...
               '2093-07-01', '2093-08-01', '2093-09-01', '2093-10-01',
               '2093-11-01', '2093-12-01', '2094-01-01', '2094-02-01',
               '2094-03-01', '2094-04-01'],
              dtype='datetime64[ns]', length=1000, freq='MS')

In [42]:
pd.date_range('2018-01-01', '2018-01-05', periods=5)

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05'],
              dtype='datetime64[ns]', freq=None)

In [43]:
pd.date_range('2018-01-01', '2018-01-05', periods=10)

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 10:40:00',
               '2018-01-01 21:20:00', '2018-01-02 08:00:00',
               '2018-01-02 18:40:00', '2018-01-03 05:20:00',
               '2018-01-03 16:00:00', '2018-01-04 02:40:00',
               '2018-01-04 13:20:00', '2018-01-05 00:00:00'],
              dtype='datetime64[ns]', freq=None)

In [44]:
rng = pd.date_range(start, end, freq='BM')

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq='BM')

In [145]:
pd.date_range(start, end, freq='W')

DatetimeIndex(['2011-01-02', '2011-01-09', '2011-01-16', '2011-01-23',
               '2011-01-30', '2011-02-06', '2011-02-13', '2011-02-20',
               '2011-02-27', '2011-03-06', '2011-03-13', '2011-03-20',
               '2011-03-27', '2011-04-03', '2011-04-10', '2011-04-17',
               '2011-04-24', '2011-05-01', '2011-05-08', '2011-05-15',
               '2011-05-22', '2011-05-29', '2011-06-05', '2011-06-12',
               '2011-06-19', '2011-06-26', '2011-07-03', '2011-07-10',
               '2011-07-17', '2011-07-24', '2011-07-31', '2011-08-07',
               '2011-08-14', '2011-08-21', '2011-08-28', '2011-09-04',
               '2011-09-11', '2011-09-18', '2011-09-25', '2011-10-02',
               '2011-10-09', '2011-10-16', '2011-10-23', '2011-10-30',
               '2011-11-06', '2011-11-13', '2011-11-20', '2011-11-27',
               '2011-12-04', '2011-12-11', '2011-12-18', '2011-12-25',
               '2012-01-01'],
              dtype='datetime64[ns]', freq='W-S

In [147]:
a = pd.date_range(start, end, freq='W-MON')

DatetimeIndex(['2011-01-03', '2011-01-10', '2011-01-17', '2011-01-24',
               '2011-01-31', '2011-02-07', '2011-02-14', '2011-02-21',
               '2011-02-28', '2011-03-07', '2011-03-14', '2011-03-21',
               '2011-03-28', '2011-04-04', '2011-04-11', '2011-04-18',
               '2011-04-25', '2011-05-02', '2011-05-09', '2011-05-16',
               '2011-05-23', '2011-05-30', '2011-06-06', '2011-06-13',
               '2011-06-20', '2011-06-27', '2011-07-04', '2011-07-11',
               '2011-07-18', '2011-07-25', '2011-08-01', '2011-08-08',
               '2011-08-15', '2011-08-22', '2011-08-29', '2011-09-05',
               '2011-09-12', '2011-09-19', '2011-09-26', '2011-10-03',
               '2011-10-10', '2011-10-17', '2011-10-24', '2011-10-31',
               '2011-11-07', '2011-11-14', '2011-11-21', '2011-11-28',
               '2011-12-05', '2011-12-12', '2011-12-19', '2011-12-26'],
              dtype='datetime64[ns]', freq='W-MON')

In [150]:
a.month

Int64Index([ 1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,  4,  4,  4,  4,
             5,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
             8,  9,  9,  9,  9, 10, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12,
            12],
           dtype='int64')

In [156]:
a = pd.bdate_range(start=start, periods=20, freq='BMS')
a.resolution

'day'

In [45]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)

2011-01-31    0.469248
2011-02-28    0.353420
2011-03-31   -0.255962
2011-04-29    1.542482
2011-05-31   -0.137880
2011-06-30   -0.975482
2011-07-29   -1.575433
2011-08-31   -1.561767
2011-09-30    1.059957
2011-10-31   -0.392001
2011-11-30    0.811126
2011-12-30   -0.219556
Freq: BM, dtype: float64

In [46]:
ts[::2]

2011-01-31    0.469248
2011-03-31   -0.255962
2011-05-31   -0.137880
2011-07-29   -1.575433
2011-09-30    1.059957
2011-11-30    0.811126
Freq: 2BM, dtype: float64

In [161]:
dft = pd.DataFrame(np.random.randn(100000, 1), columns=['A'],
                  index=pd.date_range('20130101', periods=100000, freq='T'))
z(dft.index.resolution)
z(dft.index.freq)

----------------------------------------
Type: <class 'str'>
Length: 6


'minute'

----------------------------------------
Type: <class 'pandas._libs.tslibs.offsets.Minute'>


<Minute>

In [48]:
dft['2013']

Unnamed: 0,A
2013-01-01 00:00:00,-0.188308
2013-01-01 00:01:00,-0.908766
2013-01-01 00:02:00,3.289080
2013-01-01 00:03:00,1.546936
2013-01-01 00:04:00,-0.481675
...,...
2013-03-11 10:35:00,-0.467292
2013-03-11 10:36:00,-0.777721
2013-03-11 10:37:00,0.187142
2013-03-11 10:38:00,-2.921198


In [154]:
series_minute = pd.Series([1, 2, 3],
   .....:                           pd.DatetimeIndex(['2011-12-31 23:59:00',
   .....:                                             '2012-01-01 00:00:00',
   .....:                                             '2012-01-01 00:02:00']))
z(series_minute.index, series_minute.index.resolution, series_minute)

----------------------------------------
Type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
Length: 3


DatetimeIndex(['2011-12-31 23:59:00', '2012-01-01 00:00:00',
               '2012-01-01 00:02:00'],
              dtype='datetime64[ns]', freq=None)

----------------------------------------
Type: <class 'str'>
Length: 6


'minute'

----------------------------------------
Type: <class 'pandas.core.series.Series'>
Length: 3


2011-12-31 23:59:00    1
2012-01-01 00:00:00    2
2012-01-01 00:02:00    3
dtype: int64

In [167]:
series_minute['2012-01-01 00']

2012-01-01 00:00:00    2
2012-01-01 00:02:00    3
dtype: int64

In [173]:
series_monthly = pd.Series([1, 2, 3],
                 pd.DatetimeIndex(['2011-12', '2012-01', '2012-02']))
print(series_monthly.index.resolution)
print(series_monthly['2011-12'])

day
2011-12-01    1
dtype: int64


In [53]:
ts

2011-01-31    0.469248
2011-02-28    0.353420
2011-03-31   -0.255962
2011-04-29    1.542482
2011-05-31   -0.137880
2011-06-30   -0.975482
2011-07-29   -1.575433
2011-08-31   -1.561767
2011-09-30    1.059957
2011-10-31   -0.392001
2011-11-30    0.811126
2011-12-30   -0.219556
Freq: BM, dtype: float64

In [54]:
z(ts)

----------------------------------------
Type: <class 'pandas.core.series.Series'>
Length: 12


2011-01-31    0.469248
2011-02-28    0.353420
2011-03-31   -0.255962
2011-04-29    1.542482
2011-05-31   -0.137880
2011-06-30   -0.975482
2011-07-29   -1.575433
2011-08-31   -1.561767
2011-09-30    1.059957
2011-10-31   -0.392001
2011-11-30    0.811126
2011-12-30   -0.219556
Freq: BM, dtype: float64

In [56]:
ts['2011-06']

2011-06-30   -0.975482
Freq: BM, dtype: float64

In [57]:
z(dft)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 100000 entries, 2013-01-01 00:00:00 to 2013-03-11 10:39:00
Freq: T
Data columns (total 1 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   A       100000 non-null  float64
dtypes: float64(1)
memory usage: 1.5 MB
None


Unnamed: 0,A
2013-01-01 00:00:00,-0.188308
2013-01-01 00:01:00,-0.908766
2013-01-01 00:02:00,3.289080
2013-01-01 00:03:00,1.546936
2013-01-01 00:04:00,-0.481675
...,...
2013-03-11 10:35:00,-0.467292
2013-03-11 10:36:00,-0.777721
2013-03-11 10:37:00,0.187142
2013-03-11 10:38:00,-2.921198


In [60]:
dft['2013-02-24']

Unnamed: 0,A
2013-02-24 00:00:00,0.337012
2013-02-24 00:01:00,1.048683
2013-02-24 00:02:00,-0.119736
2013-02-24 00:03:00,-1.232204
2013-02-24 00:04:00,0.382528
...,...
2013-02-24 23:55:00,-0.298344
2013-02-24 23:56:00,1.168321
2013-02-24 23:57:00,0.823045
2013-02-24 23:58:00,0.408961


In [61]:
rng2 = pd.date_range('2011-01-01', '2012-01-01', freq='W')

DatetimeIndex(['2011-01-02', '2011-01-09', '2011-01-16', '2011-01-23',
               '2011-01-30', '2011-02-06', '2011-02-13', '2011-02-20',
               '2011-02-27', '2011-03-06', '2011-03-13', '2011-03-20',
               '2011-03-27', '2011-04-03', '2011-04-10', '2011-04-17',
               '2011-04-24', '2011-05-01', '2011-05-08', '2011-05-15',
               '2011-05-22', '2011-05-29', '2011-06-05', '2011-06-12',
               '2011-06-19', '2011-06-26', '2011-07-03', '2011-07-10',
               '2011-07-17', '2011-07-24', '2011-07-31', '2011-08-07',
               '2011-08-14', '2011-08-21', '2011-08-28', '2011-09-04',
               '2011-09-11', '2011-09-18', '2011-09-25', '2011-10-02',
               '2011-10-09', '2011-10-16', '2011-10-23', '2011-10-30',
               '2011-11-06', '2011-11-13', '2011-11-20', '2011-11-27',
               '2011-12-04', '2011-12-11', '2011-12-18', '2011-12-25',
               '2012-01-01'],
              dtype='datetime64[ns]', freq='W-S

In [62]:
ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)

2011-01-02   -0.188494
2011-01-09    1.667016
2011-01-16    0.505273
2011-01-23   -1.474057
2011-01-30   -0.498516
2011-02-06   -1.165027
2011-02-13   -0.337596
2011-02-20   -0.052104
2011-02-27    1.804285
2011-03-06    0.130294
2011-03-13    1.603325
2011-03-20    0.943623
2011-03-27   -0.003974
2011-04-03    0.110405
2011-04-10    2.206680
2011-04-17    0.034970
2011-04-24   -0.279702
2011-05-01   -0.368704
2011-05-08    0.735305
2011-05-15   -0.937733
2011-05-22   -0.397711
2011-05-29    1.616814
2011-06-05   -0.690936
2011-06-12   -0.574000
2011-06-19    0.180460
2011-06-26    0.207952
2011-07-03   -0.179582
2011-07-10    0.906417
2011-07-17   -0.798614
2011-07-24   -0.009809
2011-07-31   -1.647068
2011-08-07   -0.782783
2011-08-14   -1.944176
2011-08-21   -0.829244
2011-08-28   -1.935939
2011-09-04    0.848565
2011-09-11   -0.219305
2011-09-18   -0.859469
2011-09-25   -0.457441
2011-10-02    0.478476
2011-10-09   -2.136958
2011-10-16    0.696783
2011-10-23    0.890155
2011-10-30 

In [63]:
ts2.truncate(before='2011-11', after='2011-12')

2011-11-06    0.510827
2011-11-13   -0.708865
2011-11-20   -0.415951
2011-11-27    0.674801
Freq: W-SUN, dtype: float64

In [64]:
idx = pd.date_range(start='2019-12-29', freq='D', periods=4)

DatetimeIndex(['2019-12-29', '2019-12-30', '2019-12-31', '2020-01-01'], dtype='datetime64[ns]', freq='D')

In [65]:
idx.isocalendar()

Unnamed: 0,year,week,day
2019-12-29,2019,52,7
2019-12-30,2020,1,1
2019-12-31,2020,1,2
2020-01-01,2020,1,3


In [66]:
idx.to_series().dt.isocalendar()

Unnamed: 0,year,week,day
2019-12-29,2019,52,7
2019-12-30,2020,1,1
2019-12-31,2020,1,2
2020-01-01,2020,1,3


In [67]:
ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki')

Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki')

In [68]:
ts + pd.Timedelta(days=1)

Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')

In [69]:
ts + pd.DateOffset(days=1)

Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')

In [70]:
friday = pd.Timestamp('2018-01-05')

Timestamp('2018-01-05 00:00:00')

In [71]:
friday.day_name()

'Friday'

In [72]:
two_business_days = 2 * pd.offsets.BDay()

<2 * BusinessDays>

In [74]:
d1 = two_business_days.apply(friday)
z(d1, d1.day_name())

----------------------------------------
Type: <class 'pandas._libs.tslibs.timestamps.Timestamp'>


Timestamp('2018-01-09 00:00:00')

----------------------------------------
Type: <class 'str'>
Length: 7


'Tuesday'

In [181]:
ts = pd.Timestamp('2018-01-06 00:00:00')

Timestamp('2018-01-06 00:00:00')

In [182]:
bday_off = pd.offsets.BusinessHour()

<BusinessHour: BH=09:00-17:00>

In [185]:
bd2 = 3*bday_off
z(bd2, ts + bd2)


----------------------------------------
Type: <class 'pandas._libs.tslibs.offsets.BusinessHour'>


<3 * BusinessHours: BH=09:00-17:00>

----------------------------------------
Type: <class 'pandas._libs.tslibs.timestamps.Timestamp'>


Timestamp('2018-01-08 12:00:00')

In [76]:
ts.day_name()

'Saturday'

In [77]:
offset = pd.offsets.BusinessHour(start='09:00')

<BusinessHour: BH=09:00-17:00>

In [187]:
from datetime import date
import calendar

In [197]:
d = datetime.datetime(2008, 8, 22, 9, 0)
calendar.day_name[d.weekday()]

'Friday'

In [198]:
pd4 = pd.offsets.Week(weekday=4)

<Week: weekday=4>

In [199]:
d1 = pd4.rollforward(d)

Timestamp('2008-08-22 09:00:00')

In [200]:
d2 = d + pd4

Timestamp('2008-08-29 09:00:00')

In [193]:
d2 = d + 2* pd4
z(d2, d2.dayofweek)

----------------------------------------
Type: <class 'pandas._libs.tslibs.timestamps.Timestamp'>


Timestamp('2008-09-05 09:00:00')

----------------------------------------
Type: <class 'int'>


4

In [78]:
offset.rollforward(ts)

Timestamp('2018-01-08 09:00:00')

In [208]:
d = datetime.datetime(2008, 8, 18, 9, 0)
calendar.day_name[d.weekday()]

'Monday'

In [212]:
d1 = d + pd.offsets.Week()
print(d1, d1 - d)

2008-08-25 09:00:00 7 days 00:00:00


In [213]:
d2 = d + pd.offsets.Week(weekday=4)
print(d2, d2 - d)

2008-08-22 09:00:00 4 days 00:00:00


In [214]:
d3 = d + 2* pd.offsets.Week(weekday=4)
print(d3, d3 - d)

2008-08-29 09:00:00 11 days 00:00:00


In [79]:
from pandas.tseries.holiday import USFederalHolidayCalendar

In [80]:
USFederalHolidayCalendar

pandas.tseries.holiday.USFederalHolidayCalendar

In [81]:
vars(USFederalHolidayCalendar)

mappingproxy({'__module__': 'pandas.tseries.holiday',
              '__doc__': '\n    US Federal Government Holiday Calendar based on rules specified by:\n    https://www.opm.gov/policy-data-oversight/\n       snow-dismissal-procedures/federal-holidays/\n    ',
              'rules': [Holiday: New Years Day (month=1, day=1, observance=<function nearest_workday at 0x1292BAD8>),
               Holiday: Martin Luther King Jr. Day (month=1, day=1, offset=<DateOffset: weekday=MO(+3)>),
               Holiday: Presidents Day (month=2, day=1, offset=<DateOffset: weekday=MO(+3)>),
               Holiday: Memorial Day (month=5, day=31, offset=<DateOffset: weekday=MO(-1)>),
               Holiday: July 4th (month=7, day=4, observance=<function nearest_workday at 0x1292BAD8>),
               Holiday: Labor Day (month=9, day=1, offset=<DateOffset: weekday=MO(+1)>),
               Holiday: Columbus Day (month=10, day=1, offset=<DateOffset: weekday=MO(+2)>),
               Holiday: Veterans Day (mon

In [82]:
ts = pd.Series(range(len(rng)), index=rng)

2011-01-31     0
2011-02-28     1
2011-03-31     2
2011-04-29     3
2011-05-31     4
2011-06-30     5
2011-07-29     6
2011-08-31     7
2011-09-30     8
2011-10-31     9
2011-11-30    10
2011-12-30    11
Freq: BM, dtype: int64

In [215]:
rng = pd.date_range('2012-01-01', '2012-01-03')
ts = pd.Series(range(len(rng)), index=rng)

2012-01-01    0
2012-01-02    1
2012-01-03    2
Freq: D, dtype: int64

In [216]:
ts = ts[:5]

2012-01-01    0
2012-01-02    1
2012-01-03    2
Freq: D, dtype: int64

In [217]:
print(ts.shift(1))
print(ts.shift(-1))

2012-01-01    NaN
2012-01-02    0.0
2012-01-03    1.0
Freq: D, dtype: float64
2012-01-01    1.0
2012-01-02    2.0
2012-01-03    NaN
Freq: D, dtype: float64


In [92]:
ts.shift(5, freq='D')

2012-01-06    0
2012-01-07    1
2012-01-08    2
Freq: D, dtype: int64

In [218]:
ts2 = pd.date_range('01-29-2020', '02-02-2020')

DatetimeIndex(['2020-01-29', '2020-01-30', '2020-01-31', '2020-02-01',
               '2020-02-02'],
              dtype='datetime64[ns]', freq='D')

In [220]:
s2 = pd.DataFrame(range(len(ts2)), index=ts2)

Unnamed: 0,0
2020-01-29,0
2020-01-30,1
2020-01-31,2
2020-02-01,3
2020-02-02,4


In [221]:
s2.shift(freq='M')

Unnamed: 0,0
2020-01-31,0
2020-01-31,1
2020-02-29,2
2020-02-29,3
2020-02-29,4


In [225]:
dr = pd.date_range('1/1/2010', periods=3, freq=3 * pd.offsets.BDay())
z(dr)

----------------------------------------
Type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
Length: 3


DatetimeIndex(['2010-01-01', '2010-01-06', '2010-01-11'], dtype='datetime64[ns]', freq='3B')

In [226]:
ts = pd.Series(np.random.randn(3), index=dr)

2010-01-01   -1.142274
2010-01-06    1.953736
2010-01-11    1.554070
Freq: 3B, dtype: float64

In [227]:
ts.asfreq(pd.offsets.BDay())

2010-01-01   -1.142274
2010-01-04         NaN
2010-01-05         NaN
2010-01-06    1.953736
2010-01-07         NaN
2010-01-08         NaN
2010-01-11    1.554070
Freq: B, dtype: float64

In [228]:
rng = pd.date_range('1/1/2012', periods=100, freq='S')

DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 00:00:01',
               '2012-01-01 00:00:02', '2012-01-01 00:00:03',
               '2012-01-01 00:00:04', '2012-01-01 00:00:05',
               '2012-01-01 00:00:06', '2012-01-01 00:00:07',
               '2012-01-01 00:00:08', '2012-01-01 00:00:09',
               '2012-01-01 00:00:10', '2012-01-01 00:00:11',
               '2012-01-01 00:00:12', '2012-01-01 00:00:13',
               '2012-01-01 00:00:14', '2012-01-01 00:00:15',
               '2012-01-01 00:00:16', '2012-01-01 00:00:17',
               '2012-01-01 00:00:18', '2012-01-01 00:00:19',
               '2012-01-01 00:00:20', '2012-01-01 00:00:21',
               '2012-01-01 00:00:22', '2012-01-01 00:00:23',
               '2012-01-01 00:00:24', '2012-01-01 00:00:25',
               '2012-01-01 00:00:26', '2012-01-01 00:00:27',
               '2012-01-01 00:00:28', '2012-01-01 00:00:29',
               '2012-01-01 00:00:30', '2012-01-01 00:00:31',
               '2012-01-

In [229]:
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)

2012-01-01 00:00:00    165
2012-01-01 00:00:01    239
2012-01-01 00:00:02    109
2012-01-01 00:00:03     14
2012-01-01 00:00:04    422
                      ... 
2012-01-01 00:01:35    147
2012-01-01 00:01:36    344
2012-01-01 00:01:37    483
2012-01-01 00:01:38    146
2012-01-01 00:01:39    214
Freq: S, Length: 100, dtype: int32

In [230]:
ts.resample('5Min').sum()

2012-01-01    23176
Freq: 5T, dtype: int32

In [104]:
ts.resample('5Min').mean()

2012-01-01    272.75
Freq: 5T, dtype: float64

In [105]:
ts.resample('5Min').ohlc()

Unnamed: 0,open,high,low,close
2012-01-01,433,494,8,404


In [106]:
ts.resample('5Min').max()

2012-01-01    494
Freq: 5T, dtype: int32

In [107]:
ts.resample('5Min', closed='right')

<pandas.core.resample.DatetimeIndexResampler object at 0x122C2400>

In [233]:
print(ts.index.min(), '-->', ts.index.max(), '\n' )
print(ts.resample('5Min', label='left', closed='left').count(),'\n')
print(ts.resample('5Min', label='right', closed='left').count(),'\n')
print(ts.resample('5Min', label='left', closed='right').count(),'\n')
print(ts.resample('5Min', label='right', closed='right').count(),'\n')

2012-01-01 00:00:00 --> 2012-01-01 00:01:39 

2012-01-01    100
Freq: 5T, dtype: int64 

2012-01-01 00:05:00    100
Freq: 5T, dtype: int64 

2011-12-31 23:55:00     1
2012-01-01 00:00:00    99
Freq: 5T, dtype: int64 

2012-01-01 00:00:00     1
2012-01-01 00:05:00    99
Freq: 5T, dtype: int64 



In [109]:
ts[:2].resample('250L').asfreq()

2012-01-01 00:00:00.000    433.0
2012-01-01 00:00:00.250      NaN
2012-01-01 00:00:00.500      NaN
2012-01-01 00:00:00.750      NaN
2012-01-01 00:00:01.000    396.0
Freq: 250L, dtype: float64

In [110]:
ts[:2].resample('250L').ffill()

2012-01-01 00:00:00.000    433
2012-01-01 00:00:00.250    433
2012-01-01 00:00:00.500    433
2012-01-01 00:00:00.750    433
2012-01-01 00:00:01.000    396
Freq: 250L, dtype: int32

In [111]:
 rng = pd.date_range('2014-1-1', periods=100, freq='D') + pd.Timedelta('1s')

DatetimeIndex(['2014-01-01 00:00:01', '2014-01-02 00:00:01',
               '2014-01-03 00:00:01', '2014-01-04 00:00:01',
               '2014-01-05 00:00:01', '2014-01-06 00:00:01',
               '2014-01-07 00:00:01', '2014-01-08 00:00:01',
               '2014-01-09 00:00:01', '2014-01-10 00:00:01',
               '2014-01-11 00:00:01', '2014-01-12 00:00:01',
               '2014-01-13 00:00:01', '2014-01-14 00:00:01',
               '2014-01-15 00:00:01', '2014-01-16 00:00:01',
               '2014-01-17 00:00:01', '2014-01-18 00:00:01',
               '2014-01-19 00:00:01', '2014-01-20 00:00:01',
               '2014-01-21 00:00:01', '2014-01-22 00:00:01',
               '2014-01-23 00:00:01', '2014-01-24 00:00:01',
               '2014-01-25 00:00:01', '2014-01-26 00:00:01',
               '2014-01-27 00:00:01', '2014-01-28 00:00:01',
               '2014-01-29 00:00:01', '2014-01-30 00:00:01',
               '2014-01-31 00:00:01', '2014-02-01 00:00:01',
               '2014-02-

In [112]:
ts = pd.Series(range(100), index=rng)

2014-01-01 00:00:01     0
2014-01-02 00:00:01     1
2014-01-03 00:00:01     2
2014-01-04 00:00:01     3
2014-01-05 00:00:01     4
                       ..
2014-04-06 00:00:01    95
2014-04-07 00:00:01    96
2014-04-08 00:00:01    97
2014-04-09 00:00:01    98
2014-04-10 00:00:01    99
Freq: D, Length: 100, dtype: int64

In [113]:
 df = pd.DataFrame(np.random.randn(1000, 3),
   .....:                   index=pd.date_range('1/1/2012', freq='S', periods=1000),
   .....:                   columns=['A', 'B', 'C'])

Unnamed: 0,A,B,C
2012-01-01 00:00:00,0.384204,-2.454906,1.235091
2012-01-01 00:00:01,-0.958532,0.031057,-1.009663
2012-01-01 00:00:02,0.123029,-0.391049,-0.760514
2012-01-01 00:00:03,-0.220247,0.590073,0.369379
2012-01-01 00:00:04,1.141089,-0.486376,-1.470980
...,...,...,...
2012-01-01 00:16:35,-0.263813,-0.374628,-0.651111
2012-01-01 00:16:36,0.443566,1.265355,-0.320721
2012-01-01 00:16:37,0.585116,-0.161443,-1.109152
2012-01-01 00:16:38,0.499554,-1.990193,-0.335773


In [114]:
r = df.resample('3T')

<pandas.core.resample.DatetimeIndexResampler object at 0x10F9D298>

In [115]:
small = pd.Series(
   .....:     range(6),
   .....:     index=pd.to_datetime(['2017-01-01T00:00:00',
   .....:                           '2017-01-01T00:30:00',
   .....:                           '2017-01-01T00:31:00',
   .....:                           '2017-01-01T01:00:00',
   .....:                           '2017-01-01T03:00:00',
   .....:                           '2017-01-01T03:05:00'])
   .....: )

2017-01-01 00:00:00    0
2017-01-01 00:30:00    1
2017-01-01 00:31:00    2
2017-01-01 01:00:00    3
2017-01-01 03:00:00    4
2017-01-01 03:05:00    5
dtype: int64

In [116]:
resampled = small.resample('H')

<pandas.core.resample.DatetimeIndexResampler object at 0x122C0B98>

In [117]:
for name, group in resampled:
   .....:     print("Group: ", name)
   .....:     print("-" * 27)
   .....:     print(group, end="\n\n")

Group:  2017-01-01 00:00:00
---------------------------
2017-01-01 00:00:00    0
2017-01-01 00:30:00    1
2017-01-01 00:31:00    2
dtype: int64

Group:  2017-01-01 01:00:00
---------------------------
2017-01-01 01:00:00    3
dtype: int64

Group:  2017-01-01 02:00:00
---------------------------
Series([], dtype: int64)

Group:  2017-01-01 03:00:00
---------------------------
2017-01-01 03:00:00    4
2017-01-01 03:05:00    5
dtype: int64



In [118]:
start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00'

In [119]:
middle = '2000-10-02 00:00:00'

'2000-10-02 00:00:00'

In [120]:
rng = pd.date_range(start, end, freq='7min')

DatetimeIndex(['2000-10-01 23:30:00', '2000-10-01 23:37:00',
               '2000-10-01 23:44:00', '2000-10-01 23:51:00',
               '2000-10-01 23:58:00', '2000-10-02 00:05:00',
               '2000-10-02 00:12:00', '2000-10-02 00:19:00',
               '2000-10-02 00:26:00'],
              dtype='datetime64[ns]', freq='7T')

In [121]:
ts = pd.Series(np.arange(len(rng)) * 3, index=rng)

2000-10-01 23:30:00     0
2000-10-01 23:37:00     3
2000-10-01 23:44:00     6
2000-10-01 23:51:00     9
2000-10-01 23:58:00    12
2000-10-02 00:05:00    15
2000-10-02 00:12:00    18
2000-10-02 00:19:00    21
2000-10-02 00:26:00    24
Freq: 7T, dtype: int32

In [122]:
pd.Period('2012', freq='A-DEC')

Period('2012', 'A-DEC')

In [123]:
p = pd.Period('2012', freq='A-DEC')

Period('2012', 'A-DEC')

In [124]:
p +1

Period('2013', 'A-DEC')

In [125]:
p -3

Period('2009', 'A-DEC')

In [126]:
p = pd.Period('2012-01', freq='2M')

Period('2012-01', '2M')

In [127]:
p + 2

Period('2012-05', '2M')

In [128]:
p -1

Period('2011-11', '2M')

In [129]:
p == pd.Period('2012-01', freq='3M')

IncompatibleFrequency: Input has different freq=3M from Period(freq=2M)

In [133]:
p2 = pd.Period('2012-01', freq='2M')
p3 = pd.Period('2011-01', freq='2M')
print((p == p2), (p is p2) )
print((p == p3), (p is p3) )

True False
False False


In [134]:
prng = pd.period_range('1/1/2011', '1/1/2012', freq='M')

PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06',
             '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12',
             '2012-01'],
            dtype='period[M]', freq='M')

In [234]:
p1 = pd.Period('2012', freq='A-DEC')

Period('2012', 'A-DEC')

In [235]:
p2 = pd.Period('2012-1-1', freq='D')

Period('2012-01-01', 'D')

In [236]:
p3 = pd.Period('2012-1-1 19:00', freq='H')

Period('2012-01-01 19:00', 'H')

In [237]:
p4 = pd.Period('2012-1-1 19:00', freq='5H')

Period('2012-01-01 19:00', '5H')

In [238]:
p = pd.Period('2014-07-01 09:00', freq='H')

Period('2014-07-01 09:00', 'H')

In [242]:
p + pd.offsets.Hour(2)

Period('2014-07-01 11:00', 'H')

In [245]:
p + datetime.timedelta(minutes=60)

Period('2014-07-01 10:00', 'H')

In [246]:
p + np.timedelta64(7200, 's')

Period('2014-07-01 11:00', 'H')

In [247]:
p = pd.Period('2014-07', freq='M')

Period('2014-07', 'M')

In [248]:
p + pd.offsets.MonthEnd(3)

Period('2014-10', 'M')

In [249]:
prng = pd.period_range('1/1/2011', '1/1/2012', freq='M')

PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06',
             '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12',
             '2012-01'],
            dtype='period[M]', freq='M')

In [251]:
ps = pd.Series(np.random.randn(len(prng)), prng)
z(ps, ps.index)

----------------------------------------
Type: <class 'pandas.core.series.Series'>
Length: 13


2011-01   -0.011362
2011-02   -0.619118
2011-03   -1.788975
2011-04    0.706156
2011-05   -1.920391
2011-06   -0.594074
2011-07    0.195518
2011-08    1.577161
2011-09   -0.608735
2011-10   -0.181588
2011-11   -0.405430
2011-12    0.138490
2012-01    0.388799
Freq: M, dtype: float64

----------------------------------------
Type: <class 'pandas.core.indexes.period.PeriodIndex'>
Length: 13


PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06',
             '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12',
             '2012-01'],
            dtype='period[M]', freq='M')

In [4]:
a = pd.Timedelta('1 days')

Timedelta('1 days 00:00:00')

In [5]:
b = pd.Timedelta('1 day')

Timedelta('1 days 00:00:00')

In [6]:
a = pd.Timedelta('-1 days 2 min 3us')

Timedelta('-2 days +23:57:59.999997')

In [7]:
b = pd.Timedelta('-2 min 3us')

Timedelta('-1 days +23:57:59.999997')

In [8]:
a = pd.Timedelta('1 day 1 second')

Timedelta('1 days 00:00:01')

In [69]:
a = pd.Timedelta.min
b = pd.Timedelta.max
c = pd.Series([a, b])
z(c)
c.astype('timedelta64[M]')

----------------------------------------
Type: <class 'pandas.core.series.Series'>
Length: 2


0   -106752 days +00:12:43.145224193
1     106751 days 23:47:16.854775807
dtype: timedelta64[ns]

0    3506.0
1    3507.0
dtype: float64

In [73]:
a = pd.Series(pd.Timedelta('10 min'))
print(a.astype('timedelta64[s]'))
print(a.astype('timedelta64[M]'))


0    600.0
dtype: float64
0    0.0
dtype: float64


In [61]:
a.astype('timedelta64[D]')

AttributeError: 'Timedelta' object has no attribute 'astype'

In [14]:
repr(a)

"Timedelta('-106752 days +00:12:43.145224193')"

In [19]:
b = pd.Timedelta(a, 'hours')

Timedelta('-106752 days +00:12:43.145224193')

In [18]:
a.view()

TypeError: view() takes exactly one argument (0 given)

In [21]:
pd.Timedelta('1 ms')

Timedelta('0 days 00:00:00.001000')

In [22]:
s = pd.Series(pd.date_range('2012-1-1', periods=3, freq='D'))

0   2012-01-01
1   2012-01-02
2   2012-01-03
dtype: datetime64[ns]

In [23]:
td = pd.Series([pd.Timedelta(days=i) for i in range(3)])

0   0 days
1   1 days
2   2 days
dtype: timedelta64[ns]

In [24]:
df = pd.DataFrame({'A': s, 'B': td})

Unnamed: 0,A,B
0,2012-01-01,0 days
1,2012-01-02,1 days
2,2012-01-03,2 days


In [25]:
df['C'] = df['A'] + df['B']

In [26]:
df

Unnamed: 0,A,B,C
0,2012-01-01,0 days,2012-01-01
1,2012-01-02,1 days,2012-01-03
2,2012-01-03,2 days,2012-01-05


In [27]:
s - s.max()

0   -2 days
1   -1 days
2    0 days
dtype: timedelta64[ns]

In [28]:
s

0   2012-01-01
1   2012-01-02
2   2012-01-03
dtype: datetime64[ns]

In [29]:
s.shift()

0          NaT
1   2012-01-01
2   2012-01-02
dtype: datetime64[ns]

In [30]:
y = s - s.shift()

0      NaT
1   1 days
2   1 days
dtype: timedelta64[ns]

In [31]:
a = datetime.datetime(2011, 1, 1, 3, 5)

datetime.datetime(2011, 1, 1, 3, 5)

In [35]:
b = datetime.timedelta(hours=3, minutes=6)

datetime.timedelta(seconds=11160)

In [36]:
a - b

datetime.datetime(2010, 12, 31, 23, 59)

In [37]:
A = s - pd.Timestamp('20120101') - pd.Timedelta('00:05:05')
B = s - pd.Series(pd.date_range('2012-1-2', periods=3, freq='D'))
df = pd.DataFrame({'A': A, 'B': B})

Unnamed: 0,A,B
0,-1 days +23:54:55,-1 days
1,0 days 23:54:55,-1 days
2,1 days 23:54:55,-1 days


In [39]:
pd.Timedelta('-00:05:05') + pd.Timedelta('01:00:00')

Timedelta('0 days 00:54:55')

In [40]:
df.min()

A   -1 days +23:54:55
B   -1 days +00:00:00
dtype: timedelta64[ns]

In [41]:
df.idxmin()

A    0
B    0
dtype: int64

In [42]:
z(df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype          
---  ------  --------------  -----          
 0   A       3 non-null      timedelta64[ns]
 1   B       3 non-null      timedelta64[ns]
dtypes: timedelta64[ns](2)
memory usage: 112.0 bytes
None


Unnamed: 0,A,B
0,-1 days +23:54:55,-1 days
1,0 days 23:54:55,-1 days
2,1 days 23:54:55,-1 days


In [43]:
a = pd.Timedelta('-1 days')
b = pd.Timedelta('-2 days')
min(a, b)

Timedelta('-2 days +00:00:00')

In [44]:
df

Unnamed: 0,A,B
0,-1 days +23:54:55,-1 days
1,0 days 23:54:55,-1 days
2,1 days 23:54:55,-1 days


In [45]:
df.min()

A   -1 days +23:54:55
B   -1 days +00:00:00
dtype: timedelta64[ns]

In [46]:
df.min(axis=1)

0   -1 days
1   -1 days
2   -1 days
dtype: timedelta64[ns]

In [47]:
df.idxmin()

A    0
B    0
dtype: int64

In [48]:
df.idxmin(axis=1)

0    B
1    B
2    B
dtype: object

In [49]:
y

0      NaT
1   1 days
2   1 days
dtype: timedelta64[ns]

In [50]:
 y.fillna(pd.Timedelta(0))

0   0 days
1   1 days
2   1 days
dtype: timedelta64[ns]

In [51]:
y2 = pd.Series(pd.to_timedelta(['-1 days +00:00:05', 'nat',
   ....:                                 '-1 days +00:00:05', '1 days']))

0   -1 days +00:00:05
1                 NaT
2   -1 days +00:00:05
3     1 days 00:00:00
dtype: timedelta64[ns]

In [74]:
december = pd.Series(pd.date_range('20121201', periods=4))

0   2012-12-01
1   2012-12-02
2   2012-12-03
3   2012-12-04
dtype: datetime64[ns]

In [75]:
january = pd.Series(pd.date_range('20130101', periods=4))

0   2013-01-01
1   2013-01-02
2   2013-01-03
3   2013-01-04
dtype: datetime64[ns]

In [76]:
td = january - december

0   31 days
1   31 days
2   31 days
3   31 days
dtype: timedelta64[ns]

In [77]:
td[2] += datetime.timedelta(minutes=5, seconds=3)

In [78]:
td

0   31 days 00:00:00
1   31 days 00:00:00
2   31 days 00:05:03
3   31 days 00:00:00
dtype: timedelta64[ns]

In [79]:
td[3] = np.nan
td

0   31 days 00:00:00
1   31 days 00:00:00
2   31 days 00:05:03
3                NaT
dtype: timedelta64[ns]

In [80]:
td / np.timedelta64(1, 'D')

0    31.000000
1    31.000000
2    31.003507
3          NaN
dtype: float64

In [81]:
td.dtype

dtype('<m8[ns]')

In [82]:
type(td)

pandas.core.series.Series

In [84]:
a = td.astype('timedelta64[D]')

0    31.0
1    31.0
2    31.0
3     NaN
dtype: float64

In [85]:
z(a)

----------------------------------------
Type: <class 'pandas.core.series.Series'>
Length: 4


0    31.0
1    31.0
2    31.0
3     NaN
dtype: float64

In [86]:
td / np.timedelta64(1, 's')

0    2678400.0
1    2678400.0
2    2678703.0
3          NaN
dtype: float64

In [87]:
td.astype('timedelta64[s]')

0    2678400.0
1    2678400.0
2    2678703.0
3          NaN
dtype: float64

In [91]:
a = np.timedelta64(1, 'M')
type(a)
print(a)

1 months


In [94]:
a.astype('timedelta64[D]')

numpy.timedelta64(30,'D')

In [95]:
30*12

360

In [101]:
a = np.timedelta64(4, 'Y')


numpy.timedelta64(4,'Y')

In [102]:
a.astype('timedelta64[D]')

numpy.timedelta64(1460,'D')

In [103]:
1460/365

4.0

In [133]:
a = np.timedelta64(5000, 'Y')
a_float = a.astype(float)
b = a.astype('timedelta64[M]')
b_float = b.astype(float)
c = a.astype('timedelta64[D]')
c_float = c.astype(float)

print(type(a), a)
print(type(b), b)
print(type(c), c)

<class 'numpy.timedelta64'> 5000 years
<class 'numpy.timedelta64'> 60000 months
<class 'numpy.timedelta64'> 1826212 days


In [134]:
e = c_float/ b_float

30.436866666666667

In [135]:
f = c_float / a_float

365.2424

In [136]:
td

0   31 days 00:00:00
1   31 days 00:00:00
2   31 days 00:05:03
3                NaT
dtype: timedelta64[ns]

In [137]:
td * -1

0   -31 days +00:00:00
1   -31 days +00:00:00
2   -32 days +23:54:57
3                  NaT
dtype: timedelta64[ns]

In [138]:
td * pd.Series([1, 2, 3, 4])

0   31 days 00:00:00
1   62 days 00:00:00
2   93 days 00:15:09
3                NaT
dtype: timedelta64[ns]

In [139]:
td.dt.days

0    31.0
1    31.0
2    31.0
3     NaN
dtype: float64

In [140]:
td.dt.components

Unnamed: 0,days,hours,minutes,seconds,milliseconds,microseconds,nanoseconds
0,31.0,0.0,0.0,0.0,0.0,0.0,0.0
1,31.0,0.0,0.0,0.0,0.0,0.0,0.0
2,31.0,0.0,5.0,3.0,0.0,0.0,0.0
3,,,,,,,


In [141]:
td.dt.seconds

0      0.0
1      0.0
2    303.0
3      NaN
dtype: float64

In [142]:
tds = pd.Timedelta('31 days 5 min 3 sec')

Timedelta('31 days 00:05:03')

In [143]:
pd.TimedeltaIndex(['1 days', '1 days, 00:00:05', np.timedelta64(2, 'D'),
   ....:                    datetime.timedelta(days=2, seconds=2)])

TimedeltaIndex(['1 days 00:00:00', '1 days 00:00:05', '2 days 00:00:00',
                '2 days 00:00:02'],
               dtype='timedelta64[ns]', freq=None)

In [144]:
pd.timedelta_range(start='1 days', periods=5)

TimedeltaIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], dtype='timedelta64[ns]', freq='D')

In [145]:
tdi = pd.TimedeltaIndex(['1 days', pd.NaT, '2 days'])

TimedeltaIndex(['1 days', NaT, '2 days'], dtype='timedelta64[ns]', freq=None)

In [146]:
tdi.to_list()

[Timedelta('1 days 00:00:00'), NaT, Timedelta('2 days 00:00:00')]

In [147]:
 dti = pd.date_range('20130101', periods=3)

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03'], dtype='datetime64[ns]', freq='D')

In [148]:
dti.to_list()

[Timestamp('2013-01-01 00:00:00', freq='D'),
 Timestamp('2013-01-02 00:00:00', freq='D'),
 Timestamp('2013-01-03 00:00:00', freq='D')]

In [149]:
tdi+dti

DatetimeIndex(['2013-01-02', 'NaT', '2013-01-05'], dtype='datetime64[ns]', freq=None)

In [150]:
s

0   2012-01-01
1   2012-01-02
2   2012-01-03
dtype: datetime64[ns]

In [151]:
s = pd.Series(np.arange(100),
   .....:               index=pd.timedelta_range('1 days', periods=100, freq='h'))

1 days 00:00:00     0
1 days 01:00:00     1
1 days 02:00:00     2
1 days 03:00:00     3
1 days 04:00:00     4
                   ..
4 days 23:00:00    95
5 days 00:00:00    96
5 days 01:00:00    97
5 days 02:00:00    98
5 days 03:00:00    99
Freq: H, Length: 100, dtype: int32

In [152]:
s.resample('D').count()

1 days    24
2 days    24
3 days    24
4 days    24
5 days     4
Freq: D, dtype: int64

In [153]:
#Sparse data structures

In [154]:
arr = np.random.randn(10)

array([-1.59199979, -0.20509999,  0.58148891, -0.21881694, -1.98507733,
        1.10520352,  0.32786313, -0.63208972,  0.53675894,  1.65468091])

In [155]:
arr[2:-2] = np.nan

In [156]:
ts = pd.Series(pd.arrays.SparseArray(arr))

0   -1.592000
1   -0.205100
2         NaN
3         NaN
4         NaN
5         NaN
6         NaN
7         NaN
8    0.536759
9    1.654681
dtype: Sparse[float64, nan]

In [157]:
z(ts)

----------------------------------------
Type: <class 'pandas.core.series.Series'>
Length: 10


0   -1.592000
1   -0.205100
2         NaN
3         NaN
4         NaN
5         NaN
6         NaN
7         NaN
8    0.536759
9    1.654681
dtype: Sparse[float64, nan]

In [158]:
df = pd.DataFrame(np.random.randn(10000, 4))

Unnamed: 0,0,1,2,3
0,0.369604,1.065111,0.140481,1.314709
1,1.263720,1.175696,1.278320,0.101929
2,-1.089297,0.014340,1.175882,0.813249
3,0.897332,0.209028,0.209997,1.045556
4,0.397578,0.207756,0.389767,-0.962022
...,...,...,...,...
9995,-0.206049,-0.423061,-0.108680,0.371608
9996,0.109699,-0.457318,-0.192051,-0.408174
9997,-2.359818,-0.346883,-1.169595,0.991021
9998,0.477989,0.927302,-0.982146,1.005736


In [165]:
df.iloc[:9998] = np.nan
df.iloc[9998:, 0] = np.nan

In [166]:
sdf = df.astype(pd.SparseDtype("float", np.nan))

Unnamed: 0,0,1,2,3
0,,,,
1,,,,
2,,,,
3,,,,
4,,,,
...,...,...,...,...
9995,,,,
9996,,,,
9997,,,,
9998,,0.927302,-0.982146,1.005736


In [167]:
sdf.dtypes

0    Sparse[float64, nan]
1    Sparse[float64, nan]
2    Sparse[float64, nan]
3    Sparse[float64, nan]
dtype: object

In [168]:
sdf.sparse.density

0.00015000000000000001

In [174]:
a = df.memory_usage().sum() / 1e3

320.064

In [175]:
b = sdf.memory_usage().sum()/ 1e3

0.136

In [173]:
b/a

0.0004249150169966007

In [176]:
dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]',
   ...:           'complex128', 'object', 'bool']

['int64',
 'float64',
 'datetime64[ns]',
 'timedelta64[ns]',
 'complex128',
 'object',
 'bool']

In [178]:
n = 5000

data = {t: np.random.randint(100, size=n).astype(t) for t in dtypes}

{'int64': array([14, 45,  3, ..., 10, 11, 72], dtype=int64),
 'float64': array([79., 22., 91., ..., 26., 11., 90.]),
 'datetime64[ns]': array(['1970-01-01T00:00:00.000000027', '1970-01-01T00:00:00.000000040',
        '1970-01-01T00:00:00.000000044', ...,
        '1970-01-01T00:00:00.000000086', '1970-01-01T00:00:00.000000013',
        '1970-01-01T00:00:00.000000027'], dtype='datetime64[ns]'),
 'timedelta64[ns]': array([88, 48, 65, ..., 47,  1, 38], dtype='timedelta64[ns]'),
 'complex128': array([26.+0.j, 11.+0.j, 22.+0.j, ...,  0.+0.j, 41.+0.j, 31.+0.j]),
 'object': array([88, 14, 23, ..., 76, 24, 22], dtype=object),
 'bool': array([ True,  True,  True, ...,  True,  True,  True])}

In [179]:
df = pd.DataFrame(data)

Unnamed: 0,int64,float64,datetime64[ns],timedelta64[ns],complex128,object,bool
0,14,79.0,1970-01-01 00:00:00.000000027,0 days 00:00:00.000000088,26.000000+0.000000j,88,True
1,45,22.0,1970-01-01 00:00:00.000000040,0 days 00:00:00.000000048,11.000000+0.000000j,14,True
2,3,91.0,1970-01-01 00:00:00.000000044,0 days 00:00:00.000000065,22.000000+0.000000j,23,True
3,39,70.0,1970-01-01 00:00:00.000000014,0 days 00:00:00.000000016,45.000000+0.000000j,76,True
4,1,39.0,1970-01-01 00:00:00.000000055,0 days 00:00:00.000000040,78.000000+0.000000j,81,True
...,...,...,...,...,...,...,...
4995,90,89.0,1970-01-01 00:00:00.000000058,0 days 00:00:00.000000084,66.000000+0.000000j,11,True
4996,52,9.0,1970-01-01 00:00:00.000000061,0 days 00:00:00.000000083,90.000000+0.000000j,43,True
4997,10,26.0,1970-01-01 00:00:00.000000086,0 days 00:00:00.000000047,0.000000+0.000000j,76,True
4998,11,11.0,1970-01-01 00:00:00.000000013,0 days 00:00:00.000000001,41.000000+0.000000j,24,True


In [180]:
df['categorical'] = df['object'].astype('category')

In [181]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype          
---  ------           --------------  -----          
 0   int64            5000 non-null   int64          
 1   float64          5000 non-null   float64        
 2   datetime64[ns]   5000 non-null   datetime64[ns] 
 3   timedelta64[ns]  5000 non-null   timedelta64[ns]
 4   complex128       5000 non-null   complex128     
 5   object           5000 non-null   object         
 6   bool             5000 non-null   bool           
 7   categorical      5000 non-null   category       
dtypes: bool(1), category(1), complex128(1), datetime64[ns](1), float64(1), int64(1), object(1), timedelta64[ns](1)
memory usage: 268.5+ KB


In [182]:
 df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype          
---  ------           --------------  -----          
 0   int64            5000 non-null   int64          
 1   float64          5000 non-null   float64        
 2   datetime64[ns]   5000 non-null   datetime64[ns] 
 3   timedelta64[ns]  5000 non-null   timedelta64[ns]
 4   complex128       5000 non-null   complex128     
 5   object           5000 non-null   object         
 6   bool             5000 non-null   bool           
 7   categorical      5000 non-null   category       
dtypes: bool(1), category(1), complex128(1), datetime64[ns](1), float64(1), int64(1), object(1), timedelta64[ns](1)
memory usage: 336.8 KB


In [186]:
df.memory_usage(deep=False)/1024

Index               0.062500
int64              39.062500
float64            39.062500
datetime64[ns]     39.062500
timedelta64[ns]    39.062500
complex128         78.125000
object             19.531250
bool                4.882812
categorical         9.664062
dtype: float64

In [187]:
df.memory_usage(deep=True)/1024

Index               0.062500
int64              39.062500
float64            39.062500
datetime64[ns]     39.062500
timedelta64[ns]    39.062500
complex128         78.125000
object             87.779297
bool                4.882812
categorical         9.664062
dtype: float64