#### Pandas Part 78: DatetimeIndex

This notebook explores the DatetimeIndex class, which is an immutable ndarray of datetime64 data that can be used for indexing in pandas.

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, time

##### 1. Creating DatetimeIndex

There are several ways to create a DatetimeIndex:

In [2]:
# Create DatetimeIndex from a list of datetime-like strings
dates = ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04']
dt_idx = pd.DatetimeIndex(dates)
print(f"DatetimeIndex from strings: {dt_idx}")

# Create DatetimeIndex from datetime objects
dates = [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 3)]
dt_idx = pd.DatetimeIndex(dates)
print(f"\nDatetimeIndex from datetime objects: {dt_idx}")

# Create DatetimeIndex using date_range
dt_idx = pd.date_range(start='2023-01-01', periods=5)
print(f"\nDatetimeIndex from date_range (daily): {dt_idx}")

# Create DatetimeIndex with a specific frequency
dt_idx = pd.date_range(start='2023-01-01', periods=5, freq='M')  # Monthly frequency
print(f"\nDatetimeIndex with monthly frequency: {dt_idx}")

DatetimeIndex from strings: DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04'], dtype='datetime64[ns]', freq=None)

DatetimeIndex from datetime objects: DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03'], dtype='datetime64[ns]', freq=None)

DatetimeIndex from date_range (daily): DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05'],
              dtype='datetime64[ns]', freq='D')

DatetimeIndex with monthly frequency: DatetimeIndex(['2023-01-31', '2023-02-28', '2023-03-31', '2023-04-30',
               '2023-05-31'],
              dtype='datetime64[ns]', freq='ME')


  dt_idx = pd.date_range(start='2023-01-01', periods=5, freq='M')  # Monthly frequency


### Creating DatetimeIndex with Time Zone

In [3]:
# Create DatetimeIndex with timezone
dt_idx = pd.DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03'], tz='UTC')
print(f"DatetimeIndex with UTC timezone: {dt_idx}")

# Create DatetimeIndex with a different timezone
dt_idx = pd.date_range(start='2023-01-01', periods=3, tz='US/Eastern')
print(f"\nDatetimeIndex with US/Eastern timezone: {dt_idx}")

# Convert timezone
dt_idx_pst = dt_idx.tz_convert('US/Pacific')
print(f"\nConverted to US/Pacific timezone: {dt_idx_pst}")

DatetimeIndex with UTC timezone: DatetimeIndex(['2023-01-01 00:00:00+00:00', '2023-01-02 00:00:00+00:00',
               '2023-01-03 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

DatetimeIndex with US/Eastern timezone: DatetimeIndex(['2023-01-01 00:00:00-05:00', '2023-01-02 00:00:00-05:00',
               '2023-01-03 00:00:00-05:00'],
              dtype='datetime64[ns, US/Eastern]', freq='D')

Converted to US/Pacific timezone: DatetimeIndex(['2022-12-31 21:00:00-08:00', '2023-01-01 21:00:00-08:00',
               '2023-01-02 21:00:00-08:00'],
              dtype='datetime64[ns, US/Pacific]', freq='D')


### Handling Ambiguous Times

In [7]:
# Create a DatetimeIndex that includes a DST transition
# In most of Europe, clocks go back 1 hour on the last Sunday of October
# The ambiguous hour is typically 2:00-3:00 AM
try:
    # This will raise an exception for ambiguous times
    dt_idx = pd.DatetimeIndex(['2023-10-29 02:00:00', '2023-10-29 02:30:00'], tz='Europe/Berlin')
    print(f"DatetimeIndex with DST transition: {dt_idx}")
except Exception as e:
    print(f"Error: {e}")
    
# Handle ambiguous times with 'NaT'
dt_idx = pd.DatetimeIndex(['2023-10-29 02:00:00', '2023-10-29 02:30:00'], 
                          tz='Europe/Berlin', 
                          ambiguous='NaT')
print(f"\nHandling ambiguous times with 'NaT': {dt_idx}")

# Handle ambiguous times with boolean array
dt_idx = pd.DatetimeIndex(['2023-10-29 02:00:00', '2023-10-29 02:30:00'], 
                          tz='Europe/Berlin', 
                          ambiguous=[True, False])  # True for DST, False for non-DST
print(f"\nHandling ambiguous times with boolean array: {dt_idx}")

# Handle ambiguous times with a single boolean
dt_idx = pd.DatetimeIndex(['2023-10-29 02:00:00', '2023-10-29 02:30:00'], 
                          tz='Europe/Berlin', 
                          ambiguous=True)  # All times are DST
print(f"\nHandling ambiguous times with True (all DST): {dt_idx}")

Error: Cannot infer dst time from 2023-10-29 02:00:00, try using the 'ambiguous' argument

Handling ambiguous times with 'NaT': DatetimeIndex(['NaT', 'NaT'], dtype='datetime64[ns, Europe/Berlin]', freq=None)

Handling ambiguous times with boolean array: DatetimeIndex(['2023-10-29 02:00:00+02:00', '2023-10-29 02:30:00+01:00'], dtype='datetime64[ns, Europe/Berlin]', freq=None)

Handling ambiguous times with True (all DST): DatetimeIndex(['2023-10-29 02:00:00+02:00', '2023-10-29 02:30:00+02:00'], dtype='datetime64[ns, Europe/Berlin]', freq=None)


##### 2. DatetimeIndex Attributes

DatetimeIndex provides various attributes to access datetime components:

In [8]:
# Create a DatetimeIndex
dt_idx = pd.date_range(start='2023-01-01 12:30:45.123456', periods=3, freq='D')
print(f"DatetimeIndex: {dt_idx}")

# Access year component
print(f"\nYear: {dt_idx.year}")

# Access month component
print(f"Month: {dt_idx.month}")

# Access day component
print(f"Day: {dt_idx.day}")

# Access hour component
print(f"Hour: {dt_idx.hour}")

# Access minute component
print(f"Minute: {dt_idx.minute}")

# Access second component
print(f"Second: {dt_idx.second}")

# Access microsecond component
print(f"Microsecond: {dt_idx.microsecond}")

# Access nanosecond component
print(f"Nanosecond: {dt_idx.nanosecond}")

DatetimeIndex: DatetimeIndex(['2023-01-01 12:30:45.123456', '2023-01-02 12:30:45.123456',
               '2023-01-03 12:30:45.123456'],
              dtype='datetime64[ns]', freq='D')

Year: Index([2023, 2023, 2023], dtype='int32')
Month: Index([1, 1, 1], dtype='int32')
Day: Index([1, 2, 3], dtype='int32')
Hour: Index([12, 12, 12], dtype='int32')
Minute: Index([30, 30, 30], dtype='int32')
Second: Index([45, 45, 45], dtype='int32')
Microsecond: Index([123456, 123456, 123456], dtype='int32')
Nanosecond: Index([0, 0, 0], dtype='int32')


### Date and Time Components

In [9]:
# Create a DatetimeIndex
dt_idx = pd.date_range(start='2023-01-01 12:30:45', periods=3, freq='D')
print(f"DatetimeIndex: {dt_idx}")

# Get date component (returns numpy array of datetime.date objects)
dates = dt_idx.date
print(f"\nDate component: {dates}")
print(f"Type of first element: {type(dates[0])}")

# Get time component (returns numpy array of datetime.time objects)
times = dt_idx.time
print(f"\nTime component: {times}")
print(f"Type of first element: {type(times[0])}")

DatetimeIndex: DatetimeIndex(['2023-01-01 12:30:45', '2023-01-02 12:30:45',
               '2023-01-03 12:30:45'],
              dtype='datetime64[ns]', freq='D')

Date component: [datetime.date(2023, 1, 1) datetime.date(2023, 1, 2)
 datetime.date(2023, 1, 3)]
Type of first element: <class 'datetime.date'>

Time component: [datetime.time(12, 30, 45) datetime.time(12, 30, 45)
 datetime.time(12, 30, 45)]
Type of first element: <class 'datetime.time'>


In [10]:
# Create a DatetimeIndex with timezone
dt_idx = pd.date_range(start='2023-01-01 12:30:45', periods=3, freq='D', tz='UTC')
print(f"DatetimeIndex with timezone: {dt_idx}")

# Get time with timezone component
timetz = dt_idx.timetz
print(f"\nTime with timezone component: {timetz}")
print(f"Type of first element: {type(timetz[0])}")
print(f"Timezone of first element: {timetz[0].tzname()}")

DatetimeIndex with timezone: DatetimeIndex(['2023-01-01 12:30:45+00:00', '2023-01-02 12:30:45+00:00',
               '2023-01-03 12:30:45+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

Time with timezone component: [datetime.time(12, 30, 45, tzinfo=datetime.timezone.utc)
 datetime.time(12, 30, 45, tzinfo=datetime.timezone.utc)
 datetime.time(12, 30, 45, tzinfo=datetime.timezone.utc)]
Type of first element: <class 'datetime.time'>
Timezone of first element: UTC


### Calendar-Related Attributes

In [12]:
# Create a DatetimeIndex spanning multiple months
dt_idx = pd.date_range(start='2023-01-01', end='2023-03-31', freq='D')
print(f"DatetimeIndex: {dt_idx}")

# Get day of year
print(f"\nDay of year for first 5 dates: {dt_idx.dayofyear[:5]}")

# Get week of year using isocalendar()
print(f"Week of year for first 5 dates: {dt_idx.isocalendar().week[:5]}")

# Get day of week (0=Monday, 6=Sunday)
print(f"Day of week for first 5 dates: {dt_idx.dayofweek[:5]}")

# Get quarter
print(f"Quarter for first 5 dates: {dt_idx.quarter[:5]}")

DatetimeIndex: DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05', '2023-01-06', '2023-01-07', '2023-01-08',
               '2023-01-09', '2023-01-10', '2023-01-11', '2023-01-12',
               '2023-01-13', '2023-01-14', '2023-01-15', '2023-01-16',
               '2023-01-17', '2023-01-18', '2023-01-19', '2023-01-20',
               '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24',
               '2023-01-25', '2023-01-26', '2023-01-27', '2023-01-28',
               '2023-01-29', '2023-01-30', '2023-01-31', '2023-02-01',
               '2023-02-02', '2023-02-03', '2023-02-04', '2023-02-05',
               '2023-02-06', '2023-02-07', '2023-02-08', '2023-02-09',
               '2023-02-10', '2023-02-11', '2023-02-12', '2023-02-13',
               '2023-02-14', '2023-02-15', '2023-02-16', '2023-02-17',
               '2023-02-18', '2023-02-19', '2023-02-20', '2023-02-21',
               '2023-02-22', '2023-02-23', '2023-02-24', '2023

##### 3. DatetimeIndex Methods

DatetimeIndex provides various methods for working with datetime data:

### month_name and day_name Methods

In [13]:
# Create a DatetimeIndex for monthly data
monthly_idx = pd.date_range(start='2023-01-31', freq='M', periods=3)
print(f"Monthly DatetimeIndex: {monthly_idx}")

# Get month names
month_names = monthly_idx.month_name()
print(f"\nMonth names: {month_names}")

# Get month names in a different locale
try:
    month_names_fr = monthly_idx.month_name(locale='fr_FR')
    print(f"Month names (French): {month_names_fr}")
except ImportError:
    print("French locale not available. Make sure you have the 'babel' package installed.")

Monthly DatetimeIndex: DatetimeIndex(['2023-01-31', '2023-02-28', '2023-03-31'], dtype='datetime64[ns]', freq='ME')

Month names: Index(['January', 'February', 'March'], dtype='object')
Month names (French): Index(['Janvier', 'Février', 'Mars'], dtype='object')


  monthly_idx = pd.date_range(start='2023-01-31', freq='M', periods=3)


In [14]:
# Create a DatetimeIndex for daily data
daily_idx = pd.date_range(start='2023-01-01', freq='D', periods=3)
print(f"Daily DatetimeIndex: {daily_idx}")

# Get day names
day_names = daily_idx.day_name()
print(f"\nDay names: {day_names}")

# Get day names in a different locale
try:
    day_names_es = daily_idx.day_name(locale='es_ES')
    print(f"Day names (Spanish): {day_names_es}")
except ImportError:
    print("Spanish locale not available. Make sure you have the 'babel' package installed.")

Daily DatetimeIndex: DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03'], dtype='datetime64[ns]', freq='D')

Day names: Index(['Sunday', 'Monday', 'Tuesday'], dtype='object')
Day names (Spanish): Index(['Domingo', 'Lunes', 'Martes'], dtype='object')


### mean Method

In [15]:
# Create a DatetimeIndex
dt_idx = pd.DatetimeIndex(['2023-01-01', '2023-01-03', '2023-01-05'])
print(f"DatetimeIndex: {dt_idx}")

# Calculate the mean
mean_date = dt_idx.mean()
print(f"\nMean date: {mean_date}")
print(f"Type: {type(mean_date)}")

DatetimeIndex: DatetimeIndex(['2023-01-01', '2023-01-03', '2023-01-05'], dtype='datetime64[ns]', freq=None)

Mean date: 2023-01-03 00:00:00
Type: <class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [16]:
# Create a DatetimeIndex with NaT (Not a Time)
dt_idx = pd.DatetimeIndex(['2023-01-01', '2023-01-03', 'NaT', '2023-01-05'])
print(f"DatetimeIndex with NaT: {dt_idx}")

# Calculate the mean (skipna=True by default)
mean_date = dt_idx.mean()
print(f"\nMean date (skipna=True): {mean_date}")

# Calculate the mean with skipna=False
try:
    mean_date = dt_idx.mean(skipna=False)
    print(f"Mean date (skipna=False): {mean_date}")
except ValueError as e:
    print(f"Error with skipna=False: {e}")

DatetimeIndex with NaT: DatetimeIndex(['2023-01-01', '2023-01-03', 'NaT', '2023-01-05'], dtype='datetime64[ns]', freq=None)

Mean date (skipna=True): 2023-01-03 00:00:00
Mean date (skipna=False): NaT


##### 4. Using DatetimeIndex with Series and DataFrame

In [17]:
# Create a Series with DatetimeIndex
dates = pd.date_range('2023-01-01', periods=5, freq='D')
s = pd.Series(range(5), index=dates)
print("Series with DatetimeIndex:")
print(s)

# Create a DataFrame with DatetimeIndex
df = pd.DataFrame({'A': range(5), 'B': range(5, 10)}, index=dates)
print("\nDataFrame with DatetimeIndex:")
print(df)

Series with DatetimeIndex:
2023-01-01    0
2023-01-02    1
2023-01-03    2
2023-01-04    3
2023-01-05    4
Freq: D, dtype: int64

DataFrame with DatetimeIndex:
            A  B
2023-01-01  0  5
2023-01-02  1  6
2023-01-03  2  7
2023-01-04  3  8
2023-01-05  4  9


### Selecting Data with DatetimeIndex

In [18]:
# Create a DataFrame with DatetimeIndex spanning multiple months
dates = pd.date_range('2023-01-01', periods=100, freq='D')
df = pd.DataFrame({'A': np.random.randn(100), 'B': np.random.randn(100)}, index=dates)
print("DataFrame with DatetimeIndex:")
print(df.head())

# Select data for a specific date
print("\nData for 2023-01-05:")
print(df.loc['2023-01-05'])

# Select data for a specific month
print("\nData for January 2023:")
print(df.loc['2023-01'].head())

# Select data for a date range
print("\nData from 2023-01-15 to 2023-01-20:")
print(df.loc['2023-01-15':'2023-01-20'])

DataFrame with DatetimeIndex:
                   A         B
2023-01-01  1.278621  0.927983
2023-01-02 -1.283737  0.029830
2023-01-03  0.932724  0.384208
2023-01-04  2.177500 -0.138966
2023-01-05  2.759423  0.180388

Data for 2023-01-05:
A    2.759423
B    0.180388
Name: 2023-01-05 00:00:00, dtype: float64

Data for January 2023:
                   A         B
2023-01-01  1.278621  0.927983
2023-01-02 -1.283737  0.029830
2023-01-03  0.932724  0.384208
2023-01-04  2.177500 -0.138966
2023-01-05  2.759423  0.180388

Data from 2023-01-15 to 2023-01-20:
                   A         B
2023-01-15  0.572640  1.842491
2023-01-16  0.094303 -0.273746
2023-01-17  0.256379  0.245919
2023-01-18 -2.009780 -1.274692
2023-01-19  1.180384  0.422269
2023-01-20 -0.109700  0.970320


### Resampling with DatetimeIndex

In [19]:
# Create a DataFrame with DatetimeIndex
dates = pd.date_range('2023-01-01', periods=100, freq='D')
df = pd.DataFrame({'A': np.random.randn(100), 'B': np.random.randn(100)}, index=dates)
print("Original DataFrame (daily data):")
print(df.head())

# Resample to monthly frequency
monthly = df.resample('M').mean()
print("\nResampled to monthly frequency:")
print(monthly)

# Resample to weekly frequency
weekly = df.resample('W').mean()
print("\nResampled to weekly frequency:")
print(weekly.head())

Original DataFrame (daily data):
                   A         B
2023-01-01  0.503237 -0.698871
2023-01-02 -0.794171  0.380415
2023-01-03  0.121533  1.342268
2023-01-04 -0.726076 -1.603510
2023-01-05 -0.002185  0.481766

Resampled to monthly frequency:
                   A         B
2023-01-31 -0.029624  0.142338
2023-02-28 -0.178096  0.105550
2023-03-31 -0.202149  0.850325
2023-04-30  0.216158  0.164368

Resampled to weekly frequency:
                   A         B
2023-01-01  0.503237 -0.698871
2023-01-08 -0.363987  0.241916
2023-01-15  0.069081 -0.014906
2023-01-22  0.108073 -0.304444
2023-01-29 -0.246854  0.740585


  monthly = df.resample('M').mean()
