# Pandas Part 78: DatetimeIndex

This notebook explores the `DatetimeIndex` class, an immutable, ndarray-like index of `datetime64` values that pandas uses to label and select time-series data.

In [None]:
import locale
from datetime import datetime, time

import numpy as np
import pandas as pd

## 1. Creating DatetimeIndex

There are several ways to create a DatetimeIndex:

In [None]:
# Four common ways to build a DatetimeIndex.

# 1. From a list of date strings
dates = ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04']
dt_idx = pd.DatetimeIndex(dates)
print(f"DatetimeIndex from strings: {dt_idx}")

# 2. From a list of datetime objects
dates = [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 3)]
dt_idx = pd.DatetimeIndex(dates)
print(f"\nDatetimeIndex from datetime objects: {dt_idx}")

# 3. With date_range: a start plus a number of periods (daily when no freq is given)
dt_idx = pd.date_range(start='2023-01-01', periods=5)
print(f"\nDatetimeIndex from date_range (daily): {dt_idx}")

# 4. With an explicit month-end frequency.
# An offset object is used instead of a string alias: 'M' is deprecated since
# pandas 2.2 in favour of 'ME', while older versions do not recognise 'ME'.
# pd.offsets.MonthEnd() works on both.
dt_idx = pd.date_range(start='2023-01-01', periods=5, freq=pd.offsets.MonthEnd())
print(f"\nDatetimeIndex with monthly frequency: {dt_idx}")

### Creating DatetimeIndex with Time Zone

In [None]:
# Attach a timezone at construction time through the tz argument
utc_days = ['2023-01-01', '2023-01-02', '2023-01-03']
dt_idx = pd.DatetimeIndex(utc_days, tz='UTC')
print(f"DatetimeIndex with UTC timezone: {dt_idx}")

# date_range accepts the same tz argument
dt_idx = pd.date_range(start='2023-01-01', periods=3, tz='US/Eastern')
print(f"\nDatetimeIndex with US/Eastern timezone: {dt_idx}")

# tz_convert keeps the same instants and re-expresses them in another zone
dt_idx_pst = dt_idx.tz_convert('US/Pacific')
print(f"\nConverted to US/Pacific timezone: {dt_idx_pst}")

### Handling Ambiguous Times

In [None]:
# In most of Europe, clocks go back 1 hour on the last Sunday of October.
# For Europe/Berlin in 2023 that is 2023-10-29: 03:00 CEST becomes 02:00 CET,
# so every wall-clock time from 02:00 to 02:59 happens twice.
# 01:30 is unambiguous; 02:30 is ambiguous.
naive_times = ['2023-10-29 01:30:00', '2023-10-29 02:30:00']

try:
    # With the default ambiguous='raise', pandas refuses to guess which 02:30 is meant
    dt_idx = pd.DatetimeIndex(naive_times, tz='Europe/Berlin')
    print(f"DatetimeIndex with DST transition: {dt_idx}")
except Exception as exc:
    # The concrete exception class depends on the pandas version / timezone
    # backend (pytz.AmbiguousTimeError on pandas 2.x). It is never
    # OutOfBoundsDatetime, so catch broadly here and report what was raised.
    print("Error: Ambiguous times detected during DST transition")
    print(f"  {type(exc).__name__}: {exc}")

# Handle ambiguous times with 'NaT': ambiguous entries become NaT
dt_idx = pd.DatetimeIndex(naive_times,
                          tz='Europe/Berlin',
                          ambiguous='NaT')
print(f"\nHandling ambiguous times with 'NaT': {dt_idx}")

# Handle ambiguous times with a boolean array, one flag per timestamp:
# True for DST, False for non-DST. The flag only matters for ambiguous entries.
dt_idx = pd.DatetimeIndex(naive_times,
                          tz='Europe/Berlin',
                          ambiguous=[True, False])
print(f"\nHandling ambiguous times with boolean array: {dt_idx}")

## 2. DatetimeIndex Attributes

DatetimeIndex provides various attributes to access datetime components:

In [None]:
# Daily index whose start carries a time of day down to the microsecond,
# so every component printed below has a value to show
start_stamp = '2023-01-01 12:30:45.123456'
dt_idx = pd.date_range(start=start_stamp, periods=3, freq='D')
print(f"DatetimeIndex: {dt_idx}")

# Calendar fields: one value per element of the index
print(f"\nYear: {dt_idx.year}")
print(f"Month: {dt_idx.month}")
print(f"Day: {dt_idx.day}")

# Clock fields
print(f"Hour: {dt_idx.hour}")
print(f"Minute: {dt_idx.minute}")
print(f"Second: {dt_idx.second}")

# Sub-second fields (nanosecond is 0 here: the start has microsecond precision only)
print(f"Microsecond: {dt_idx.microsecond}")
print(f"Nanosecond: {dt_idx.nanosecond}")

### Date and Time Components

In [None]:
# Three consecutive days, all at 12:30:45
dt_idx = pd.date_range(start='2023-01-01 12:30:45', freq='D', periods=3)
print(f"DatetimeIndex: {dt_idx}")

# .date drops the time of day: a numpy array of datetime.date objects
dates = dt_idx.date
first_date = dates[0]
print(f"\nDate component: {dates}")
print(f"Type of first element: {type(first_date)}")

# .time drops the calendar date: a numpy array of datetime.time objects
times = dt_idx.time
first_time = times[0]
print(f"\nTime component: {times}")
print(f"Type of first element: {type(first_time)}")

In [None]:
# Same three days as before, this time localised to UTC
dt_idx = pd.date_range(start='2023-01-01 12:30:45', freq='D', periods=3, tz='UTC')
print(f"DatetimeIndex with timezone: {dt_idx}")

# .timetz is like .time, but each datetime.time keeps its tzinfo
timetz = dt_idx.timetz
first_timetz = timetz[0]
print(f"\nTime with timezone component: {timetz}")
print(f"Type of first element: {type(first_timetz)}")
print(f"Timezone of first element: {first_timetz.tzname()}")

### Calendar-Related Attributes

In [None]:
# Create a DatetimeIndex spanning the first quarter of 2023
dt_idx = pd.date_range(start='2023-01-01', end='2023-03-31', freq='D')
print(f"DatetimeIndex: {dt_idx}")

# Get day of year (1-based)
print(f"\nDay of year for first 5 dates: {dt_idx.dayofyear[:5]}")

# Get ISO week of year.
# DatetimeIndex.weekofyear was removed in pandas 2.0; isocalendar() is the
# replacement and returns a DataFrame with year / week / day columns.
# 2023-01-01 is a Sunday, so it still belongs to ISO week 52 of 2022.
week_of_year = dt_idx.isocalendar().week
print(f"Week of year for first 5 dates: {week_of_year.iloc[:5].tolist()}")

# Get day of week (0=Monday, 6=Sunday)
print(f"Day of week for first 5 dates: {dt_idx.dayofweek[:5]}")

# Get quarter
print(f"Quarter for first 5 dates: {dt_idx.quarter[:5]}")

## 3. DatetimeIndex Methods

DatetimeIndex provides various methods for working with datetime data:

### month_name and day_name Methods

In [None]:
# Create a DatetimeIndex of month-end dates.
# An offset object is used instead of the string alias: 'M' is deprecated since
# pandas 2.2 in favour of 'ME', while older versions do not recognise 'ME'.
monthly_idx = pd.date_range(start='2023-01-31', freq=pd.offsets.MonthEnd(), periods=3)
print(f"Monthly DatetimeIndex: {monthly_idx}")

# Get month names (English when no locale is given)
month_names = monthly_idx.month_name()
print(f"\nMonth names: {month_names}")

# Get month names in a different locale.
# The locale must be installed on the operating system (see `locale -a` on
# Unix); when it is missing, locale.Error is raised, not ImportError.
try:
    month_names_fr = monthly_idx.month_name(locale='fr_FR')
    print(f"Month names (French): {month_names_fr}")
except locale.Error:
    print("French locale 'fr_FR' is not available on this system.")

In [None]:
# Create a DatetimeIndex for daily data (2023-01-01 is a Sunday)
daily_idx = pd.date_range(start='2023-01-01', freq='D', periods=3)
print(f"Daily DatetimeIndex: {daily_idx}")

# Get day names (English when no locale is given)
day_names = daily_idx.day_name()
print(f"\nDay names: {day_names}")

# Get day names in a different locale.
# The locale must be installed on the operating system (see `locale -a` on
# Unix); when it is missing, locale.Error is raised, not ImportError.
try:
    day_names_es = daily_idx.day_name(locale='es_ES')
    print(f"Day names (Spanish): {day_names_es}")
except locale.Error:
    print("Spanish locale 'es_ES' is not available on this system.")

### mean Method

In [None]:
# Three evenly spaced dates, so the average lands exactly on the middle one
sample_days = ['2023-01-01', '2023-01-03', '2023-01-05']
dt_idx = pd.DatetimeIndex(sample_days)
print(f"DatetimeIndex: {dt_idx}")

# mean() reduces the whole index to a single value
mean_date = dt_idx.mean()
print(f"\nMean date: {mean_date}")
print(f"Type: {type(mean_date)}")

In [None]:
# Same dates as before plus one missing entry; 'NaT' is the datetime equivalent of NaN
days_with_gap = ['2023-01-01', '2023-01-03', 'NaT', '2023-01-05']
dt_idx = pd.DatetimeIndex(days_with_gap)
print(f"DatetimeIndex with NaT: {dt_idx}")

# By default (skipna=True) missing entries are left out of the average
mean_date = dt_idx.mean()
print(f"\nMean date (skipna=True): {mean_date}")

# With skipna=False the missing entry is taken into account; the handler
# reports a ValueError instead of aborting the cell should one be raised
try:
    mean_date = dt_idx.mean(skipna=False)
except ValueError as e:
    print(f"Error with skipna=False: {e}")
else:
    print(f"Mean date (skipna=False): {mean_date}")

## 4. Using DatetimeIndex with Series and DataFrame

In [None]:
# Five consecutive days used as the row labels for both objects below
dates = pd.date_range('2023-01-01', freq='D', periods=5)

# A Series labelled by the dates
s = pd.Series(range(5), index=dates)
print("Series with DatetimeIndex:")
print(s)

# A two-column DataFrame sharing the same labels
columns = {'A': range(5), 'B': range(5, 10)}
df = pd.DataFrame(columns, index=dates)
print("\nDataFrame with DatetimeIndex:")
print(df)

### Selecting Data with DatetimeIndex

In [None]:
# Seeded generator so the random columns, and every slice printed below,
# are identical on each run of the notebook
rng = np.random.default_rng(42)

# Create a DataFrame with DatetimeIndex spanning multiple months (100 daily rows)
dates = pd.date_range('2023-01-01', periods=100, freq='D')
df = pd.DataFrame({'A': rng.standard_normal(100), 'B': rng.standard_normal(100)}, index=dates)
print("DataFrame with DatetimeIndex:")
print(df.head())

# Select data for a specific date (a full date string matches one row)
print("\nData for 2023-01-05:")
print(df.loc['2023-01-05'])

# Select data for a specific month (a partial date string selects every row in it)
print("\nData for January 2023:")
print(df.loc['2023-01'].head())

# Select data for a date range (both endpoints are included)
print("\nData from 2023-01-15 to 2023-01-20:")
print(df.loc['2023-01-15':'2023-01-20'])

### Resampling with DatetimeIndex

In [None]:
# Seeded generator so the random data, and therefore the resampled means,
# are identical on each run of the notebook
rng = np.random.default_rng(42)

# Create a DataFrame with DatetimeIndex (100 daily rows)
dates = pd.date_range('2023-01-01', periods=100, freq='D')
df = pd.DataFrame({'A': rng.standard_normal(100), 'B': rng.standard_normal(100)}, index=dates)
print("Original DataFrame (daily data):")
print(df.head())

# Resample to monthly frequency, one row per month labelled with its last day.
# An offset object is used instead of the string alias: 'M' is deprecated since
# pandas 2.2 in favour of 'ME', while older versions do not recognise 'ME'.
monthly = df.resample(pd.offsets.MonthEnd()).mean()
print("\nResampled to monthly frequency:")
print(monthly)

# Resample to weekly frequency
weekly = df.resample('W').mean()
print("\nResampled to weekly frequency:")
print(weekly.head())