# Pandas Tutorial - Part 50

This notebook covers various Series methods including:
- Cross-sectioning with `xs()`
- Working with timezones using `dt.tz_localize()` and `dt.tz_convert()`

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pytz

%matplotlib inline

## Cross-sectioning with `xs()`

The `xs()` method returns a cross-section from a Series or DataFrame with a MultiIndex.

In [None]:
# Build a small animal table and promote its three descriptive columns
# ('class', 'animal', 'locomotion') to a three-level MultiIndex; the two
# numeric columns remain as data.
animal_data = {
    'class': ['mammal', 'mammal', 'mammal', 'bird'],
    'animal': ['cat', 'dog', 'bat', 'penguin'],
    'locomotion': ['walks', 'walks', 'flies', 'walks'],
    'num_legs': [4, 4, 2, 2],
    'num_wings': [0, 0, 2, 2],
}
df = pd.DataFrame(animal_data).set_index(['class', 'animal', 'locomotion'])
print("DataFrame with MultiIndex:", df, sep="\n")

In [None]:
# A bare key is matched against the outermost index level ('class'),
# so this selects every mammal row.
print("Cross-section for 'mammal':", df.xs('mammal'), sep="\n")

In [None]:
# A tuple key is matched level by level from the outside in:
# 'mammal' against 'class', then 'dog' against 'animal'.
print("Cross-section for ('mammal', 'dog'):", df.xs(('mammal', 'dog')), sep="\n")

In [None]:
# Match 'cat' against index level 1 ('animal') instead of the outermost level.
print("Cross-section for 'cat' at level 1:", df.xs('cat', level=1), sep="\n")

In [None]:
# Same selection as before, but drop_level=False keeps the matched level
# in the result's index.
print(
    "Cross-section for 'cat' at level 1 with drop_level=False:",
    df.xs('cat', level=1, drop_level=False),
    sep="\n",
)

In [None]:
# Levels can be addressed by position: level 2 is the innermost one ('locomotion').
print("Cross-section for 'walks' at level 2:", df.xs('walks', level=2), sep="\n")

In [None]:
# Series indexed by a two-level MultiIndex. The index is the product
# letter x number, i.e. (a, one), (a, two), (b, one), (b, two) in that order.
letter_number_index = pd.MultiIndex.from_product(
    [['a', 'b'], ['one', 'two']],
    names=['letter', 'number'],
)
s = pd.Series([1, 2, 3, 4], index=letter_number_index)
print("Series with MultiIndex:", s, sep="\n")

In [None]:
# On a Series, a bare key is likewise matched against the outermost level ('letter').
print("Cross-section for 'a':", s.xs('a'), sep="\n")

In [None]:
# Levels can also be addressed by name rather than position.
print("Cross-section for 'one' at level 'number':", s.xs('one', level='number'), sep="\n")

## Working with Timezones

Pandas provides methods for working with timezones in datetime Series.

### Localizing Timezones with `dt.tz_localize()`

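The `dt.tz_localize()` method attaches a timezone to a tz-naive datetime Series without shifting its wall-clock times. Its `ambiguous` and `nonexistent` parameters control how wall-clock times that occur twice, or not at all, around a DST transition are handled.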

In [None]:
# Five consecutive calendar days with no timezone attached (tz-naive).
s = pd.Series(pd.date_range(start='2023-01-01', end='2023-01-05', freq='D'))
print("Original datetime Series (tz-naive):", s, sep="\n")

In [None]:
# Attach UTC to the naive values; the wall-clock times themselves are unchanged.
s_utc = s.dt.tz_localize(tz='UTC')
print("Datetime Series localized to UTC:", s_utc, sep="\n")

In [None]:
# Interpret the same naive values as US/Eastern wall-clock times.
s_eastern = s.dt.tz_localize(tz='US/Eastern')
print("Datetime Series localized to US/Eastern:", s_eastern, sep="\n")

In [None]:
# Interpret the same naive values as Europe/London wall-clock times.
s_london = s.dt.tz_localize(tz='Europe/London')
print("Datetime Series localized to Europe/London:", s_london, sep="\n")

In [None]:
# Naive wall-clock times around a DST transition. 02:00 and 02:30 each
# appear twice, so these values are ambiguous until a timezone rule
# decides which occurrence is meant.
dst_wall_times = [
    '2018-10-28 01:30:00',
    '2018-10-28 02:00:00',
    '2018-10-28 02:30:00',
    '2018-10-28 02:00:00',
    '2018-10-28 02:30:00',
    '2018-10-28 03:00:00',
    '2018-10-28 03:30:00',
]
s_dst = pd.to_datetime(pd.Series(dst_wall_times))
print("Datetime Series during DST transition:", s_dst, sep="\n")

In [None]:
# ambiguous='infer' asks pandas to resolve the repeated wall-clock times
# from the order in which they appear in the Series.
s_dst_cet = s_dst.dt.tz_localize(tz='CET', ambiguous='infer')
print("Datetime Series localized to CET with ambiguous='infer':", s_dst_cet, sep="\n")

In [None]:
# Resolve ambiguity explicitly with one boolean per element
# (True = treat the wall-clock time as DST, False = as standard time).
ambiguous_wall_times = [
    '2018-10-28 01:20:00',
    '2018-10-28 02:36:00',
    '2018-10-28 03:46:00',
]
s_ambiguous = pd.to_datetime(pd.Series(ambiguous_wall_times))
is_dst = np.array([True, True, False])
s_ambiguous_cet = s_ambiguous.dt.tz_localize('CET', ambiguous=is_dst)
print("Datetime Series localized to CET with explicit ambiguous array:", s_ambiguous_cet, sep="\n")

In [None]:
# Naive wall-clock times around a spring-forward DST transition; the cells
# below localize them with different `nonexistent` policies.
spring_forward_wall_times = ['2015-03-29 02:30:00', '2015-03-29 03:30:00']
s_nonexistent = pd.to_datetime(pd.Series(spring_forward_wall_times))
print("Datetime Series with nonexistent times:", s_nonexistent, sep="\n")

In [None]:
# 'shift_forward' moves a nonexistent wall-clock time to the closest
# existing time after it.
s_nonexistent_forward = s_nonexistent.dt.tz_localize(
    tz='Europe/Warsaw',
    nonexistent='shift_forward',
)
print("Datetime Series localized with nonexistent='shift_forward':", s_nonexistent_forward, sep="\n")

In [None]:
# 'shift_backward' moves a nonexistent wall-clock time to the closest
# existing time before it.
s_nonexistent_backward = s_nonexistent.dt.tz_localize(
    tz='Europe/Warsaw',
    nonexistent='shift_backward',
)
print("Datetime Series localized with nonexistent='shift_backward':", s_nonexistent_backward, sep="\n")

In [None]:
# Localize with nonexistent=Timedelta: nonexistent wall-clock times are shifted
# by the given offset (here one hour forward).
# The unit is written as lowercase 'h': the uppercase 'H' spelling is deprecated
# for Timedelta strings in pandas 2.2, while 'h' is accepted by older releases too.
s_nonexistent_timedelta = s_nonexistent.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h'))
print("Datetime Series localized with nonexistent=Timedelta('1h'):")
print(s_nonexistent_timedelta)

### Converting Timezones with `dt.tz_convert()`

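The `dt.tz_convert()` method converts a tz-aware datetime Series from one timezone to another; the underlying instants in time are unchanged, only their representation differs. Passing `None` converts to UTC and then drops the timezone, and calling the method on a tz-naive Series raises a `TypeError`.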

In [None]:
# Create a tz-aware datetime Series: three hourly timestamps starting at
# 09:00 Europe/Berlin time.
# The frequency is written as lowercase 'h': the uppercase 'H' alias is
# deprecated in pandas 2.2, while 'h' is accepted by older releases too.
dti = pd.date_range(start='2014-08-01 09:00', freq='h', periods=3, tz='Europe/Berlin')
s = pd.Series(dti)
print("Original tz-aware datetime Series (Europe/Berlin):")
print(s)

In [None]:
# Re-express the same instants as US/Eastern wall-clock times.
s_eastern = s.dt.tz_convert(tz='US/Eastern')
print("Datetime Series converted to US/Eastern:", s_eastern, sep="\n")

In [None]:
# Re-express the same instants as Asia/Tokyo wall-clock times.
s_tokyo = s.dt.tz_convert(tz='Asia/Tokyo')
print("Datetime Series converted to Asia/Tokyo:", s_tokyo, sep="\n")

In [None]:
# Re-express the same instants in UTC.
s_utc = s.dt.tz_convert(tz='UTC')
print("Datetime Series converted to UTC:", s_utc, sep="\n")

In [None]:
# Passing None converts to UTC first and then drops the timezone, so the
# resulting naive values are UTC wall-clock times, not the original zone's.
s_naive = s.dt.tz_convert(tz=None)
print("Datetime Series with timezone information removed:", s_naive, sep="\n")

In [None]:
# tz_convert needs a timezone to convert *from*: calling it on a tz-naive
# Series is an error, which is caught and displayed here on purpose.
s_naive = pd.Series(pd.date_range(start='2023-01-01', end='2023-01-03', freq='D'))
print("Tz-naive datetime Series:", s_naive, sep="\n")

try:
    s_naive.dt.tz_convert(tz='UTC')
except TypeError as e:
    print(f"\nError: {e}")

## Practical Applications of Timezone Handling

In [None]:
# Three naive wall-clock readings, each taken in a different city. The
# parallel lists line up by position: timestamps[i] was observed at
# locations[i], whose timezone name is timezones[i].
timestamps = [
    '2023-01-01 08:00:00',  # New York
    '2023-01-01 14:00:00',  # London
    '2023-01-01 23:00:00',  # Tokyo
]
locations = ['New York', 'London', 'Tokyo']
timezones = ['US/Eastern', 'Europe/London', 'Asia/Tokyo']

# Assemble the frame; the timestamp strings are parsed but remain tz-naive.
df = pd.DataFrame(
    data=dict(
        timestamp=pd.to_datetime(timestamps),
        location=locations,
        timezone=timezones,
    )
)
print("Original DataFrame:", df, sep="\n")

In [None]:
# Localize each naive timestamp to the timezone named in the same row.
# The zone differs per row, so the localization is done element by element,
# but the column is assigned in a single step instead of being grown cell by
# cell with df.loc inside iterrows(). Each value keeps its own timezone
# (a column of mixed zones is stored with object dtype), and re-running the
# cell produces the same result.
df['localized_timestamp'] = [
    naive_ts.tz_localize(tz_name)
    for naive_ts, tz_name in zip(df['timestamp'], df['timezone'])
]

print("DataFrame with localized timestamps:")
print(df)

In [None]:
# Convert all timestamps to UTC.
# pd.to_datetime(..., utc=True) converts every tz-aware value to UTC in one
# vectorised call and always returns a tz-aware UTC datetime column, which the
# `.dt` accessor used in the next cell relies on.
df['utc_timestamp'] = pd.to_datetime(df['localized_timestamp'], utc=True)
print("DataFrame with UTC timestamps:")
print(df)

In [None]:
# Render the UTC instants as strings and compare them: exactly one distinct
# value means all three local readings describe the same moment.
utc_times = df['utc_timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S')
print("UTC times:", utc_times, sep="\n")
print(f"\nAll timestamps are at the same UTC time: {utc_times.nunique() == 1}")

## Conclusion

In this notebook, we've explored various Series methods in pandas:

1. Cross-sectioning with `xs()`, which returns a cross-section from a Series or DataFrame with a MultiIndex, allowing for flexible data selection at different levels.
2. Working with timezones using `dt.tz_localize()` and `dt.tz_convert()`, which provide powerful tools for handling datetime data across different timezones.

These methods are essential tools for data manipulation and analysis in pandas, allowing for flexible and powerful operations on your data, especially when working with hierarchical indexes and time series data across different timezones.