## UTC offsets

### Creating timezone aware datetimes

In [2]:
# Import datetime, timezone, and timedelta
from datetime import datetime, timezone, timedelta

# Build an aware datetime for October 1, 2017 at 15:26:26 in UTC
dt = datetime(year=2017, month=10, day=1,
              hour=15, minute=26, second=26,
              tzinfo=timezone.utc)

# Show the ISO 8601 form, which includes the UTC offset
iso_text = dt.isoformat()
print(iso_text)

2017-10-01T15:26:26+00:00


In [3]:
# Fixed-offset timezone eight hours behind UTC (Pacific Standard Time)
pst = timezone(offset=timedelta(hours=-8))

# Same wall-clock moment as before, now labelled as UTC-8
dt = datetime(year=2017, month=10, day=1,
              hour=15, minute=26, second=26,
              tzinfo=pst)

# Show the ISO 8601 form, which includes the -08:00 offset
iso_text = dt.isoformat()
print(iso_text)

2017-10-01T15:26:26-08:00


In [5]:
# Fixed-offset timezone eleven hours ahead of UTC (Australian Eastern Daylight Time)
aedt = timezone(offset=timedelta(hours=11))

# Same wall-clock moment as before, now labelled as UTC+11
dt = datetime(year=2017, month=10, day=1,
              hour=15, minute=26, second=26,
              tzinfo=aedt)

# Show the ISO 8601 form, which includes the +11:00 offset
iso_text = dt.isoformat()
print(iso_text)

2017-10-01T15:26:26+11:00


### Setting timezones


#### Preprocess

In [7]:
import pandas as pd

In [8]:
# Load the bike-trip table from disk
df = pd.read_csv('./dataset/capital-onebike.csv', header=0)

# Keep only the two timestamp columns, which are parsed below
onebike_datetimes_strings = df[['Start date', 'End date']]

# Format of the timestamp strings: year-month-day hour:minute:second
fmt = '%Y-%m-%d %H:%M:%S'

# Parse every row into a dict of naive datetimes.
# itertuples(index=False, name=None) yields plain (start, end) tuples, which
# avoids building a Series per row the way iterrows() does.
onebike_datetimes = [
    {'start': datetime.strptime(start, fmt),
     'end': datetime.strptime(end, fmt)}
    for start, end in onebike_datetimes_strings.itertuples(index=False, name=None)
]

In [10]:
# Fixed-offset timezone four hours behind UTC
edt = timezone(offset=timedelta(hours=-4))

# Attach UTC-4 to the start and end of the first ten trips.
# replace() only swaps tzinfo; the clock fields are left untouched.
for trip in onebike_datetimes[:10]:
    for key in ('start', 'end'):
        trip[key] = trip[key].replace(tzinfo=edt)

### What time did the bike leave in UTC?

In [12]:
# Walk through the first ten trips
for trip in onebike_datetimes[:10]:
    departure = trip['start']

    # Same instant expressed in UTC (astimezone shifts the clock fields)
    dt = departure.astimezone(timezone.utc)

    # Show the original value next to its UTC equivalent
    print('Original:', departure, '| UTC:', dt.isoformat())

Original: 2017-10-01 02:23:25-04:00 | UTC: 2017-10-01T06:23:25+00:00
Original: 2017-10-01 02:42:57-04:00 | UTC: 2017-10-01T06:42:57+00:00
Original: 2017-10-01 17:37:10-04:00 | UTC: 2017-10-01T21:37:10+00:00
Original: 2017-10-01 19:56:45-04:00 | UTC: 2017-10-01T23:56:45+00:00
Original: 2017-10-02 05:23:48-04:00 | UTC: 2017-10-02T09:23:48+00:00
Original: 2017-10-02 05:48:08-04:00 | UTC: 2017-10-02T09:48:08+00:00
Original: 2017-10-02 06:18:10-04:00 | UTC: 2017-10-02T10:18:10+00:00
Original: 2017-10-02 06:37:32-04:00 | UTC: 2017-10-02T10:37:32+00:00
Original: 2017-10-02 19:24:16-04:00 | UTC: 2017-10-02T23:24:16+00:00
Original: 2017-10-03 05:17:07-04:00 | UTC: 2017-10-03T09:17:07+00:00


## Time zone database


### Putting the bike trips into the right time zone

In [13]:
# Import tz
from dateutil import tz

# Eastern Time, looked up by name in the time zone database
et = tz.gettz('America/New_York')

# Re-label the start and end of the first ten trips as Eastern Time.
# replace() only swaps tzinfo; the clock fields are left untouched.
first_ten_trips = onebike_datetimes[:10]
for trip in first_ten_trips:
    for key in ('start', 'end'):
        trip[key] = trip[key].replace(tzinfo=et)

### What time did the bike leave? (Global edition)

In [15]:
# Time zone for the United Kingdom
uk = tz.gettz('Europe/London')

# Start of the very first trip
first_trip = onebike_datetimes[0]
local = first_trip['start']

# The same instant expressed on a UK clock
notlocal = local.astimezone(uk)

# Print both so the offsets can be compared
for moment in (local, notlocal):
    print(moment.isoformat())

2017-10-01T02:23:25-04:00
2017-10-01T07:23:25+01:00


In [16]:
# Time zone for India
ist = tz.gettz('Asia/Kolkata')

# Start of the very first trip
first_trip = onebike_datetimes[0]
local = first_trip['start']

# The same instant expressed on an Indian clock
notlocal = local.astimezone(ist)

# Print both so the offsets can be compared
for moment in (local, notlocal):
    print(moment.isoformat())

2017-10-01T02:23:25-04:00
2017-10-01T11:53:25+05:30


In [18]:
# Time zone for Samoa
sm = tz.gettz('Pacific/Apia')

# Start of the very first trip
first_trip = onebike_datetimes[0]
local = first_trip['start']

# The same instant expressed on a Samoan clock
notlocal = local.astimezone(sm)

# Print both so the offsets can be compared
for moment in (local, notlocal):
    print(moment.isoformat())

2017-10-01T02:23:25-04:00
2017-10-01T20:23:25+14:00


## Starting daylight saving time

### How many hours elapsed around daylight saving?

In [20]:
# Midnight on March 12, 2017 in New York
new_york = tz.gettz('America/New_York')
start = datetime(year=2017, month=3, day=12, tzinfo=new_york)

# Adding a timedelta moves the wall clock forward six hours and keeps the same tzinfo
end = start + timedelta(hours=6)

# Show both endpoints with their UTC offsets
print(" to ".join((start.isoformat(), end.isoformat())))

2017-03-12T00:00:00-05:00 to 2017-03-12T06:00:00-04:00


In [21]:
# How many hours have elapsed, according to direct subtraction?
# start and end carry the same tzinfo object, so Python subtracts the
# wall-clock values and ignores the differing UTC offsets.
elapsed = end - start
print(elapsed.total_seconds() / (60 * 60))

6.0


In [22]:
# What if we move to UTC first? Converting both endpoints applies each one's
# own UTC offset before the subtraction.
start_utc = start.astimezone(timezone.utc)
end_utc = end.astimezone(timezone.utc)
print((end_utc - start_utc).total_seconds() / (60 * 60))

5.0


### March 29, throughout a decade

In [23]:
# Template date: midnight on March 29, 2000 in London
london = tz.gettz('Europe/London')
dt = datetime(year=2000, month=3, day=29, tzinfo=london)

# Loop over the years 2000 through 2010, swap in each year, and print the
# ISO timestamp; the UTC offset is looked up afresh for each resulting date
for y in range(2000, 2011):
    same_day = dt.replace(year=y)
    print(same_day.isoformat())

2000-03-29T00:00:00+01:00
2001-03-29T00:00:00+01:00
2002-03-29T00:00:00+00:00
2003-03-29T00:00:00+00:00
2004-03-29T00:00:00+01:00
2005-03-29T00:00:00+01:00
2006-03-29T00:00:00+01:00
2007-03-29T00:00:00+01:00
2008-03-29T00:00:00+00:00
2009-03-29T00:00:00+00:00
2010-03-29T00:00:00+01:00


## Ending daylight saving time

### Finding ambiguous datetimes


In [27]:
# Look the zone up once instead of twice per trip. Sharing a single tzinfo
# object also guarantees that each trip's start and end carry the *same*
# tzinfo, so comparing them directly compares their wall-clock values.
et = tz.gettz('America/New_York')

# Loop over trips
for trip in onebike_datetimes:
    # Label both endpoints as Eastern Time (clock fields are unchanged)
    trip['start'] = trip['start'].replace(tzinfo=et)
    trip['end'] = trip['end'].replace(tzinfo=et)
    # Report endpoints whose wall-clock time is ambiguous in this zone
    if tz.datetime_ambiguous(trip['start']):
        print('Ambiguous start at ' + str(trip['start']))
    if tz.datetime_ambiguous(trip['end']):
        print('Ambiguous end at ' + str(trip['end']))

Ambiguous start at 2017-11-05 01:56:50-04:00
Ambiguous end at 2017-11-05 01:01:04-04:00


### Cleaning daylight saving data with fold


In [30]:
# Duration of every trip in seconds, measured in UTC
trip_durations = []
for trip in onebike_datetimes:
    # An end that reads earlier than its start is marked as the second
    # occurrence of that wall-clock time by setting fold=1
    if trip['end'] < trip['start']:
        trip['end'] = tz.enfold(trip['end'], fold=1)

    # Express both endpoints in UTC before subtracting
    start = trip['start'].astimezone(timezone.utc)
    end = trip['end'].astimezone(timezone.utc)

    # Record the elapsed time for this trip
    trip_length_seconds = (end - start).total_seconds()
    trip_durations.append(trip_length_seconds)

# Report the shortest trip duration
shortest = min(trip_durations)
print("Shortest trip: " + str(shortest))

Shortest trip: 116.0
