# Time Series Analysis 1

In [72]:
import os
import time

In [110]:
import numpy as np
import pandas as pd

In [12]:
import gmaps
import gmaps.datasets

### Crimes in Sacramento committed 10 seconds before midnight in 2006

In [27]:
sacramento_crime = pd.read_csv('data/SacramentocrimeJanuary2006.csv', index_col=0)

In [30]:
sacramento_crime.index = pd.to_datetime(sacramento_crime.index)

In [31]:
sacramento_crime.head()

Unnamed: 0_level_0,address,district,beat,grid,crimedescr,ucr_ncic_code,latitude,longitude
cdatetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2006-01-01,3108 OCCIDENTAL DR,3,3C,1115,10851(A)VC TAKE VEH W/O OWNER,2404,38.55042,-121.391416
2006-01-01,2082 EXPEDITION WAY,5,5A,1512,459 PC BURGLARY RESIDENCE,2204,38.473501,-121.490186
2006-01-01,4 PALEN CT,2,2A,212,10851(A)VC TAKE VEH W/O OWNER,2404,38.657846,-121.462101
2006-01-01,22 BECKFORD CT,6,6C,1443,476 PC PASS FICTICIOUS CHECK,2501,38.506774,-121.426951
2006-01-01,3421 AUBURN BLVD,2,2A,508,459 PC BURGLARY-UNSPECIFIED,2299,38.637448,-121.384613


In [32]:
gmaps.configure(api_key=os.environ["GOOGLE_API_KEY"])

In [33]:
locations = sacramento_crime[['latitude', 'longitude']]

In [46]:
late_locations = sacramento_crime.between_time('23:59', '23:59:59')[['latitude', 'longitude']]

In [47]:
fig = gmaps.figure()
fig.add_layer(gmaps.heatmap_layer(locations))
markers = gmaps.marker_layer(late_locations)
fig.add_layer(markers)
fig

Figure(layout=FigureLayout(height='420px'))

## Dates and times

### Timestamps

In [48]:
now = pd.to_datetime('now')

In [49]:
now

Timestamp('2018-11-02 22:51:58.832987')

In [54]:
now.year, now.month, now.week, now.day, now.hour, now.minute, now.second, now.microsecond

(2018, 11, 44, 2, 22, 51, 58, 832987)

In [56]:
now.month_name(), now.day_name()

('November', 'Friday')

### Formatting timestamps

See format [codes](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior)

In [67]:
now.strftime('%I:%m%p %d-%b-%Y')

'10:11PM 02-Nov-2018'

### Parsing time strings

#### `pandas` can handle standard formats

In [2]:
ts = pd.to_datetime('6-Dec-2018 4:45 PM')

In [68]:
ts

Timestamp('2018-11-02 22:25:41.736841')

#### For unusual formats, use `strptime`

In [70]:
ts = pd.datetime.strptime('10:11PM 02-Nov-2018', '%I:%m%p %d-%b-%Y')

In [71]:
ts

datetime.datetime(2018, 11, 2, 22, 0)

### Intervals

In [73]:
then = pd.to_datetime('now')
time.sleep(5)
now = pd.to_datetime('now')

In [74]:
now - then

Timedelta('0 days 00:00:05.002946')

### Date ranges

A date range is just a collection of time stamps.

In [99]:
dates = pd.date_range(then, now, freq='s')

In [100]:
dates

DatetimeIndex(['2018-11-02 23:04:26.300898', '2018-11-02 23:04:27.300898',
               '2018-11-02 23:04:28.300898', '2018-11-02 23:04:29.300898',
               '2018-11-02 23:04:30.300898', '2018-11-02 23:04:31.300898'],
              dtype='datetime64[ns]', freq='S')

In [101]:
(then - pd.to_timedelta('1.5s')) in dates

False

### Periods

Periods are intervals, not a collection of timestamps.

In [102]:
span = dates.to_period()

In [104]:
span

PeriodIndex(['2018-11-02 23:04:26', '2018-11-02 23:04:27',
             '2018-11-02 23:04:28', '2018-11-02 23:04:29',
             '2018-11-02 23:04:30', '2018-11-02 23:04:31'],
            dtype='period[S]', freq='S')

In [103]:
(then + pd.to_timedelta('1.5s')) in span

True

### Resampling

Sometimes there is a need to generate new time intervals, for example, to regularize irregularly timed observations.

#### Down-sampling

The `resample` method has the same syntax as `groupby`, in that you can apply an aggregate function to the new intervals.

In [154]:
index = pd.date_range(pd.to_datetime('1-1-2018'), periods=365, freq='d')

In [155]:
series = pd.Series(np.arange(len(index)), index=index)

In [156]:
series.head()

2018-01-01    0
2018-01-02    1
2018-01-03    2
2018-01-04    3
2018-01-05    4
Freq: D, dtype: int64

In [157]:
sereis_weekly_average = series.resample('w').mean()
sereis_weekly_average.head()

2018-01-07     3
2018-01-14    10
2018-01-21    17
2018-01-28    24
2018-02-04    31
Freq: W-SUN, dtype: int64

In [158]:
sereis_monthly_sum = series.resample('m').sum()
sereis_monthly_sum.head()

2018-01-31     465
2018-02-28    1246
2018-03-31    2294
2018-04-30    3135
2018-05-31    4185
Freq: M, dtype: int64

In [159]:
sereis_10day_median = series.resample('10d').median()
sereis_10day_median.head()

2018-01-01     4.5
2018-01-11    14.5
2018-01-21    24.5
2018-01-31    34.5
2018-02-10    44.5
dtype: float64

#### Up-sampling

For up-sampling, we need to figure out what we want to do with the missing values. The usual choices are forward fill, backward fill, or interpolation using one of many built-in methods.

In [160]:
upsampled = series.resample('12h')

In [171]:
upsampled.asfreq()[:5]

2018-01-01 00:00:00    0.0
2018-01-01 12:00:00    NaN
2018-01-02 00:00:00    1.0
2018-01-02 12:00:00    NaN
2018-01-03 00:00:00    2.0
Freq: 12H, dtype: float64

In [162]:
upsampled.ffill().head()

2018-01-01 00:00:00    0
2018-01-01 12:00:00    0
2018-01-02 00:00:00    1
2018-01-02 12:00:00    1
2018-01-03 00:00:00    2
Freq: 12H, dtype: int64

In [163]:
upsampled.bfill().head()

2018-01-01 00:00:00    0
2018-01-01 12:00:00    1
2018-01-02 00:00:00    1
2018-01-02 12:00:00    2
2018-01-03 00:00:00    2
Freq: 12H, dtype: int64

In [165]:
upsampled.interpolate('linear').head()

2018-01-01 00:00:00    0.0
2018-01-01 12:00:00    0.5
2018-01-02 00:00:00    1.0
2018-01-02 12:00:00    1.5
2018-01-03 00:00:00    2.0
Freq: 12H, dtype: float64

### Lag, lead and percent change

In [176]:
series.shift?

In [172]:
series.head()

2018-01-01    0
2018-01-02    1
2018-01-03    2
2018-01-04    3
2018-01-05    4
Freq: D, dtype: int64

In [174]:
series.shift(1).head()

2018-01-01    NaN
2018-01-02    0.0
2018-01-03    1.0
2018-01-04    2.0
2018-01-05    3.0
Freq: D, dtype: float64

In [175]:
series.shift(-1).head()

2018-01-01    1.0
2018-01-02    2.0
2018-01-03    3.0
2018-01-04    4.0
2018-01-05    5.0
Freq: D, dtype: float64