## Simulating Time Series with Numpy and Data

In [1]:
import pandas as pd

In [2]:
# Adapted from: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html

# Creates a list 72 hours starting from midnight Jan 1st, 2011.
# Use this as an idex for a new df we are going to create.
rng = pd.date_range('1/1/2011', periods=72, freq='H')

In [3]:
# Range 
rng

DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
               '2011-01-01 02:00:00', '2011-01-01 03:00:00',
               '2011-01-01 04:00:00', '2011-01-01 05:00:00',
               '2011-01-01 06:00:00', '2011-01-01 07:00:00',
               '2011-01-01 08:00:00', '2011-01-01 09:00:00',
               '2011-01-01 10:00:00', '2011-01-01 11:00:00',
               '2011-01-01 12:00:00', '2011-01-01 13:00:00',
               '2011-01-01 14:00:00', '2011-01-01 15:00:00',
               '2011-01-01 16:00:00', '2011-01-01 17:00:00',
               '2011-01-01 18:00:00', '2011-01-01 19:00:00',
               '2011-01-01 20:00:00', '2011-01-01 21:00:00',
               '2011-01-01 22:00:00', '2011-01-01 23:00:00',
               '2011-01-02 00:00:00', '2011-01-02 01:00:00',
               '2011-01-02 02:00:00', '2011-01-02 03:00:00',
               '2011-01-02 04:00:00', '2011-01-02 05:00:00',
               '2011-01-02 06:00:00', '2011-01-02 07:00:00',
               '2011-01-

In [4]:
import numpy as np

In [5]:
# Creates a new df for us.
# This index is indexed by the date. 
# np.random.randn(len(rng)) generates random numbers from numpy
ts = pd.DataFrame(np.random.randn(len(rng)), index=rng)

In [6]:
ts

Unnamed: 0,0
2011-01-01 00:00:00,-0.664865
2011-01-01 01:00:00,-1.744572
2011-01-01 02:00:00,-0.174231
2011-01-01 03:00:00,0.076558
2011-01-01 04:00:00,0.420847
2011-01-01 05:00:00,2.433418
2011-01-01 06:00:00,-0.821228
2011-01-01 07:00:00,-0.530764
2011-01-01 08:00:00,-0.619759
2011-01-01 09:00:00,0.654405


In [7]:
# To give your datframe columns names: Add in - columns=['Admissions'])
ts = pd.DataFrame(np.random.randn(len(rng)), index=rng, columns=['Admissions'])

In [8]:
ts

Unnamed: 0,Admissions
2011-01-01 00:00:00,-1.802179
2011-01-01 01:00:00,1.981414
2011-01-01 02:00:00,0.266398
2011-01-01 03:00:00,0.616773
2011-01-01 04:00:00,-1.775502
2011-01-01 05:00:00,-0.365471
2011-01-01 06:00:00,-0.442934
2011-01-01 07:00:00,0.423244
2011-01-01 08:00:00,-1.633655
2011-01-01 09:00:00,-0.046631


### Generate some false data for hospital admissions.
- Try and round off the numbers above where the number is,
- an integer greater than or = to zero, either one or zero people came in at this period.
- Create a function that round off numbers for us.
- Go to Numpy doc, Numpy Random Poisson.

In [9]:
# Generates random numbers, with 5 being the average number of admissions.
# sometimes it can be others like below.

np.random.poisson(5)

6

In [10]:
ts = pd.DataFrame(np.random.poisson(10, len(rng)), index=rng, columns=['Admissions'])

### Creates a time series and round off the above previous numbers for patients, being admittted every hour over a certain amount of days
- # Below is a time stamp, beginning at a certain date and ending at a certain date.
- Two admissions starting at 00:00:00 up until 1am the same night.
- Be accurate with what the dates and time formats mean when analzying these time series.
- Can be tricky, ranges, daylight savings, specific points in time etc.
- Why are some hours greater than other, can create extra columns with what you might think it means, alcohol etc

In [11]:
ts

Unnamed: 0,Admissions
2011-01-01 00:00:00,11
2011-01-01 01:00:00,12
2011-01-01 02:00:00,9
2011-01-01 03:00:00,15
2011-01-01 04:00:00,6
2011-01-01 05:00:00,7
2011-01-01 06:00:00,10
2011-01-01 07:00:00,9
2011-01-01 08:00:00,9
2011-01-01 09:00:00,12


## Slicing and Aggregating time series in Pandas.
#### Acessing and Grouping.

In [12]:
# Average admissions every hour.
ts.mean()

Admissions    10.416667
dtype: float64

In [16]:
# Sub-selection of the data set using datetime index.
ts.loc['2011-01-01 03:00:00']

Admissions    15
Name: 2011-01-01 03:00:00, dtype: int32

In [19]:
# range of values
# can ask for mean at end of function
ts.loc['2011-01-02 03:00:00': '2011-01-02 23:00:00']

Unnamed: 0,Admissions
2011-01-02 03:00:00,8
2011-01-02 04:00:00,18
2011-01-02 05:00:00,10
2011-01-02 06:00:00,11
2011-01-02 07:00:00,9
2011-01-02 08:00:00,17
2011-01-02 09:00:00,12
2011-01-02 10:00:00,13
2011-01-02 11:00:00,10
2011-01-02 12:00:00,13


In [20]:
# can ask for mean at end of function
ts.loc['2011-01-02 03:00:00': '2011-01-02 23:00:00'].mean()

Admissions    10.190476
dtype: float64

## Aggregation

In [22]:
# Average of the days from above
# Find the functions on the Pandas time series'date functionality page,
# and try some out here experiment more with time series.
ts.resample('D').mean()

Unnamed: 0,Admissions
2011-01-01,10.0
2011-01-02,10.291667
2011-01-03,10.958333
