## Preprocess data into 15 intervals

## Imports

In [4]:
from __future__ import absolute_import, division, print_function, unicode_literals
import functools
from IPython.display import Image
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import glob

from os.path import join

from processing_helper import extract_data


# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline
# Show floating-point values with 2 decimal places in pandas output.
pd.set_option("display.precision", 2)

In [5]:
# Location of the March 2019 ride-history export, relative to the notebook.
DATA_DIR = "data"
RIDES_FILE = "historia_przejazdow_2019-03.csv"

PATH = join(DATA_DIR, RIDES_FILE)
PATH

'data/historia_przejazdow_2019-03.csv'

In [25]:
# Load the ride history. The first CSV column is a plain integer row id, so it
# becomes the index; the two timestamp columns are parsed explicitly
# (`parse_dates=True` would only try to parse the index, leaving the
# timestamps as strings).
rides_raw = pd.read_csv(
    PATH,
    index_col=0,
    parse_dates=["start_time", "end_time"],
)

# Keep only rides that both start and end at a station.
# "Poza stacją" is the placeholder for a bike picked up / left outside a station.
OUTSIDE_STATION = "Poza stacją"
starts_at_station = rides_raw["rental_place"] != OUTSIDE_STATION
ends_at_station = rides_raw["return_place"] != OUTSIDE_STATION

# `.copy()` makes `df` an independent frame: later cells assign new columns to
# it, which on a filtered slice can raise SettingWithCopyWarning.
df = rides_raw[starts_at_station & ends_at_station].copy()

df.head()

Unnamed: 0,uid,bike_number,start_time,end_time,rental_place,return_place
886,65533380,57114,2019-03-15 10:21:55,2019-03-15 10:28:18,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic)
887,65533368,57092,2019-03-15 10:21:32,2019-03-15 10:28:23,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic)
888,65533283,57060,2019-03-15 10:18:19,2019-03-15 10:32:18,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic)
889,65533213,57115,2019-03-15 10:15:20,2019-03-15 10:32:21,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic)
890,65533616,57114,2019-03-15 10:32:19,2019-03-15 10:34:08,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic)


In [27]:
# Make sure both timestamp columns hold datetime values (not strings) so that
# durations can be computed from them.
for time_column in ("start_time", "end_time"):
    df[time_column] = pd.to_datetime(df[time_column])

df.head()

Unnamed: 0,uid,bike_number,start_time,end_time,rental_place,return_place
886,65533380,57114,2019-03-15 10:21:55,2019-03-15 10:28:18,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic)
887,65533368,57092,2019-03-15 10:21:32,2019-03-15 10:28:23,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic)
888,65533283,57060,2019-03-15 10:18:19,2019-03-15 10:32:18,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic)
889,65533213,57115,2019-03-15 10:15:20,2019-03-15 10:32:21,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic)
890,65533616,57114,2019-03-15 10:32:19,2019-03-15 10:34:08,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic)


## Add difference in minutes between start and end of a rental

In [52]:
# Rental duration in whole minutes (rounded down).
#
# The Timedelta is floor-divided by one minute so that the *entire* span is
# counted. `.dt.seconds` exposes only the seconds-within-a-day component of a
# Timedelta: a rental of 24 h or more would wrap around to a small value, and
# a negative span (end before start) would show up as a large positive one.
rental_duration = df["end_time"] - df["start_time"]
difference_in_minutes = rental_duration // pd.Timedelta(minutes=1)
df["difference_in_minutes"] = difference_in_minutes

df.head()

Unnamed: 0,uid,bike_number,start_time,end_time,rental_place,return_place,difference_in_minutes
886,65533380,57114,2019-03-15 10:21:55,2019-03-15 10:28:18,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic),6
887,65533368,57092,2019-03-15 10:21:32,2019-03-15 10:28:23,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic),6
888,65533283,57060,2019-03-15 10:18:19,2019-03-15 10:32:18,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic),13
889,65533213,57115,2019-03-15 10:15:20,2019-03-15 10:32:21,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic),17
890,65533616,57114,2019-03-15 10:32:19,2019-03-15 10:34:08,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic),1


## Filter out records with a rental shorter than 1 minute

In [57]:
# Keep only rentals that lasted at least one full minute.
lasted_a_minute = df["difference_in_minutes"].ge(1)
df = df.loc[lasted_a_minute]

df.head()

Unnamed: 0,uid,bike_number,start_time,end_time,rental_place,return_place,difference_in_minutes
886,65533380,57114,2019-03-15 10:21:55,2019-03-15 10:28:18,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic),6
887,65533368,57092,2019-03-15 10:21:32,2019-03-15 10:28:23,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic),6
888,65533283,57060,2019-03-15 10:18:19,2019-03-15 10:32:18,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic),13
889,65533213,57115,2019-03-15 10:15:20,2019-03-15 10:32:21,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic),17
890,65533616,57114,2019-03-15 10:32:19,2019-03-15 10:34:08,Świdnicka / Piłsudskiego (Hotel Scandic),Świdnicka / Piłsudskiego (Hotel Scandic),1


## Divide 