# Parsing bike usage data into JSON

## Imports

In [2]:
import json
from os.path import join

import pandas as pd
from tqdm.notebook import tqdm

## Load csv

In [3]:
# Load the July 2019 bike-usage export.
# `parse_dates=True` only asks pandas to try parsing the *index*; with the default
# integer index that leaves the timestamp columns as plain strings. Naming the
# columns explicitly makes read_csv return real datetimes for them.
df = pd.read_csv(
    join("bikes_usage", "historia_przejazdow_2019-07.csv"),
    parse_dates=["interval_start", "interval_end"],
)
df

Unnamed: 0,interval_start,interval_end,bikes_in_use,bikes_total,bikes_percentage
0,2019-07-01 00:00:00,2019-07-01 00:15:00,32,1909,0.016763
1,2019-07-01 00:15:00,2019-07-01 00:30:00,71,1909,0.037192
2,2019-07-01 00:30:00,2019-07-01 00:45:00,84,1909,0.044002
3,2019-07-01 00:45:00,2019-07-01 01:00:00,78,1909,0.040859
4,2019-07-01 01:00:00,2019-07-01 01:15:00,65,1909,0.034049
...,...,...,...,...,...
2971,2019-07-31 22:45:00,2019-07-31 23:00:00,152,1909,0.079623
2972,2019-07-31 23:00:00,2019-07-31 23:15:00,123,1909,0.064432
2973,2019-07-31 23:15:00,2019-07-31 23:30:00,100,1909,0.052383
2974,2019-07-31 23:30:00,2019-07-31 23:45:00,99,1909,0.051860


## Remove unnecessary columns and convert `interval_start` to datetime

In [4]:
# Keep only the columns that the following cells work with.
# The explicit `.copy()` yields an independent frame, so the column assignment
# below no longer raises SettingWithCopyWarning.
df = df[["interval_start", "bikes_in_use", "bikes_total", "bikes_percentage"]].copy()
# Ensure the interval start is a datetime column so the `.dt` accessor is available.
df["interval_start"] = pd.to_datetime(df["interval_start"])
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,interval_start,bikes_in_use,bikes_total,bikes_percentage
0,2019-07-01 00:00:00,32,1909,0.016763
1,2019-07-01 00:15:00,71,1909,0.037192
2,2019-07-01 00:30:00,84,1909,0.044002
3,2019-07-01 00:45:00,78,1909,0.040859
4,2019-07-01 01:00:00,65,1909,0.034049
...,...,...,...,...
2971,2019-07-31 22:45:00,152,1909,0.079623
2972,2019-07-31 23:00:00,123,1909,0.064432
2973,2019-07-31 23:15:00,100,1909,0.052383
2974,2019-07-31 23:30:00,99,1909,0.051860


## Add day column

In [5]:
# Tag every interval with the day of the month on which it starts.
df = df.assign(day=lambda frame: frame["interval_start"].dt.day)
df

Unnamed: 0,interval_start,bikes_in_use,bikes_total,bikes_percentage,day
0,2019-07-01 00:00:00,32,1909,0.016763,1
1,2019-07-01 00:15:00,71,1909,0.037192,1
2,2019-07-01 00:30:00,84,1909,0.044002,1
3,2019-07-01 00:45:00,78,1909,0.040859,1
4,2019-07-01 01:00:00,65,1909,0.034049,1
...,...,...,...,...,...
2971,2019-07-31 22:45:00,152,1909,0.079623,31
2972,2019-07-31 23:00:00,123,1909,0.064432,31
2973,2019-07-31 23:15:00,100,1909,0.052383,31
2974,2019-07-31 23:30:00,99,1909,0.051860,31


## Convert the start time to a "minute in day" value

In [6]:
# Express each interval start as minutes elapsed since midnight (hour * 60 + minute).
df = df.assign(
    minute_in_day=lambda frame: (
        frame["interval_start"].dt.hour * 60 + frame["interval_start"].dt.minute
    )
)
df

Unnamed: 0,interval_start,bikes_in_use,bikes_total,bikes_percentage,day,minute_in_day
0,2019-07-01 00:00:00,32,1909,0.016763,1,0
1,2019-07-01 00:15:00,71,1909,0.037192,1,15
2,2019-07-01 00:30:00,84,1909,0.044002,1,30
3,2019-07-01 00:45:00,78,1909,0.040859,1,45
4,2019-07-01 01:00:00,65,1909,0.034049,1,60
...,...,...,...,...,...,...
2971,2019-07-31 22:45:00,152,1909,0.079623,31,1365
2972,2019-07-31 23:00:00,123,1909,0.064432,31,1380
2973,2019-07-31 23:15:00,100,1909,0.052383,31,1395
2974,2019-07-31 23:30:00,99,1909,0.051860,31,1410


## Count the number of days in this month

In [7]:
# Each timestamp reports the length of its own calendar month; the largest of
# those values is taken as the number of days to process.
month_lengths = df["interval_start"].dt.daysinmonth
days_in_month = month_lengths.max()
days_in_month

31

## Remove unnecessary columns and rename the rest with {"bikes_in_use":"bu", "bikes_total":"bt", "bikes_percentage":"bp"}

In [8]:
# Drop the timestamp column (day and minute_in_day replace it) and switch the
# usage columns to their short names.
kept_columns = ["day", "minute_in_day", "bikes_in_use", "bikes_total", "bikes_percentage"]
short_names = {"bikes_in_use": "bu", "bikes_total": "bt", "bikes_percentage": "bp"}

df = df[kept_columns].rename(columns=short_names)
df

Unnamed: 0,day,minute_in_day,bu,bt,bp
0,1,0,32,1909,0.016763
1,1,15,71,1909,0.037192
2,1,30,84,1909,0.044002
3,1,45,78,1909,0.040859
4,1,60,65,1909,0.034049
...,...,...,...,...,...
2971,31,1365,152,1909,0.079623
2972,31,1380,123,1909,0.064432
2973,31,1395,100,1909,0.052383
2974,31,1410,99,1909,0.051860


## Example for a single day

In [10]:
# Preview the target structure for day 31: each minute-of-day maps to a list of
# {"bu", "bt", "bp"} records for the rows that start at that minute.
(
    df.loc[df["day"] == 31, ["minute_in_day", "bu", "bt", "bp"]]
    .groupby("minute_in_day")
    .apply(lambda group: group[["bu", "bt", "bp"]].to_dict(orient="records"))
    .to_dict()
)

{0: [{'bu': 74, 'bt': 1909, 'bp': 0.03876375065479309}],
 15: [{'bu': 82, 'bt': 1909, 'bp': 0.042954426401257205}],
 30: [{'bu': 57, 'bt': 1909, 'bp': 0.029858564693556838}],
 45: [{'bu': 44, 'bt': 1909, 'bp': 0.023048716605552647}],
 60: [{'bu': 37, 'bt': 1909, 'bp': 0.019381875327396544}],
 75: [{'bu': 38, 'bt': 1909, 'bp': 0.01990570979570456}],
 90: [{'bu': 36, 'bt': 1909, 'bp': 0.018858040859088532}],
 105: [{'bu': 43, 'bt': 1909, 'bp': 0.022524882137244632}],
 120: [{'bu': 41, 'bt': 1909, 'bp': 0.0214772132006286}],
 135: [{'bu': 38, 'bt': 1909, 'bp': 0.01990570979570456}],
 150: [{'bu': 29, 'bt': 1909, 'bp': 0.015191199580932424}],
 165: [{'bu': 23, 'bt': 1909, 'bp': 0.012048192771084338}],
 180: [{'bu': 23, 'bt': 1909, 'bp': 0.012048192771084338}],
 195: [{'bu': 26, 'bt': 1909, 'bp': 0.013619696176008382}],
 210: [{'bu': 26, 'bt': 1909, 'bp': 0.013619696176008382}],
 225: [{'bu': 25, 'bt': 1909, 'bp': 0.013095861707700369}],
 240: [{'bu': 22, 'bt': 1909, 'bp': 0.011524358302776

## Loop for each day

In [12]:
# Build {day: {minute_in_day: [{"bu", "bt", "bp"}, ...]}} for every day number
# from 1 up to and including days_in_month.
month_dict = {
    day: (
        df.loc[df["day"] == day, ["minute_in_day", "bu", "bt", "bp"]]
        .groupby("minute_in_day")
        .apply(lambda group: group[["bu", "bt", "bp"]].to_dict(orient="records"))
        .to_dict()
    )
    for day in range(1, days_in_month + 1)
}

## Save as json

In [13]:
# Write the nested {day: {minute_in_day: [records]}} mapping to disk.
# `json` is imported in the notebook's import cell at the top.
# NOTE: JSON object keys are always strings, so the integer day and minute keys
# are stored as strings and have to be converted back when the file is read.
with open('07bikes.json', 'w') as fp:
    json.dump(month_dict, fp)