<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Objectifs" data-toc-modified-id="Objectifs-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Objectifs</a></span></li><li><span><a href="#Dev" data-toc-modified-id="Dev-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Dev</a></span></li><li><span><a href="#Industrialisation" data-toc-modified-id="Industrialisation-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Industrialisation</a></span><ul class="toc-item"><li><ul class="toc-item"><li><span><a href="#Script" data-toc-modified-id="Script-3.0.1"><span class="toc-item-num">3.0.1&nbsp;&nbsp;</span>Script</a></span></li></ul></li></ul></li><li><span><a href="#Analyse-de-la-feature" data-toc-modified-id="Analyse-de-la-feature-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Analyse de la feature</a></span><ul class="toc-item"><li><span><a href="#Stations-avec-le-plus-grand-nombre-de-non-prise-de-vélo-consécutif" data-toc-modified-id="Stations-avec-le-plus-grand-nombre-de-non-prise-de-vélo-consécutif-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Stations avec le plus grand nombre de non prise de vélo consécutif</a></span></li></ul></li></ul></div>

In [1]:
import pandas as pd

from vcub_keeper.reader.reader import *
from vcub_keeper.visualisation import plot_station_activity
from vcub_keeper.transform.features_factory import *

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

%load_ext autoreload
%autoreload 2

pd.options.display.max_rows = 500

## Objectifs

- Permettre de savoir, pour chaque station, depuis combien de temps aucun vélo n'a été pris. Plus ce nombre est grand, plus il y a de chances que la station soit HS. Cf. https://github.com/armgilles/vcub_keeper/issues/10
- Industrialisation de la fonction.
- Analyse de la feature

## Dev

In [6]:
# Load the Vcub station activity data.
activite = read_activity_vcub()

# Add the `transactions_out` feature that the cells below rely on.
activite = get_transactions_out(activite)

In [7]:
# Temporary column of ones: its grouped cumulative sum acts as a row counter.
activite['compteur'] = 1
# First group key: running count of rows with at least one pick-up, so the key
# changes on every pick-up row. Second group key: station_id, so a counter
# never runs across two stations.
activite['consecutive_no_transactions_out'] = \
    activite.groupby([(activite['transactions_out'] > 0).cumsum(),
                      'station_id'])['compteur'].cumsum() # counter restarts at 1 on each pick-up row

# Rows left without a value by the grouped cumsum are set to 0.
activite['consecutive_no_transactions_out'] = \
    activite['consecutive_no_transactions_out'].fillna(0)

# The helper column is no longer needed once the counter is computed.
activite = activite.drop('compteur', axis=1)

In [9]:
activite[(activite['station_id'] == 1) & (activite.date >= "2017-07-10 09:04:04")].head(15)

Unnamed: 0,gid,station_id,type,name,state,available_stand,available_bike,date,transactions_out,consecutive_no_transactions_out
395,83,1,VLS,Meriadeck,1,0,20,2017-07-10 09:04:04,0.0,3
396,83,1,VLS,Meriadeck,1,0,20,2017-07-10 09:09:03,0.0,4
397,83,1,VLS,Meriadeck,1,0,20,2017-07-10 09:14:06,0.0,5
398,83,1,VLS,Meriadeck,1,0,20,2017-07-10 09:19:04,0.0,6
399,83,1,VLS,Meriadeck,1,0,20,2017-07-10 09:24:04,0.0,7
400,83,1,VLS,Meriadeck,1,1,19,2017-07-10 09:29:07,1.0,1
401,83,1,VLS,Meriadeck,1,0,20,2017-07-10 09:34:05,0.0,2
402,83,1,VLS,Meriadeck,1,0,20,2017-07-10 09:39:05,0.0,3
403,83,1,VLS,Meriadeck,1,0,20,2017-07-10 09:44:04,0.0,4
404,83,1,VLS,Meriadeck,1,0,20,2017-07-10 09:49:04,0.0,5


In [10]:
# Between two different stations (positional row slice)
activite[21610:21625]

Unnamed: 0,gid,station_id,type,name,state,available_stand,available_bike,date,transactions_out,consecutive_no_transactions_out
21610,83,1,VLS,Meriadeck,1,16,3,2017-09-26 14:14:03,0.0,5
21611,83,1,VLS,Meriadeck,1,17,2,2017-09-26 14:19:03,1.0,1
21612,83,1,VLS,Meriadeck,1,16,3,2017-09-26 14:24:04,0.0,2
21613,83,1,VLS,Meriadeck,1,15,4,2017-09-26 14:29:05,0.0,3
21614,83,1,VLS,Meriadeck,1,14,5,2017-09-26 14:34:03,0.0,4
21615,83,1,VLS,Meriadeck,1,14,5,2017-09-26 14:39:02,0.0,5
21616,83,1,VLS,Meriadeck,1,13,6,2017-09-26 14:44:05,0.0,6
21617,83,1,VLS,Meriadeck,1,14,5,2017-09-26 14:49:05,1.0,1
21618,83,1,VLS,Meriadeck,1,13,6,2017-09-26 14:54:04,0.0,2
21619,83,1,VLS,Meriadeck,1,13,6,2017-09-26 14:59:04,0.0,3


## Industrialisation

In [12]:
def get_consecutive_no_transactions_out(data):
    """
    Count, per station, the consecutive rows elapsed since the last bike pick-up.

    The counter restarts at 1 on every row where `transactions_out` > 0 and
    grows by one on each following row of the same station until the next
    pick-up row. The higher the value, the longer the station has gone without
    a bike being taken out. The unit is a number of rows (observations), not a
    duration.

    The input DataFrame is not modified: no temporary column is written to it
    and the result is returned as a new DataFrame.

    Parameters
    ----------
    data : DataFrame
        Vcub station activity holding the `station_id` and `transactions_out`
        columns (the latter comes from get_transactions_out).
        NOTE(review): the count follows row order, so rows are assumed to be
        sorted chronologically within each station -- confirm against the reader.

    Returns
    -------
    data : DataFrame
        Copy of the input with the 'consecutive_no_transactions_out' column added.

    Examples
    --------

    activite = get_consecutive_no_transactions_out(activite)
    """

    # Block id: incremented on every row with at least one pick-up, so all rows
    # from one pick-up row up to (but excluding) the next one share the same id.
    # The running sum spans the whole frame; station_id is used as a second
    # grouping key below so that a block never covers two stations.
    pickup_block = (data['transactions_out'] > 0).cumsum()

    # Standalone series of ones instead of a temporary column written into the
    # caller's frame; its grouped cumulative sum is the row counter.
    ones = pd.Series(1, index=data.index)
    counter = ones.groupby([pickup_block, data['station_id']]).cumsum()

    # Rows the grouped cumsum leaves without a value are reported as 0.
    counter = counter.fillna(0)

    # assign() returns a new frame, leaving the caller's DataFrame untouched.
    return data.assign(consecutive_no_transactions_out=counter)

In [13]:
# Reload the activity data and rebuild `transactions_out` so the function
# defined above is applied to a frame without the feature column.
activite = read_activity_vcub()

activite = get_transactions_out(activite)

In [14]:
activite = get_consecutive_no_transactions_out(activite)

#### Script 

In [16]:
from vcub_keeper.reader.reader import *
from vcub_keeper.transform.features_factory import *

In [18]:
# Full pipeline: load the activity, add `transactions_out`, then add
# `consecutive_no_transactions_out`.
activite = read_activity_vcub()

activite = get_transactions_out(activite)
activite = get_consecutive_no_transactions_out(activite)

In [20]:
activite.tail(10)

Unnamed: 0,gid,station_id,type,name,state,available_stand,available_bike,date,transactions_out,consecutive_no_transactions_out
3761870,176,174,VLS,Darwin,1,0,20,2017-09-26 14:14:04,0.0,16
3761871,176,174,VLS,Darwin,1,0,20,2017-09-26 14:19:03,0.0,17
3761872,176,174,VLS,Darwin,1,1,19,2017-09-26 14:24:04,1.0,1
3761873,176,174,VLS,Darwin,1,1,19,2017-09-26 14:29:05,0.0,2
3761874,176,174,VLS,Darwin,1,1,19,2017-09-26 14:34:03,0.0,3
3761875,176,174,VLS,Darwin,1,1,19,2017-09-26 14:39:02,0.0,4
3761876,176,174,VLS,Darwin,1,0,20,2017-09-26 14:44:05,0.0,5
3761877,176,174,VLS,Darwin,1,0,20,2017-09-26 14:49:05,0.0,6
3761878,176,174,VLS,Darwin,1,0,20,2017-09-26 14:54:04,0.0,7
3761879,176,174,VLS,Darwin,1,0,20,2017-09-26 14:59:04,0.0,8


## Analyse de la feature

In [21]:
# Rebuild the data set with both features for the analysis section.
activite = read_activity_vcub()

activite = get_transactions_out(activite)
activite = get_consecutive_no_transactions_out(activite)

### Stations avec le plus grand nombre de non prise de vélo consécutif

In [34]:
# Longest streak of rows without a pick-up for each station, largest first.
grp_station = (
    activite
    .groupby('station_id', as_index=False)['consecutive_no_transactions_out']
    .max()
    .sort_values('consecutive_no_transactions_out', ascending=False)
)

In [36]:
grp_station.head(10)

Unnamed: 0,station_id,consecutive_no_transactions_out
136,137,21620
34,35,21620
92,93,21620
75,76,17164
103,104,8074
46,47,7775
24,25,7575
159,160,3953
118,119,3490
154,155,1939


**Certaines stations sont très inactives (travaux ?)**

In [50]:
# Station 160 appears in the top-10 table of longest idle streaks above.
station_id = 160
# NOTE(review): start_date / end_date are assigned but not passed to the call
# below (the matching arguments are commented out) -- presumably to plot the
# whole period; confirm.
start_date = "2017-08-05"
end_date = "2017-08-07"
tt = plot_station_activity(activite, station_id=station_id, 
                      features_to_plot=['available_bike', 'available_stand',
                                        'consecutive_no_transactions_out'],
                      #start_date=start_date,
                      #end_date=end_date,
                      return_data=False
                     )

In [53]:
# Station 155 appears in the top-10 table of longest idle streaks above.
station_id = 155
# Restrict the plot to this date window.
start_date = "2017-07-25"
end_date = "2017-08-09"
# `tt` keeps whatever plot_station_activity returns; with return_data=True this
# is presumably the plotted data, since the next cell slices it -- confirm.
tt = plot_station_activity(activite, station_id=station_id, 
                      features_to_plot=['available_bike', 'available_stand',
                                        'consecutive_no_transactions_out'],
                      start_date=start_date,
                      end_date=end_date,
                      return_data=True
                     )

In [54]:
tt[100:150]

Unnamed: 0,gid,station_id,type,name,state,available_stand,available_bike,date,transactions_out,consecutive_no_transactions_out
3333892,156,155,VLS+,Village 6 IUT,1,17,7,2017-07-25 09:30:04,0.0,5
3333893,156,155,VLS+,Village 6 IUT,1,16,8,2017-07-25 09:40:03,0.0,6
3333894,156,155,VLS+,Village 6 IUT,1,16,8,2017-07-25 09:45:04,0.0,7
3333895,156,155,VLS+,Village 6 IUT,1,15,9,2017-07-25 09:55:04,0.0,8
3333896,156,155,VLS+,Village 6 IUT,1,15,9,2017-07-25 10:00:05,0.0,9
3333897,156,155,VLS+,Village 6 IUT,1,15,9,2017-07-25 10:05:04,0.0,10
3333898,156,155,VLS+,Village 6 IUT,1,14,10,2017-07-25 10:10:04,0.0,11
3333899,156,155,VLS+,Village 6 IUT,1,14,10,2017-07-25 10:20:04,0.0,12
3333900,156,155,VLS+,Village 6 IUT,1,14,10,2017-07-25 10:25:03,0.0,13
3333901,156,155,VLS+,Village 6 IUT,1,14,10,2017-07-25 10:30:04,0.0,14


In [55]:
# Seconds component of each timestamp, stored in a helper column.
# NOTE(review): purpose not stated -- presumably to inspect the jitter of the
# sampling times; confirm.
activite['seconde'] = activite.date.dt.second

In [58]:
activite.tail(15)

Unnamed: 0,gid,station_id,type,name,state,available_stand,available_bike,date,transactions_out,consecutive_no_transactions_out,seconde
3761865,176,174,VLS,Darwin,1,2,18,2017-09-26 13:49:04,0.0,11,4
3761866,176,174,VLS,Darwin,1,2,18,2017-09-26 13:54:04,0.0,12,4
3761867,176,174,VLS,Darwin,1,2,18,2017-09-26 13:59:03,0.0,13,3
3761868,176,174,VLS,Darwin,1,1,19,2017-09-26 14:04:05,0.0,14,5
3761869,176,174,VLS,Darwin,1,1,19,2017-09-26 14:09:04,0.0,15,4
3761870,176,174,VLS,Darwin,1,0,20,2017-09-26 14:14:04,0.0,16,4
3761871,176,174,VLS,Darwin,1,0,20,2017-09-26 14:19:03,0.0,17,3
3761872,176,174,VLS,Darwin,1,1,19,2017-09-26 14:24:04,1.0,1,4
3761873,176,174,VLS,Darwin,1,1,19,2017-09-26 14:29:05,0.0,2,5
3761874,176,174,VLS,Darwin,1,1,19,2017-09-26 14:34:03,0.0,3,3
