### Import des bibliothèques

In [1]:
import gzip
from io import StringIO
import os
import glob
import pandas as pd
import numpy as np
from math import pi
from matplotlib.pyplot import *
import ipywidgets as wg

### Lecture des fichiers

In [2]:
path = 'data/'
# Every entry directly under `path` is treated as one station; sorting keeps
# the station order reproducible from one run to the next.
files = glob.glob(os.path.join(path, "*"))
files.sort()
# Station names are the base names of those entries (no dependence on the
# length of `path`, unlike a fixed-offset slice).
station_names = [os.path.basename(station) for station in files]
# Array of station names, plus a dictionary holding one (initially empty)
# list per station.
stations = np.array(station_names, dtype='str')
dict_stations = {name: [] for name in station_names}

In [3]:
def open_csv_gz(filename):
    """Load a gzip-compressed, tab-separated CSV file into a DataFrame.

    Parameters
    ----------
    filename : str or path-like
        Path of the gzip file to read. Its content is decoded as UTF-8.

    Returns
    -------
    pandas.DataFrame
        The parsed table, with the 'Timestamp' column parsed as dates.
    """
    # The context manager guarantees the gzip handle is closed even when
    # parsing fails; newline='' leaves line endings untouched for the parser.
    with gzip.open(filename, 'rt', encoding='utf-8', newline='') as handle:
        return pd.read_csv(handle, sep="\t", parse_dates=['Timestamp'])

In [4]:
# Build, for every station, the list of its time series: the files of the
# station's directory are taken in sorted path order and each one is loaded
# into a DataFrame.
for station in stations:
    station_glob = os.path.join('data/'+station+"/*")
    files = glob.glob(station_glob)
    files.sort()
    files = list(map(open_csv_gz, files))
    dict_stations[station] = files

In [5]:
# First time series of station "01. Duc"
dict_stations['01. Duc'][0].head()

Unnamed: 0,Timestamp,Station,Bikes,Slots,Total,Status,Humidity,Pressure,Rain,WindDeg,WindSpeed,Snow,TemperatureTemp
0,2014-11-14 09:30:00,01. Duc,4.0,5.0,9.0,clouds,100.0,1013.0,{u'3h': 0},200.504,0.84,{},9.0
1,2014-11-14 09:40:00,01. Duc,4.0,5.0,9.0,mist,100.0,1014.0,{u'3h': 0},200.504,0.84,{},10.0
2,2014-11-14 09:50:00,01. Duc,4.0,5.0,9.0,mist,100.0,1014.0,{u'3h': 0},200.504,0.84,{},10.0
3,2014-11-14 10:00:00,01. Duc,4.0,5.0,9.0,mist,100.0,1014.0,{u'3h': 0},200.504,0.84,{},10.0
4,2014-11-14 10:10:00,01. Duc,4.0,5.0,9.0,clouds,100.0,1013.0,{u'3h': 0},200.504,0.84,{},9.0


### Fonctions permettant la visualisation des données

In [6]:
# Lower-case weather labels to normalise. 'snow' is listed so that every
# lower-case label has a capitalised counterpart at the same position.
meteo_min = ['clear','clouds','drizzle','fog','haze','mist','rain','snow','thunderstorm']
# Capitalised labels, derived from the list above so the two can never get
# out of step (a length mismatch previously mapped 'thunderstorm' to 'Snow').
meteo_maj = [label.capitalize() for label in meteo_min]
# Mapping lower-case label -> capitalised label.
dict_noms = dict(zip(meteo_min, meteo_maj))

def join_time_series(list_dfs):
    """Merge a list of DataFrames into a single cleaned DataFrame.

    The frames are concatenated and re-indexed from 0, the values of the
    'Status' column are renamed through ``dict_noms``, and the rows whose
    'TemperatureTemp' equals -273.15 (aberrant reading) are dropped. The index
    is not renumbered after that last filtering step.
    """
    merged = pd.concat(list_dfs).reset_index(drop=True)
    merged = merged.replace({'Status':dict_noms})
    # Keep everything except the aberrant temperature value.
    valid_temperature = merged['TemperatureTemp'] != -273.15
    return merged[valid_temperature]

# One merged DataFrame per station, keyed by station name.
dict_df_complets = dict(
    (name, join_time_series(dict_stations[name])) for name in stations
)

In [7]:
# First rows of the merged DataFrame of station "01. Duc"
dict_df_complets['01. Duc'].head()

Unnamed: 0,Timestamp,Station,Bikes,Slots,Total,Status,Humidity,Pressure,Rain,WindDeg,WindSpeed,Snow,TemperatureTemp
0,2014-11-14 09:30:00,01. Duc,4.0,5.0,9.0,Clouds,100.0,1013.0,{u'3h': 0},200.504,0.84,{},9.0
1,2014-11-14 09:40:00,01. Duc,4.0,5.0,9.0,Mist,100.0,1014.0,{u'3h': 0},200.504,0.84,{},10.0
2,2014-11-14 09:50:00,01. Duc,4.0,5.0,9.0,Mist,100.0,1014.0,{u'3h': 0},200.504,0.84,{},10.0
3,2014-11-14 10:00:00,01. Duc,4.0,5.0,9.0,Mist,100.0,1014.0,{u'3h': 0},200.504,0.84,{},10.0
4,2014-11-14 10:10:00,01. Duc,4.0,5.0,9.0,Clouds,100.0,1013.0,{u'3h': 0},200.504,0.84,{},9.0


In [8]:
def show_moustache_status(df,ax):
    """Draw, on ``ax``, one horizontal box plot of 'Bikes' per 'Status' value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Status' and 'Bikes'.
    ax : matplotlib axes
        Axes receiving the box plot, its tick labels, x-label and title.

    Returns
    -------
    The value returned by ``ax.boxplot``.
    """
    # Labels and values are both read from the same grouped result, so each
    # box is guaranteed to carry the label of the status it was built from
    # (groupby orders its keys, whereas order of appearance may differ).
    bikes_by_status = df.groupby('Status')['Bikes'].apply(list)
    status = bikes_by_status.index.tolist()
    vals = bikes_by_status.tolist()
    bx = ax.boxplot(vals,vert=False)
    # Horizontal boxes: the categories sit on the y axis.
    ax.set_yticklabels(status)
    # Annotate the axes that was passed in, not whichever one is current.
    ax.set_xlabel('Nombre de vélos disponibles')
    ax.set_title('Répartition des vélos disponibles selon la méteo')
    return bx

In [16]:
def spider_plot(df,ax):
    """Draw, on ``ax``, a radar chart of the mean bike ratio per 'Status'.

    The ratio is ``Bikes / Total * 100`` computed row by row, then averaged
    for each 'Status' value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Status', 'Bikes' and 'Total'.
    ax : matplotlib axes
        Axes to draw on; the caller in this notebook creates it with
        ``polar=True``.

    Returns
    -------
    The value returned by ``ax.plot``.
    """
    # Average only the ratio itself: averaging every column of the frame
    # fails on non-numeric columns with recent pandas versions.
    ratio = df['Bikes'].div(df['Total']) * 100
    values = ratio.groupby(df['Status']).mean()
    vals = values.values
    status = values.index

    # Repeat the first point at the end so the outline is closed.
    vals = np.append(vals,vals[0])
    angles = [n / float(len(status)) * 2 * pi for n in range(len(status))]
    angles += angles[:1]
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(status, color='grey', size=12)
    ax.set_yticks(np.linspace(0,100,6))
    ax.set_yticklabels(['0%','20%', '40%', '60%', '80%', '100%'],
           color='grey', size=12)
    ax.set_ylim(0, 100)
    pl = ax.plot(angles, vals, linewidth=1, linestyle='solid')
    ax.fill(angles, vals, 'skyblue', alpha=0.4)
    # Title the axes that was passed in, not whichever one is current.
    ax.set_title('Pourcentage moyen de vélos disponibles selon le temps')
    return pl

In [10]:
def mean_std_slots_2h(df):
    """Mean and standard deviation of 'Slots' for each 2-hour band of the day.

    Rows are assigned to a band from the hour of their 'Timestamp': band 0
    covers hours 0 and 1, band 1 covers hours 2 and 3, ... band 11 covers
    hours 22 and 23.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime column 'Timestamp' and a column 'Slots'.

    Returns
    -------
    (means, stds) : tuple of numpy.ndarray
        Two float arrays of length 12, indexed by band. A band without any
        row yields NaN instead of raising.
    """
    slots = df.set_index('Timestamp')['Slots']
    # Integer division by two folds the 24 hours into 12 bands.
    band = np.asarray(slots.index.hour) // 2
    grouped = slots.groupby(band)
    # reindex() guarantees twelve entries even when some bands have no data.
    means = grouped.mean().reindex(range(12)).to_numpy(dtype=float)
    stds = grouped.std().reindex(range(12)).to_numpy(dtype=float)
    return means, stds

def hist_utilisation_quot(df,ax):
    """Draw, on ``ax``, a bar chart of the 2-hour 'Slots' means over a day.

    The bar heights and error bars are the means and standard deviations
    returned by ``mean_std_slots_2h``; ticks are labelled '0h', '2h', ...
    '24h'.

    NOTE(review): the y-label and title read "available bikes" while the
    plotted values come from the 'Slots' column - confirm which is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed on to ``mean_std_slots_2h``.
    ax : matplotlib axes
        Axes receiving the bars, ticks, y-label and title.

    Returns
    -------
    The value returned by ``ax.bar``.
    """
    means, stds = mean_std_slots_2h(df)
    ht = ax.bar(np.linspace(0,22,12),means,yerr=stds,width=1.93,align='edge')
    ticks = np.linspace(0,24,13)
    # Plain string formatting instead of the private np.core namespace.
    labels = [str(int(hour)) + 'h' for hour in ticks]
    # Configure the axes that was passed in, not whichever one is current.
    ax.set_xticks(ticks)
    ax.set_xticklabels(labels)
    ax.set_ylabel("Nombre de vélos disponibles")
    ax.set_title('Nombre de vélos disponibles en moyenne au cours d\'une journée')
    return ht

In [11]:
def tmp_min_max_mean_slots_month(df):
    """Monthly count of 'Slots' changes and monthly temperature extremes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime column 'Timestamp' and the columns 'Slots'
        and 'TemperatureTemp'. Rows are used in their existing order.

    Returns
    -------
    (freqs, mins, maxs) : tuple of numpy.ndarray
        Three float arrays of length 12 (index 0 is January):
        ``freqs`` - number of rows whose 'Slots' value differs from the
        previous row; ``mins`` / ``maxs`` - minimum / maximum of
        'TemperatureTemp'. A month without data gives 0 in ``freqs`` and NaN
        in ``mins`` / ``maxs`` instead of raising.
    """
    series = df.set_index('Timestamp')[['Slots','TemperatureTemp']]
    month = np.asarray(series.index.month)
    months = range(1, 13)

    # A row counts as a change only when the difference with the previous row
    # is a real non-zero number: the undefined difference of the first row
    # (or next to a missing value) is not a movement.
    changed = series['Slots'].diff().fillna(0).ne(0)
    freqs = (changed.groupby(month).sum()
             .reindex(months, fill_value=0).to_numpy(dtype=float))

    temperature = series['TemperatureTemp'].groupby(month)
    mins = temperature.min().reindex(months).to_numpy(dtype=float)
    maxs = temperature.max().reindex(months).to_numpy(dtype=float)
    return freqs, mins, maxs

def annual_temp(df,ax1,ax2):
    """Plot monthly temperature extremes on ``ax1`` and 'Slots' changes on ``ax2``.

    The three monthly series come from ``tmp_min_max_mean_slots_month``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed on to ``tmp_min_max_mean_slots_month``.
    ax1 : matplotlib axes
        Receives the minimum (blue) and maximum (red) temperature curves, the
        month tick labels and the temperature y-label.
    ax2 : matplotlib axes
        Receives the change-count curve (black), its y-label and the title.
        The caller in this notebook passes ``ax1.twinx()``.

    Returns
    -------
    The value returned by the first ``ax1.plot`` call (minimum temperatures).
    """
    freqs, mins, maxs = tmp_min_max_mean_slots_month(df)
    x = np.arange(12)
    # No figure is created here: both axes are supplied by the caller, and
    # opening a new figure would only leave an empty one behind.
    pl = ax1.plot(x,mins,color='b',marker='o',label='Temp min')
    ax1.plot(x,maxs,color='r',marker='o',label='Temp max')
    # One tick per plotted month, so ticks and labels have the same length.
    ax1.set_xticks(x)
    labels = ['Janvier', 'Février', 'Mars', 'Avril', 'Mai', 'Juin', 'Juillet', 'Août', 'Septembre', 'Octobre', 'Novembre', 'Décembre']
    ax1.set_xticklabels(labels,rotation='vertical')
    ax1.set_ylabel('Température en degrée')
    ax2.plot(x,freqs,color='black',marker='o',label='Vélos')
    ax2.set_ylabel('Nombre de vélos pris ou déposés')
    ax2.set_title("Températures et utilisation des vélos au cours de l'année")
    ax1.legend(loc='upper left')
    ax2.legend(loc='upper right')
    return pl

In [12]:
def mean_std_slots_months(df):
    """Mean and standard deviation of 'Slots' for each calendar month.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime column 'Timestamp' and a column 'Slots'.

    Returns
    -------
    (means, stds) : tuple of numpy.ndarray
        Two float arrays of length 12 (index 0 is January). A month without
        any row yields NaN instead of raising.
    """
    slots = df.set_index('Timestamp')['Slots']
    grouped = slots.groupby(np.asarray(slots.index.month))
    months = range(1, 13)
    # reindex() guarantees twelve entries even when some months have no data.
    means = grouped.mean().reindex(months).to_numpy(dtype=float)
    stds = grouped.std().reindex(months).to_numpy(dtype=float)
    return means, stds

def hist_utilisation_month(df,ax):
    """Draw, on ``ax``, a bar chart of the monthly 'Slots' means.

    The bar heights and error bars are the means and standard deviations
    returned by ``mean_std_slots_months``; ticks carry the French month names.

    NOTE(review): the y-label and title read "available bikes" while the
    plotted values come from the 'Slots' column - confirm which is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed on to ``mean_std_slots_months``.
    ax : matplotlib axes
        Axes receiving the bars, ticks, y-label and title.

    Returns
    -------
    The value returned by ``ax.bar``.
    """
    means, stds = mean_std_slots_months(df)
    positions = np.linspace(0,23,12)
    bars = ax.bar(positions,means,yerr=stds,width=1.93)
    labels = ['Janvier', 'Février', 'Mars', 'Avril', 'Mai', 'Juin', 'Juillet', 'Août', 'Septembre', 'Octobre', 'Novembre', 'Décembre']
    # Configure the axes that was passed in, not whichever one is current.
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation="vertical")
    ax.set_ylabel("Nombre de vélos disponibles")
    ax.set_title("Nombre de vélos disponibles en moyenne au cours d'une année")
    return bars

In [19]:
def mean_std_slots_week(df):
    """Mean and standard deviation of 'Slots' for each day of the week.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime column 'Timestamp' and a column 'Slots'.

    Returns
    -------
    (means, stds) : tuple of numpy.ndarray
        Two float arrays of length 7, indexed by pandas ``dayofweek``
        (0 is Monday). A weekday without any row yields NaN instead of
        raising.
    """
    slots = df.set_index('Timestamp')['Slots']
    grouped = slots.groupby(np.asarray(slots.index.dayofweek))
    weekdays = range(7)
    # reindex() guarantees seven entries even when some weekdays have no data.
    means = grouped.mean().reindex(weekdays).to_numpy(dtype=float)
    stds = grouped.std().reindex(weekdays).to_numpy(dtype=float)
    return means, stds

def hist_utilisation_week(df,ax):
    """Draw, on ``ax``, a bar chart of the 'Slots' means per day of the week.

    The bar heights and error bars are the means and standard deviations
    returned by ``mean_std_slots_week``; ticks carry the French day names.

    NOTE(review): the y-label and title read "available bikes" while the
    plotted values come from the 'Slots' column - confirm which is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed on to ``mean_std_slots_week``.
    ax : matplotlib axes
        Axes receiving the bars, ticks, y-label and title.

    Returns
    -------
    The value returned by ``ax.bar``.
    """
    means, stds = mean_std_slots_week(df)
    positions = np.linspace(0,13,7)
    pl = ax.bar(positions,means,yerr=stds,width=1.93)
    labels = ['Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi', 'Dimanche']
    # Configure the axes that was passed in, not whichever one is current.
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation="vertical")
    ax.set_ylabel("Nombre de vélos disponibles")
    ax.set_title('Vélos disponibles en moyenne au cours d\'une semaine')
    return pl

In [20]:
# Dropdown listing every station (first one pre-selected); it drives the
# interactive visualisation.
liste_selection = wg.Dropdown(
    description=' ',
    options=stations,
    value=stations[0],
    disabled=False,
)

def dashboard(liste_selection):
    """Draw the six-panel overview figure of one station.

    ``liste_selection`` is the key used to look the station's DataFrame up in
    ``dict_df_complets``. The panels are laid out on a 3x2 grid, in this
    order: daily bar chart, box plot per weather status, weekly bar chart,
    radar chart (polar axes), monthly bar chart, and the temperature/usage
    curves drawn on a pair of twin axes.
    """
    fig = figure(figsize=(15,18))
    fig.subplots_adjust(top = 1.2)

    # (grid position, drawing function, extra add_subplot keyword arguments)
    single_axes_panels = (
        (1, hist_utilisation_quot, {}),
        (2, show_moustache_status, {}),
        (3, hist_utilisation_week, {}),
        (4, spider_plot, {'polar': True}),
        (5, hist_utilisation_month, {}),
    )
    for position, draw_panel, subplot_options in single_axes_panels:
        axes = fig.add_subplot(3,2,position,**subplot_options)
        draw_panel(dict_df_complets[liste_selection],axes)

    # The last panel needs two y axes sharing the same x axis.
    axes1 = fig.add_subplot(3,2,6)
    axes2 = axes1.twinx()
    annual_temp(dict_df_complets[liste_selection],axes1, axes2)

### Visualisation des données

In [21]:
# Prompt printed above the widget (user-facing text).
print("Choisissez une station :")
# Bind the dropdown to dashboard(); being the last expression of the cell,
# the resulting interactive widget is what gets displayed.
wg.interactive(dashboard, liste_selection=liste_selection)

Choisissez une station :


interactive(children=(Dropdown(description=' ', options=('01. Duc', '02. Ospedale Maggiore', '03. Traversetolo…