In [41]:
import os
import plotly.graph_objects as go
import numpy as np
import urllib.request
import io
import xml.etree.ElementTree as ET
import zipfile
import numpy as np
import pandas as pd

In [42]:
# Remote source of the instantaneous price feed (kept for reference; the
# notebook reads the local copy at FILE_PATH instead).
# URL = f'https://donnees.roulez-eco.fr/opendata/instantane'
FILE_PATH= 'data/PrixCarburants_instantane.xml'   # XML file parsed by parse_xml
CITY_PATH = 'data/cities_coordinates.csv'         # CSV with postal_code / lat / lon columns

# Initial state of the widget controls.
INIT_POST_CODE = '75001'
INIT_FUEL = 'SP98'
INIT_DIST = 3   # search radius, in km

# Sphere radius used by haversine_distance, which reports distances in km.
R = 6371

# Mapbox access token used by the map layout. It can be supplied through the
# MAPBOX_ACCESS_TOKEN environment variable so that a personal token does not
# have to be committed with the notebook; the literal below is only the
# fallback that keeps the notebook running when the variable is not set.
TOKEN = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1Ijoic2F0eTc2OCIsImEiOiJjbGlmM3JrZG8wOTJsM2VwaWdmazI1bnZyIn0.qZWvh0DdP0JZzMg_7Bz6Xw',
)

In [43]:
def filter_dataset_from_source_point(stations_df, lat_point, lon_point, distance = 20):
    '''Keep only the stations located at most `distance` km from a reference point.

    stations_df must expose `latitude` and `longitude` columns; the distance of
    every row to (lat_point, lon_point) is computed with haversine_distance.
    Returns the matching rows of stations_df.
    '''
    km_to_point = haversine_distance(
        stations_df.latitude, stations_df.longitude, lat_point, lon_point
    )
    within_range = km_to_point <= distance
    return stations_df.loc[within_range]
    
def filter_dataset_by_gas_type(sub_station_df, gas_types_df, gas_type):
    '''Attach fuel information to stations and keep the rows of one fuel type.

    Both frames are indexed on their `station_id` column and left-joined
    (stations on the left), then only the rows whose `nom` column equals
    `gas_type` are returned. The result is indexed by `station_id`.
    '''
    stations_by_id = sub_station_df.set_index("station_id")
    fuels_by_id = gas_types_df.set_index("station_id")
    stations_with_fuels = stations_by_id.join(fuels_by_id, how='left')

    # Stations without any fuel row get a missing `nom` and are dropped here.
    is_requested_fuel = stations_with_fuels.nom == gas_type
    return stations_with_fuels.loc[is_requested_fuel]
  
def haversine_distance(lat1, lon1, lat2, lon2, radius=None):
    '''Calculate the great-circle distance between (lat1, lon1) and (lat2, lon2).

    Coordinates are given in degrees and may be scalars or array-likes
    (NumPy ufuncs are used throughout, so inputs broadcast together).

    radius: sphere radius; the result is expressed in the same unit.
        When omitted, the module-level constant R is used, which gives
        a distance in km.

    Returns the distance(s) along the sphere surface.
    '''
    if radius is None:
        radius = R

    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    # Rounding can push `a` marginally outside [0, 1] (e.g. for near-antipodal
    # points), which would make sqrt(1 - a) return NaN. Clamp it first.
    a = np.minimum(1.0, np.maximum(0.0, a))
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius * c

In [44]:
def parse_pdv(pdv, stations, oil_types):
    '''Extract all the data available in one pdv element.

    pdv: XML element whose attributes describe a station and whose children
        carry its address, city, opening hours and fuel prices.
    stations: list receiving one dict per station (appended in place).
    oil_types: list receiving one dict per fuel price (appended in place
        by parse_oil).

    Returns the (stations, oil_types) pair.
    '''
    # Start from the attributes of the pdv element.
    station = dict(pdv.attrib)

    # The data documentation states that latitudes and longitudes have to be
    # divided by 100 000. A missing or empty coordinate becomes NaN instead of
    # aborting the whole parse.
    for coord in ("latitude", "longitude"):
        try:
            station[coord] = float(station.get(coord)) / 100000
        except (TypeError, ValueError):
            station[coord] = np.nan

    # Rename the id key.
    station["station_id"] = station.pop("id")

    # Dispatch on the tag of each child element.
    for element in pdv:
        if element.tag == "adresse":
            station["adress"] = element.text
        elif element.tag == "ville":
            station["city"] = element.text
        elif element.tag == "horaires":   # opening hours of the station
            station = parse_schedules(element, station)
        elif element.tag == 'prix':       # price of one fuel type
            oil_types = parse_oil(element, station, oil_types)

    stations.append(station)
    return stations, oil_types

def parse_schedules(schedule_element, station):
    '''Store the opening hours found in a schedule element into `station`.

    schedule_element: XML element whose children are the days; each day has a
        "nom" attribute and zero or more time-slot children carrying
        "ouverture" / "fermeture" attributes.
    station: dict updated in place with
        - "automate_24_24": "Yes" when the "automate-24-24" attribute is "1",
          NaN otherwise (including when the attribute is absent);
        - "<day>_opening" / "<day>_closing" for every day. Days without any
          time slot get NaN; when a day has several slots the last one wins.

    Returns the updated station dict.
    '''
    is_automated = schedule_element.attrib.get("automate-24-24") == "1"
    station["automate_24_24"] = "Yes" if is_automated else np.nan

    for day in schedule_element:
        day_of_week = day.attrib["nom"]
        opening = np.nan
        closing = np.nan
        # Read the slots of THIS day only. Iterating over schedule_element
        # again here would give every day the hours of the last day.
        for time in day:
            opening = time.attrib["ouverture"]
            closing = time.attrib["fermeture"]

        station[f"{day_of_week}_opening"] = opening
        station[f"{day_of_week}_closing"] = closing

    return station

def parse_oil(oil_element, station, oil_types):
    '''Record the fuel described by `oil_element` for the given station.

    The new record holds the station's "station_id" (used as a secondary key),
    every attribute of the element except "id", and an "oil_id" built as
    "<station_id>_<id>". It is appended to `oil_types`, which is returned.
    '''
    record = {"station_id": station["station_id"], **oil_element.attrib}

    # Replace the per-station fuel id by an identifier unique across stations.
    fuel_id = record.pop("id")
    record["oil_id"] = f'{record["station_id"]}_{fuel_id}'

    oil_types.append(record)
    return oil_types

def parse_xml(filepath):
    '''Parse the whole XML file into two DataFrames.

    Every child of the root node is handed to parse_pdv, which accumulates
    station records and fuel records.

    Returns (stations_df, oil_types_df): one row per station and one row per
    fuel record respectively.
    '''
    # Build the tree with the xml parser and go straight to its root node.
    root = ET.parse(filepath).getroot()

    station_records, oil_records = [], []
    for pdv in root:
        station_records, oil_records = parse_pdv(pdv, station_records, oil_records)

    return pd.DataFrame(station_records), pd.DataFrame(oil_records)

In [45]:
def points_on_circle(lat, lon, radius, num_points):
    '''Return the outline of a circle of `radius` centered on (lat, lon).

    lat, lon are in degrees; radius is in the unit of the sphere radius used
    below (6371). `num_points` points are generated at evenly spaced bearings
    starting at 0 degrees, and the first point is repeated at the end so that
    the outline is closed.

    Returns two arrays of length num_points + 1: latitudes and longitudes,
    in degrees.
    '''
    sphere_radius = 6371
    angular_dist = radius / sphere_radius

    # Terms that do not depend on the bearing.
    lat_rad = np.radians(lat)
    lon_rad = np.radians(lon)
    sin_lat, cos_lat = np.sin(lat_rad), np.cos(lat_rad)
    sin_d, cos_d = np.sin(angular_dist), np.cos(angular_dist)

    ring = []
    for step in range(num_points):
        bearing_rad = np.radians(360/num_points*step)
        ring_lat = np.arcsin(sin_lat*cos_d + cos_lat*sin_d*np.cos(bearing_rad))
        ring_lon = lon_rad + np.arctan2(np.sin(bearing_rad)*sin_d*cos_lat,
                                        cos_d - sin_lat*np.sin(ring_lat))
        ring.append((np.degrees(ring_lat), np.degrees(ring_lon)))

    # Repeat the first point to close the outline.
    closed = np.array(ring + ring[:1])

    return closed[:, 0], closed[:, 1]

def init_chart(sub_df, init_lat, init_lon, distance):
    '''Build the map figure for a set of stations around a search center.

    sub_df: frame with `latitude`, `longitude` and `valeur` columns
        (`valeur` is concatenated with a text suffix and cast to float).
    init_lat, init_lon: search center, also used as the map center.
    distance: radius of the search zone drawn around the center.

    Returns a go.FigureWidget whose traces are, in order: search zone,
    black border discs, price-colored discs, fuel icons, user position.
    StationWidget._update_fig addresses the traces by these positions.
    '''
    hover_labels = sub_df.valeur+' €/L'

    # Arguments shared by the three traces drawn on every station.
    per_station = dict(
        lat=sub_df.latitude,
        lon=sub_df.longitude,
        mode='markers',
        text=hover_labels,
        showlegend=False,
    )

    # Fuel pictogram on top of each station.
    stations = go.Scattermapbox(
        marker=dict(size=14, color='white', symbol='fuel'),
        **per_station,
    )

    # Disc colored according to the price.
    prices = go.Scattermapbox(
        marker=dict(
            size=40,
            color=sub_df.valeur.astype(float),
            colorscale='PiYG_r',
        ),
        opacity=0.7,
        **per_station,
    )

    # Slightly larger black disc acting as a border for the price disc.
    solid_price_border = go.Scattermapbox(
        marker=dict(size=45, color='black'),
        opacity=0.8,
        **per_station,
    )

    # Points lying on the search circle, connected by a line and filled.
    zone_lats, zone_lons = points_on_circle(init_lat, init_lon, distance, 50)
    research_zone = go.Scattermapbox(
        lat=zone_lats,
        lon=zone_lons,
        mode='lines',
        fill='toself',
        fillcolor="rgba(1,1,1,0.2)",
        marker=dict(size=45, color='black'),
        opacity=0.8,
        showlegend=False,
    )

    # Red dot at the search center.
    user_position = go.Scattermapbox(
        lat=[init_lat],
        lon=[init_lon],
        mode='markers',
        marker=dict(size=10, color='red'),
        opacity=1,
        showlegend=False,
    )

    map_settings = dict(
        accesstoken=TOKEN,
        style='streets',
        center=dict(lat=init_lat, lon=init_lon),
        zoom=11,
    )
    layout = go.Layout(
        height=600,
        width=600,
        mapbox=map_settings,
        margin=dict(l=0, r=0, t=0, b=0),
    )

    traces = [research_zone, solid_price_border, prices, stations, user_position]
    return go.FigureWidget(data=traces, layout=layout)

In [46]:
import pandas as pd

from ipywidgets import Text, Dropdown, VBox, HBox
from IPython.display import display


class StationWidget:
    '''Interactive map of the fuel stations around a postal code.

    The widget stacks a postal-code text box, a fuel-type dropdown, a distance
    dropdown and a map figure. Changing any control re-filters the stations
    and updates the figure in place.
    '''

    def __init__(self):
        '''Load the data files, apply the initial filters and build the layout.

        Raises ValueError when the initial postal code is not present in the
        cities table, since no map could be built in that case.
        '''
        self._stations_df, self._gas_types_df = parse_xml(FILE_PATH)
        self._cities_df = pd.read_csv(CITY_PATH, index_col=0, dtype={"postal_code": str})
        self._distance = INIT_DIST
        self._fuel = INIT_FUEL
        self._post_code = INIT_POST_CODE
        if not self._filter_by_postal_code(self._post_code, self._distance, self._fuel):
            raise ValueError(
                f"Initial postal code {self._post_code!r} not found in {CITY_PATH!r}"
            )
        self._init_layout()

    def _update_fig(self):
        '''Copy the data of a freshly built chart into the displayed figure.'''
        new_fig = init_chart(self._sub_station_with_prices, self._lat_pos, self._lon_pos, self._distance)
        with self._fig.batch_update():
            # Update the coordinates of the 5 traces.
            for k in range(5):
                self._fig.data[k].lat = new_fig.data[k].lat
                self._fig.data[k].lon = new_fig.data[k].lon

            # Update the marker colors of the prices trace.
            self._fig.data[2].marker = new_fig.data[2].marker

            # Update the hover text of the three per-station traces.
            for k in [1, 2, 3]:
                self._fig.data[k].text = new_fig.data[k].text

            # Re-center the map on the new position.
            center = {"lat": self._lat_pos, "lon": self._lon_pos}
            self._fig.layout.mapbox.center = center

    def _filter_by_postal_code(self, postal_code, distance_km, gas_type):
        '''Select the stations selling `gas_type` within `distance_km` of a postal code.

        On success the center position and the filtered stations are stored on
        the instance and a truthy value is returned. When the postal code is
        unknown nothing is modified and a falsy value is returned.
        '''
        sub_cities = self._cities_df.loc[self._cities_df.postal_code == postal_code]

        # Unknown postal code: report failure to the caller without raising.
        if len(sub_cities) == 0:
            return False

        # Several rows may share a postal code; the first one is used.
        self._lat_pos, self._lon_pos = sub_cities.iloc[0][["lat", "lon"]]
        sub_station_df = filter_dataset_from_source_point(self._stations_df, self._lat_pos, self._lon_pos, distance_km)

        kept_columns = ["latitude", "longitude", "cp", "city", "nom", "valeur"]
        self._sub_station_with_prices = filter_dataset_by_gas_type(sub_station_df, self._gas_types_df, gas_type)[kept_columns]
        return True

    def _init_layout(self):
        '''Create the controls, register their callbacks and build the figure.'''
        # Fuel type dropdown.
        fuel_type_dropdown = Dropdown(options=self._gas_types_df.nom.unique(),
                                      value=self._fuel,
                                      description="Fuel type")
        fuel_type_dropdown.observe(self._on_fuel_change, names='value')

        # Maximum distance dropdown.
        distance_dropdrown = Dropdown(options=[*range(1, 30)],
                                      value=self._distance,
                                      description="distance (km)")
        distance_dropdrown.observe(self._on_distance, names='value')

        # Postal code text box.
        postal_code_text = Text(placeholder="Postal Code")
        postal_code_text.observe(self._on_change_text, names='value')

        # Figure built from the initial values.
        self._fig = init_chart(self._sub_station_with_prices,
                               self._lat_pos,
                               self._lon_pos,
                               self._distance)

        # Assemble the widget.
        self._widget = VBox([postal_code_text,
                             HBox([fuel_type_dropdown,
                                   distance_dropdrown]),
                             self._fig])

    def _refresh(self, postal_code):
        '''Re-filter with the current settings and redraw when the code is valid.'''
        if not self._filter_by_postal_code(postal_code, self._distance, self._fuel):
            return False
        self._post_code = postal_code
        self._update_fig()
        return True

    def _on_change_text(self, change):
        '''Callback of the postal-code text box.'''
        # Surrounding whitespace would otherwise make a valid code unknown.
        self._refresh(str(change["new"]).strip())

    def _on_fuel_change(self, change):
        '''Callback of the fuel-type dropdown.'''
        self._fuel = change["new"]
        self._refresh(self._post_code)

    def _on_distance(self, change):
        '''Callback of the distance dropdown.'''
        self._distance = change["new"]
        self._refresh(self._post_code)

    def display(self):
        '''Render the widget in the notebook output.'''
        display(self._widget)

In [47]:
# Build the widget (this parses FILE_PATH and reads CITY_PATH) and render it
# in the cell output.
StationWidget().display()

VBox(children=(Text(value='', placeholder='Postal Code'), HBox(children=(Dropdown(description='Fuel type', ind…