In [1]:
#### Data Processing ####
import pandas as pd

#### Scraping ####
import requests
from lxml import html

#### Plot ####
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Bar, Scatter, Figure, Layout, Marker, Box
import colorlover as cl
# Initialise plotly's offline notebook mode before any iplot() call further down.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly documentation.
init_notebook_mode(connected=True)

In [2]:
def eco2mix_parser(url, timeout=30):
    """Download an eco2mix XML export and return it as a DataFrame.

    The response is parsed with lxml. Every ``<type>`` element contributes
    the text of its ``<valeur>`` children to a column named
    ``"<v>-<granularite>"``; elements sharing the same pair of attributes
    are concatenated in document order. Values are kept exactly as they
    appear in the response (strings, including markers such as ``'ND'``).

    Parameters
    ----------
    url : str
        Address of the XML export to download.
    timeout : float, optional
        Timeout in seconds passed to ``requests.get`` so that an
        unresponsive server cannot block the notebook forever.

    Returns
    -------
    pandas.DataFrame
        A ``'Date&Time'`` column with a 15-minute step starting at the
        ``<date_debut>`` value, followed by one column per
        (v, granularite) pair in order of first appearance. The number of
        rows is the number of values of the ``Nucléaire``/``Global`` series.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If the response contains no ``<date_debut>`` element.
    """
    # Load the page containing the data; fail early on an HTTP error
    # instead of trying to parse an error page.
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()
    tree = html.fromstring(page.content)

    start = tree.xpath("//date_debut/text()")
    if not start:
        raise IndexError("no <date_debut> element found in the response from " + url)

    # Walk the <type> elements once and group their values by attribute pair.
    # Reading the children of each node (rather than re-querying the whole
    # tree with a string-built XPath) stays linear in the document size and
    # is immune to quotes inside attribute values.
    values_by_type = {}
    for node in tree.xpath("//type"):
        key = (node.get('v'), node.get('granularite'))
        values_by_type.setdefault(key, []).extend(node.xpath("valeur/text()"))

    # The time axis is sized on the nuclear/global series.
    n_periods = len(values_by_type.get(('Nucléaire', 'Global'), []))
    data = pd.DataFrame({
        'Date&Time': pd.date_range(start[0], periods=n_periods, freq='15min')
    })

    # Assigning a Series aligns it on the row index: shorter series are
    # padded with NaN, longer ones are truncated to the time axis.
    for (name, granularity), values in values_by_type.items():
        data[name + '-' + granularity] = pd.Series(values)

    return data

# Settings

In [3]:
# Set the start date (dateDeb) and the end date (dateFin) of the period to download.
dateDeb = "01/09/2016" # format dd/mm/yyyy
dateFin = "20/09/2016" # format dd/mm/yyyy

# Query URL of the eco2mix XML export for that period
# (a single '&' between parameters: the previous '&&' created an empty parameter).
url = "http://www.rte-france.com/getEco2MixXml.php?type=mix&dateDeb=" + dateDeb + "&dateFin=" + dateFin + "&mode=NORM"

# Main

In [4]:
# Download and parse the export for the period configured above.
data = eco2mix_parser(url)

# Show the (rows, columns) shape, then preview the first five rows.
print(data.shape)
data.head(n=5)

(480, 26)


Unnamed: 0,Date&Time,Nucléaire-Global,Charbon-Global,Gaz-Global,Fioul-Global,Pointe-Global,Fioul + Pointe-Global,Hydraulique-Global,Eolien-Global,Solde-Global,...,Gaz-TAC,Gaz-COG,Gaz-CCG,Gaz-AUT,Hydraulique-FEE,Hydraulique-LAC,Hydraulique-STT,Autres-DEC,Autres-BMA,Autres-BGA
0,2016-09-01 00:00:00,37039,1338,4425,183,ND,ND,4662,1201,-1470,...,4,425,3580,416,3024,1322,316,489,251,213
1,2016-09-01 00:15:00,36313,1347,4257,193,ND,ND,4166,1167,-4211,...,5,449,3224,578,2818,1308,40,512,260,216
2,2016-09-01 00:30:00,36111,1259,4029,192,ND,ND,3689,877,-3573,...,5,451,3098,475,2627,957,105,515,265,217
3,2016-09-01 00:45:00,35891,1124,3805,191,ND,ND,3400,741,-4749,...,5,405,2929,467,2613,788,0,504,265,218
4,2016-09-01 01:00:00,35603,1034,3360,191,ND,ND,3413,728,-4824,...,6,363,2647,344,2604,810,0,521,258,217


In [5]:
#### Plot

# (column of `data`, legend label) for each curve, in drawing order.
series_to_plot = [
    ('Fioul-Global', 'Fioul (MW)'),
    ('Charbon-Global', 'Charbon (MW)'),
    ('Gaz-Global', 'Gaz (MW)'),
    ('Hydraulique-Global', 'Hydraulique (MW)'),
    ('Nucléaire-Global', 'Nucléaire (MW)'),
    ('Solaire-Global', 'Solaire (MW)'),
    ('Eolien-Global', 'Eolien (MW)'),
    ('Autres-Global', 'Bioénergies (MW)'),
    ('Pompage-Global', 'Pompage (MW)'),
    ('Solde-Global', 'Exports (MW)'),
]

# One line trace per series, all sharing the same time axis.
traces = [
    Scatter(x=data['Date&Time'], y=data[column], mode='lines', name=label)
    for column, label in series_to_plot
]

# The title recalls the requested period.
title = """Production Nationale Française d'Électricité entre le """ + dateDeb + ' et le ' + dateFin

iplot({'data': traces, 'layout': Layout(title=title)}, show_link=False)