In [2]:
import os
import requests
import pandas as pd
from lxml import etree
from datetime import datetime

Información obtenida de https://historico.datos.gob.mx/busca/dataset/estaciones-de-servicio-gasolineras-y-precios-finales-de-gasolina-y-diesel
y el histórico de https://www.gob.mx/cre/articulos/precios-vigentes-de-gasolinas-y-diesel, en el apartado "Histórico de precios de gasolina y diésel por permisionarios".

Opción final para GitHub: descargar el XML desde el código y actualizar los datos; el único inconveniente es que la VPN no me lo permite en la computadora del trabajo.

Opción de descarga directa desde el .xml

## Places

In [8]:
# Parse the station catalogue (places.xml) with lxml and flatten it into one
# record per <place> element.
tree = etree.parse("../data/places.xml")
root = tree.getroot()

# Fixed column order; also guarantees the columns exist when the XML has no
# <place> elements, so a later merge on "place_id" does not fail with KeyError.
place_columns = ["place_id", "name", "cre_id", "longitude", "latitude"]


def _coordinate(place, path):
    """Return the number stored at ``path`` under ``place`` as a float.

    A missing element or empty/blank text yields NaN instead of aborting the
    whole load with an opaque AttributeError/TypeError. Text that is present
    but not numeric still raises ValueError.
    """
    text = (place.findtext(path) or "").strip()
    return float(text or "nan")


# Extract data
data = []
for place in root.findall('place'):
    data.append({
        "place_id": place.get('place_id'),  # XML attribute, kept as a string
        "name": place.findtext('name'),
        "cre_id": place.findtext('cre_id'),
        "longitude": _coordinate(place, 'location/x'),
        "latitude": _coordinate(place, 'location/y'),
    })

# Convert to DataFrame
df_places = pd.DataFrame(data, columns=place_columns)
df_places.head()

Unnamed: 0,place_id,name,cre_id,longitude,latitude
0,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.9214,32.47641
1,2040,"LAS MEJORES ESTACIONES, S.A DE C.V",PL/902/EXP/ES/2015,-99.74484,20.3037
2,2041,"DIAZ GAS, S.A. DE C.V.",PL/760/EXP/ES/2015,-106.4514,31.71947
3,2042,"COMBU-EXPRESS, S.A. DE C.V.",PL/825/EXP/ES/2015,-103.3042,20.71413
4,2043,"PETROMAX, S.A. DE C.V.",PL/585/EXP/ES/2015,-98.29977,26.03787


## Prices

In [9]:
# Read the current price list (prices.xml) with lxml. Each <place> carries
# zero or more <gas_price> children whose "type" attribute names the fuel.
tree = etree.parse("../data/prices.xml")
root = tree.getroot()

# Flatten into one record per (station, fuel type) pair.
data = [
    {
        "place_id": station.get('place_id'),
        "gas_type": quote.get('type'),
        "price": float(quote.text),
    }
    for station in root.findall('place')
    for quote in station.findall('gas_price')
]

# Build the DataFrame and preview the first rows.
df_prices = pd.DataFrame(data)
df_prices.head()

Unnamed: 0,place_id,gas_type,price
0,11703,regular,22.95
1,11703,premium,23.89
2,11702,regular,24.5
3,11702,premium,27.3
4,11702,diesel,27.65


## Merging

In [18]:
# Attach the current prices to every station. The left join keeps stations
# that have no entry in df_prices; their gas_type/price come out as NaN.
df_total = df_places.merge(df_prices, how='left', on='place_id')

# Timestamp the snapshot so every run writes a new file instead of
# overwriting the previous one.
now = datetime.now()
# Format for filename
timestamp = now.strftime("%Y%m%d_%H%M%S")

# to_csv does not create missing folders, so make sure the target directory
# exists (e.g. on a fresh checkout) before writing.
raw_dir = '../data/raw'
os.makedirs(raw_dir, exist_ok=True)

# The row index is still written as the first (unnamed) column, as before.
df_total.to_csv(f'{raw_dir}/gas_prices_{timestamp}.csv')
df_total.head()

Unnamed: 0,place_id,name,cre_id,longitude,latitude,gas_type,price
0,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.9214,32.47641,regular,22.69
1,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.9214,32.47641,premium,26.99
2,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.9214,32.47641,diesel,26.09
3,2040,"LAS MEJORES ESTACIONES, S.A DE C.V",PL/902/EXP/ES/2015,-99.74484,20.3037,regular,23.99
4,2040,"LAS MEJORES ESTACIONES, S.A DE C.V",PL/902/EXP/ES/2015,-99.74484,20.3037,premium,25.39


## Historical Prices

In [22]:
# Load the historical price table: one row per permit/sub-product and one
# column per calendar day. The file is read as latin-1 so the accented
# product names decode correctly.
historical_csv = '../data/raw/gas_historical_prices.csv'
df_historical = pd.read_csv(historical_csv, encoding='latin-1')
df_historical

Unnamed: 0,NumeroPermiso,SubProducto,01/01/2017,02/01/2017,03/01/2017,04/01/2017,05/01/2017,06/01/2017,07/01/2017,08/01/2017,...,22/08/2025,23/08/2025,24/08/2025,25/08/2025,26/08/2025,27/08/2025,28/08/2025,29/08/2025,30/08/2025,31/08/2025
0,PL/1000/EXP/ES/2015,Diésel Automotríz,,,17.06,17.06,17.06,17.06,17.06,17.06,...,25.69,25.69,25.69,25.69,25.69,25.79,25.79,25.79,25.79,25.79
1,PL/1000/EXP/ES/2015,Premium,,,18.19,18.19,18.19,18.19,18.19,18.19,...,26.29,26.49,26.49,26.49,26.49,26.49,26.49,26.49,26.49,26.49
2,PL/1000/EXP/ES/2015,Regular,,,16.32,16.32,16.32,16.32,16.32,16.32,...,23.99,23.99,23.99,23.99,23.99,23.99,23.99,23.99,23.99,23.99
3,PL/10000/EXP/ES/2015,Diésel Automotríz,17.16,17.16,17.16,17.16,17.16,17.16,17.16,17.16,...,24.93,24.93,24.93,24.93,24.93,24.93,24.93,24.93,24.93,24.93
4,PL/10000/EXP/ES/2015,Premium,17.86,17.86,17.86,17.86,17.86,17.86,17.86,17.86,...,25.56,25.56,25.56,25.56,25.56,25.56,25.56,25.56,25.56,25.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38007,PL/9998/EXP/ES/2015,Diésel Automotríz,,,17.34,17.34,17.34,17.34,17.34,17.34,...,,,,,,,,,,
38008,PL/9998/EXP/ES/2015,Premium,,,18.01,18.01,18.01,18.01,18.01,18.01,...,,,,,,,,,,
38009,PL/9998/EXP/ES/2015,Regular,,,16.24,16.24,16.24,16.24,16.24,16.24,...,,,,,,,,,,
38010,PL/9999/EXP/ES/2015,Premium,17.86,17.86,17.86,17.86,17.86,17.86,17.86,17.86,...,,,,,,,,,,


In [23]:
# Tally the historical rows by fuel sub-product.
subproduct = df_historical['SubProducto']
subproduct.value_counts()

SubProducto
Regular                               13884
Premium                               13026
Diésel Automotríz                     10133
Diésel de Ultra Bajo Azufre (DUBA)      924
Diésel Agrícola/Marino                   44
Diésel Industrial                         1
Name: count, dtype: int64

In [24]:
# Display the merged frame (stations left-joined with their current prices).
df_total

Unnamed: 0,place_id,name,cre_id,longitude,latitude,gas_type,price
0,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.92140,32.47641,regular,22.69
1,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.92140,32.47641,premium,26.99
2,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.92140,32.47641,diesel,26.09
3,2040,"LAS MEJORES ESTACIONES, S.A DE C.V",PL/902/EXP/ES/2015,-99.74484,20.30370,regular,23.99
4,2040,"LAS MEJORES ESTACIONES, S.A DE C.V",PL/902/EXP/ES/2015,-99.74484,20.30370,premium,25.39
...,...,...,...,...,...,...,...
36258,30188,"GAZPRO, S.A. DE C.V.",PL/26244/EXP/ES/2025,-100.46520,20.62897,,
36259,30189,"PETRALL, S.A. DE C.V.",PL/26245/EXP/ES/2025,-102.39300,21.88831,regular,23.98
36260,30189,"PETRALL, S.A. DE C.V.",PL/26245/EXP/ES/2025,-102.39300,21.88831,premium,26.05
36261,30189,"PETRALL, S.A. DE C.V.",PL/26245/EXP/ES/2025,-102.39300,21.88831,diesel,26.90
