# ⛽ Mexico Gas Prices Analysis
**Author:** Eduardo Torres <br>
**Date:** 2025-10-03 <br>
**Purpose:** Analyze gas prices in Mexico <br>

## Imports & Setup

Extract the station (places) and price data from the source XML files and save them as a compressed daily snapshot.

In [None]:
import sys
from pathlib import Path
import pandas as pd

# Put the directory one level above the notebook's working directory at the
# front of sys.path so the project-local `src` package can be imported.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent
sys.path.insert(0, str(project_root))

# Input XML files, given relative to the notebook's working directory.
PLACES_PATH = Path('../data/places.xml')
PRICES_PATH = Path('../data/prices.xml')

# Run the daily snapshot: extract places and prices, then save them together.
from src.etl.etl import save_daily_snapshot, extract_places_xml, extract_prices_xml

save_daily_snapshot(
    extract_places_xml(PLACES_PATH),
    extract_prices_xml(PRICES_PATH),
    output_dir='../data/raw',
)

2025-10-04 20:33:26,416 - src.etl.etl - INFO - Extracting places from ..\data\places.xml
2025-10-04 20:33:26,645 - src.etl.etl - INFO - Extracting prices from ..\data\prices.xml
2025-10-04 20:33:26,750 - src.etl.etl - INFO - Creating daily snapshot
2025-10-04 20:33:27,052 - src.etl.etl - INFO - Daily snapshot saved to ..\data\raw\gas_prices_20251004.csv.gz


In [5]:
import hashlib

def get_file_hash(file_path, chunk_size=1024 * 1024):
    """Return the MD5 hex digest of a file's contents.

    The file is read in fixed-size chunks and fed to an incremental hash
    object, so memory use stays bounded regardless of the file's size.
    MD5 is used here only as a change-detection fingerprint, not for
    any security purpose.

    Args:
        file_path: Path of the file to hash (anything ``open`` accepts).
        chunk_size: Number of bytes to read per iteration; must be positive.

    Returns:
        The hexadecimal MD5 digest as a string.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately and read(-1) reads everything at once,
    # so reject non-positive sizes instead of silently misbehaving.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    digest = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter() with a sentinel stops when read() returns b"" at end of file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Example usage: compare the file's current fingerprint with the one remembered
# from the previous check.
xml_file = "../data/prices.xml"

# NOTE(review): this is reset to None every time the cell runs, so only the
# "first check" branch below is ever taken here; the value would need to be
# stored somewhere between runs for the other branches to be reachable.
previous_hash = None

current_hash = get_file_hash(xml_file)

if previous_hash is not None and previous_hash == current_hash:
    print("No changes detected in the XML.")
else:
    # Either there is no earlier hash or it differs; report which, then
    # remember the new fingerprint.
    if previous_hash is None:
        print("First check, storing hash...")
    else:
        print("The XML file has changed!")
    previous_hash = current_hash

First check, storing hash...


## 📂 Data Loading

In [3]:
# Load the processed gas-price dataset from its gzip-compressed CSV file.
df = pd.read_csv(
    '../data/processed/mexico_gas_prices.csv.gz',
    compression='gzip',
)

# Leave the frame as the cell's last expression so the notebook renders it.
df

Unnamed: 0,place_id,name,cre_id,longitude,latitude,gas_type,price,date
0,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.92140,32.47641,regular,22.69,2025-10-02
1,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.92140,32.47641,premium,26.99,2025-10-02
2,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.92140,32.47641,diesel,26.09,2025-10-02
3,2040,"LAS MEJORES ESTACIONES, S.A DE C.V",PL/902/EXP/ES/2015,-99.74484,20.30370,regular,23.99,2025-10-02
4,2040,"LAS MEJORES ESTACIONES, S.A DE C.V",PL/902/EXP/ES/2015,-99.74484,20.30370,premium,25.39,2025-10-02
...,...,...,...,...,...,...,...,...
120348851,11703,SERVICIO LAS TORRES SUR SA DE CV,PL/9999/EXP/ES/2015,-101.66340,21.14916,regular,,2025-08-29
120348852,11703,SERVICIO LAS TORRES SUR SA DE CV,PL/9999/EXP/ES/2015,-101.66340,21.14916,premium,,2025-08-30
120348853,11703,SERVICIO LAS TORRES SUR SA DE CV,PL/9999/EXP/ES/2015,-101.66340,21.14916,regular,,2025-08-30
120348854,11703,SERVICIO LAS TORRES SUR SA DE CV,PL/9999/EXP/ES/2015,-101.66340,21.14916,premium,,2025-08-31


In [4]:
# Inspect the dtype pandas assigned to each column after loading.
df.dtypes

place_id       int64
name          object
cre_id        object
longitude    float64
latitude     float64
gas_type      object
price        float64
date          object
dtype: object

In [None]:
# Parse the ISO 8601 date strings, then keep only the calendar-date part.
# NOTE(review): `.dt.date` produces Python `datetime.date` objects, so the
# column remains object dtype rather than datetime64 - confirm this is intended.
df['date'] = (
    pd.to_datetime(df['date'], format='ISO8601')
    .dt.date
)

In [None]:
# Display the DataFrame again; pandas renders a truncated preview for large frames.
df

Unnamed: 0,place_id,name,cre_id,longitude,latitude,gas_type,price,date
0,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.92140,32.47641,regular,22.69,2025-10-02
1,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.92140,32.47641,premium,26.99,2025-10-02
2,2039,"ESTACION HIPODROMO, S.A. DE C.V.",PL/658/EXP/ES/2015,-116.92140,32.47641,diesel,26.09,2025-10-02
3,2040,"LAS MEJORES ESTACIONES, S.A DE C.V",PL/902/EXP/ES/2015,-99.74484,20.30370,regular,23.99,2025-10-02
4,2040,"LAS MEJORES ESTACIONES, S.A DE C.V",PL/902/EXP/ES/2015,-99.74484,20.30370,premium,25.39,2025-10-02
...,...,...,...,...,...,...,...,...
120348851,11703,SERVICIO LAS TORRES SUR SA DE CV,PL/9999/EXP/ES/2015,-101.66340,21.14916,regular,,2025-08-29
120348852,11703,SERVICIO LAS TORRES SUR SA DE CV,PL/9999/EXP/ES/2015,-101.66340,21.14916,premium,,2025-08-30
120348853,11703,SERVICIO LAS TORRES SUR SA DE CV,PL/9999/EXP/ES/2015,-101.66340,21.14916,regular,,2025-08-30
120348854,11703,SERVICIO LAS TORRES SUR SA DE CV,PL/9999/EXP/ES/2015,-101.66340,21.14916,premium,,2025-08-31
