In [1]:
import pandas as pd
from pathlib import Path
import warnings

In [2]:
# Ignore every FutureWarning (not only those raised by pandas) so the
# inspection output below stays readable.
warnings.simplefilter(action='ignore', category=FutureWarning)

# --- Configuration ---
# Paths are built with pathlib for OS compatibility. ".." is relative to the
# kernel's working directory, which is assumed to be the 'notebooks' folder,
# so it points one level up at the project root.
PROJECT_ROOT = Path("..")
ANALYTICS_DATA_DIR = PROJECT_ROOT / "data" / "analytics"
# Number of rows shown by the preview at the end of this cell.
PREVIEW_ROWS = 20

# --- Pick a file to inspect ---
file_to_check = "CCAA Castilla-La Mancha/Llegada_demographics_analysis.parquet"
file_path = ANALYTICS_DATA_DIR / file_to_check

# --- Load the data ---
print(f"Loading data from: {file_path}")
# Fail early with the fully resolved location: because the path is relative,
# a kernel started from a different folder would otherwise be hard to diagnose.
if not file_path.is_file():
    raise FileNotFoundError(
        f"Parquet file not found: {file_path.resolve()} "
        "(the path is resolved relative to the current working directory)"
    )
df = pd.read_parquet(file_path)

# --- Inspect the DataFrame ---
print("\nShape (rows, columns):", df.shape)
print("\nColumns:", df.columns.to_list())

# Display the first PREVIEW_ROWS rows as the cell's rich output
df.head(PREVIEW_ROWS)

Loading data from: ..\data\analytics\CCAA Castilla-La Mancha\Llegada_demographics_analysis.parquet

Shape (rows, columns): (5484, 17)

Columns: ['fecha', 'origen', 'categoriadelvisitante', 'volumen_total', 'volumen_edad_18-24', 'volumen_edad_25-34', 'volumen_edad_35-44', 'volumen_edad_45-54', 'volumen_edad_55-64', 'volumen_edad_65 o más', 'volumen_edad_<18', 'volumen_genero_h', 'volumen_genero_m', 'volumen_genero_nr', 'year', 'month', 'weekday']


Unnamed: 0,fecha,origen,categoriadelvisitante,volumen_total,volumen_edad_18-24,volumen_edad_25-34,volumen_edad_35-44,volumen_edad_45-54,volumen_edad_55-64,volumen_edad_65 o más,volumen_edad_<18,volumen_genero_h,volumen_genero_m,volumen_genero_nr,year,month,weekday
0,2022-06-01,Extranjero,Habitualmente presente,272,,,,,,,,,,,2022,6,Wednesday
1,2022-06-01,Extranjero,Turista,6310,,,,,,,,,,,2022,6,Wednesday
2,2022-06-01,Local,Habitualmente presente,2542,34.0,185.0,295.0,278.0,353.0,287.0,192.0,948.0,676.0,,2022,6,Wednesday
3,2022-06-01,Local,Turista,1106,57.0,51.0,75.0,109.0,105.0,226.0,43.0,319.0,347.0,,2022,6,Wednesday
4,2022-06-01,NoLocal,Habitualmente presente,6918,126.0,461.0,776.0,745.0,763.0,1069.0,523.0,2768.0,1695.0,,2022,6,Wednesday
5,2022-06-01,NoLocal,Turista,12707,262.0,824.0,1557.0,1532.0,1180.0,1670.0,810.0,4715.0,3115.0,,2022,6,Wednesday
6,2022-06-02,Extranjero,Habitualmente presente,279,,,,,,,,,,,2022,6,Thursday
7,2022-06-02,Extranjero,Turista,6285,,,,,,,,,,,2022,6,Thursday
8,2022-06-02,Local,Habitualmente presente,3464,109.0,287.0,389.0,493.0,454.0,278.0,314.0,1317.0,1007.0,,2022,6,Thursday
9,2022-06-02,Local,Turista,1343,,95.0,111.0,134.0,195.0,193.0,66.0,450.0,355.0,,2022,6,Thursday


In [3]:
# List the distinct visitor categories found in the loaded frame.
visitor_category = df['categoriadelvisitante']
unique_values = visitor_category.unique()
print(unique_values)

['Habitualmente presente' 'Turista']


In [4]:
# Row count of the loaded frame (first component of its shape).
df.shape[0]

5484