In [1]:
# Add project root to the Python path to allow importing from the 'scripts' folder
import os
import sys
sys.path.append(os.path.abspath(os.path.join('..')))

# Import other necessary libraries
# (pyplot and seaborn are used by the analysis cells further down as `plt` / `sns`)
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Import the consolidated helper script with an alias 'h'
# (must come after the sys.path tweak above so that 'scripts' is importable)
from scripts import project_helpers as h

# Configure notebook display options
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
pd.set_option('display.max_columns', 50)

In [2]:
# Paths are defined once in the helper module; the only thing this notebook
# has to do is make sure the chart output folder exists (creating any missing
# parents, and not failing when it is already there).
output_dir = h.OUTPUT_DIR
output_dir.mkdir(parents=True, exist_ok=True)

# Echo the resolved locations so the reader can see where files will land.
print(f"Project Base Directory: {h.BASE_DIR}")
print(f"Chart output directory: {output_dir.resolve()}")

Project Base Directory: C:\dev\castilla_tourism_portfolio
Chart output directory: C:\dev\castilla_tourism_portfolio\output


## 1. Data Loading

Load all datasets using the functions from our `project_helpers` script.

In [3]:
# --- INE datasets, loaded through the project helper functions ---
# NOTE(review): the saved output of this cell stops right after
# "Loading INE data..." with
#   AttributeError: 'Series' object has no attribute 'year'
# so one of the four loaders below is raising; the fix belongs in
# `project_helpers`, not in this cell.
print("Loading INE data...")
df_ine_ecp = h.load_ine_ecp()
df_ine_padron = h.load_ine_municipal_padron()
df_ine_frontur = h.load_ine_frontur()
df_ine_eoh = h.load_ine_eoh()
print("INE data loaded.")

# --- Mobile datasets for Castilla-La Mancha (CCAA level) ---
print("\nLoading Mobile data for Castilla-La Mancha...")
clm_folder = "CCAA Castilla-La Mancha"
df_mob_clm_diario_demo = h.load_mobile_data(
    clm_folder, "Diario_Mes_demographics_analysis.parquet"
)
df_mob_clm_diario_origin = h.load_mobile_data(
    clm_folder, "Diario_Mes_origin_analysis.parquet"
)
df_mob_clm_nocturno_origin = h.load_mobile_data(
    clm_folder, "Nocturno_Mes_origin_analysis.parquet"
)
df_mob_clm_noche_estancia = h.load_mobile_data(
    clm_folder, "NocheEstancia_origin_analysis.parquet"
)

# Tag the CCAA-level frames with a province label so they can be merged
# consistently with the EOH data. Frames that came back empty are left alone.
clm_province_label = '08 Castilla - La Mancha'
for region_frame in (df_mob_clm_nocturno_origin, df_mob_clm_noche_estancia):
    if region_frame.empty:
        continue
    region_frame['provincia'] = clm_province_label

print("Mobile data loaded.")
print("\nSample of EOH (Hotel Occupancy) data:")
display(df_ine_eoh.head())

Loading INE data...


AttributeError: 'Series' object has no attribute 'year'

In [None]:
# Cell 4: Analysis 2 - Average Length of Stay Comparison (Using Corrected Function)
# ---------------------------------------------------------------------------------
# Compares the average length of stay computed from the mobile "NocheEstancia"
# data with the one reported by the EOH (hotel occupancy) data. The helper is
# the refined version that filters for tourist categories.
#
# Inputs are the frames created in the data-loading cell:
#   df_mob_clm_noche_estancia  - mobile night-stay data for Castilla-La Mancha
#   df_ine_eoh                 - EOH data
# The cell previously referenced `dp`, `noche_estancia_clm_df` and
# `public_data['eoh']`, none of which exist in this notebook, so it failed with
# NameError on a fresh kernel.
# NOTE(review): assumes `prepare_average_stay_comparison` now lives in the
# consolidated `project_helpers` module (alias `h`) with the same
# (mobile_df, eoh_df) argument order - confirm against the helper script.
stay_comparison_df = h.prepare_average_stay_comparison(
    df_mob_clm_noche_estancia,
    df_ine_eoh,
)

# The plot below needs these three columns in the helper's result.
fig, ax = plt.subplots(figsize=(15, 8))
stay_comparison_df.plot(
    x='date',
    y=['Mobile_Avg_Stay', 'EOH_Avg_Stay'],
    ax=ax,
    style=['-o', '--s'],
    lw=2.5,
    ms=8,
)
ax.set_title('Average Length of Stay (Tourists Only): Mobile Data vs. EOH', fontsize=20, pad=20)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Average Nights per Visitor', fontsize=14)
# Legend labels follow the order of the `y` columns above.
ax.legend(['Mobile Data Avg. Stay', 'EOH Avg. Stay'], fontsize=12)
fig.tight_layout()
plt.show()
display(stay_comparison_df.head())

In [None]:



# Cell 5: Analysis 3 - City-Level Resident Population Comparison (NEW)
# --------------------------------------------------------------------
# Compares resident population at the city level between the mobile data
# and the official Padrón municipal, one bar-chart facet per year.

# 1. Prepare data using the helper function.
# The cell previously called this through `dp`, which is not defined in this
# notebook (NameError on a fresh kernel).
# NOTE(review): assumes `prepare_city_population_comparison` lives in the
# consolidated `project_helpers` module (alias `h`) - confirm.
# NOTE(review): `PATH_ANALYTICS` and `PATH_PUBLIC` are not defined in any
# visible cell either; they must be bound (presumably from the path constants
# in `project_helpers`) before this cell can run - TODO confirm the names.
city_pop_comparison_df = h.prepare_city_population_comparison(PATH_ANALYTICS, PATH_PUBLIC)

# 2. Reshape to long format: one row per (year, City, Source) so that the
#    data source can be used as the bar hue.
city_pop_melted = city_pop_comparison_df.melt(
    id_vars=['year', 'City'],
    value_vars=['Mobile_Population', 'Padron_Population'],
    var_name='Source',
    value_name='Population'
)

# 3. Generate visualization
g = sns.catplot(
    data=city_pop_melted,
    x='City',
    y='Population',
    hue='Source',
    col='year',
    kind='bar',
    height=6,
    aspect=1.2
)
# `g.figure` replaces the deprecated `g.fig` accessor.
g.figure.suptitle('Annual Resident Population: Mobile Data vs. Padrón', y=1.03, fontsize=20)
g.set_axis_labels("City", "Average / Total Population")
g.set_titles("Year {col_name}")
# No global plt.tight_layout(rect=[0, 0, 1, ...]) here: seaborn has already
# laid out the grid and reserved room on the right for the outside hue legend,
# and re-running tight_layout with a right edge of 1 lets the axes grow back
# into that space. The suptitle sits above the axes via y=1.03 instead.
plt.show()

# 4. Display data
print("City-Level Population Comparison Data:")
display(city_pop_comparison_df)