In [12]:
#from flask import Flask, render_template, jsonify, request
import pandas as pd
import eurostat
import json
from shapely.geometry import shape, Point
from pyproj import Transformer


#app = Flask(__name__)

# Load GeoJSON data for regions and countries
#with open('map_app copy/nuts_2021.geojson', 'r', encoding='utf-8') as f:
#    geo_coordinates = json.load(f)

# --- DATA LOAD AND PREPARATION ---
# Fetch the Eurostat table 'tour_ce_omn12' and reshape it in a single pipeline:
#   1. remove the letter 'M' from the month codes so they can be parsed later,
#   2. give the geography column a shorter, easier name ("GEO"),
#   3. unpivot every non-identifier column into rows, storing the former
#      column label in TIME_PERIOD and the cell value in bookings.
data_analysed = (
    eurostat.get_data_df('tour_ce_omn12')
    .assign(month=lambda table: table["month"].str.replace(r'M', "", regex=True))
    .rename(columns={"geo\\TIME_PERIOD": "GEO"})
    .melt(
        id_vars=['freq', 'indic_to', 'c_resid', 'month', 'unit', 'GEO'],
        var_name='TIME_PERIOD',
        value_name='bookings',
    )
)

# Slice the long table into one frame per value of 'indic_to'
Stay_df, LengthOfStay_df, NightsSpend_df = (
    data_analysed[data_analysed['indic_to'] == code]
    for code in ('STY', 'LSTY', 'NGT_SP')
)
Yvars = {"Stay": Stay_df, "LengthOfStay": LengthOfStay_df, "NightsSpend": NightsSpend_df}

# Cross every indicator frame with every 'c_resid' category; the resulting
# keys read "<indicator>_<residency>", e.g. "Stay_DOM".
subset_dict = {}
for name, df in Yvars.items():
    for residency in ("DOM", "FOR", "TOTAL"):
        subset_dict[f"{name}_{residency}"] = df[df["c_resid"] == residency]

def _split_by_period(frame):
    """Separate one subset into its monthly rows and its yearly-total rows.

    Rows whose ``month`` equals 'TOTAL' are treated as annual, all other rows
    as monthly. In both returned copies the ``month`` column is replaced by a
    datetime: parsed from "<TIME_PERIOD>-<month>" (format "%Y-%m") for the
    monthly rows and from TIME_PERIOD alone (format "%Y") for the annual rows.

    Returns:
        tuple: ``(monthly_frame, annual_frame)``, both independent copies.
    """
    by_month = frame[frame["month"] != 'TOTAL'].copy()
    by_year = frame[frame["month"] == 'TOTAL'].copy()
    by_month['month'] = pd.to_datetime(
        by_month['TIME_PERIOD'] + "-" + by_month['month'], format="%Y-%m"
    )
    by_year['month'] = pd.to_datetime(by_year['TIME_PERIOD'], format="%Y")
    return by_month, by_year


# Store each subset twice: once per month and once per year, under keys
# "<subset>_monthly" and "<subset>_annual".
transformed_data = {}
for name, df in subset_dict.items():
    monthly, annual = _split_by_period(df)
    transformed_data[f"{name}_monthly"] = monthly
    transformed_data[f"{name}_annual"] = annual

# Drop every row that has a missing value in any column.
transformed_data_cleaned = {key: df.dropna() for key, df in transformed_data.items()}

# Group the cleaned frames by indicator. Keys have the form
# "<indicator>_<residency>_<monthly|annual>", so the indicator must be matched
# as a key *prefix*: a substring test for 'Stay_' also matches every
# 'LengthOfStay_...' key and would put length-of-stay frames into stay_data.
stay_data = {
    key: df for key, df in transformed_data_cleaned.items() if key.startswith('Stay_')
}
length_of_stay_data = {
    key: df for key, df in transformed_data_cleaned.items() if key.startswith('LengthOfStay_')
}
nights_spend_data = {
    key: df for key, df in transformed_data_cleaned.items() if key.startswith('NightsSpend_')
}


In [13]:
# Inspect the stay_data dictionary (key -> cleaned DataFrame) built in the preparation cell
stay_data

{'Stay_DOM_monthly':        freq indic_to c_resid      month unit   GEO TIME_PERIOD  bookings
 30030     A      STY     DOM 2018-01-01   NR    AT        2018    7263.0
 30031     A      STY     DOM 2018-01-01   NR   AT1        2018    2219.0
 30032     A      STY     DOM 2018-01-01   NR  AT11        2018     111.0
 30033     A      STY     DOM 2018-01-01   NR  AT12        2018     295.0
 30034     A      STY     DOM 2018-01-01   NR  AT13        2018    1813.0
 ...     ...      ...     ...        ...  ...   ...         ...       ...
 301450    A      STY     DOM 2024-03-01   NR   SK0        2024   14180.0
 301451    A      STY     DOM 2024-03-01   NR  SK01        2024    3692.0
 301452    A      STY     DOM 2024-03-01   NR  SK02        2024    1681.0
 301453    A      STY     DOM 2024-03-01   NR  SK03        2024    4825.0
 301454    A      STY     DOM 2024-03-01   NR  SK04        2024    3982.0
 
 [28845 rows x 8 columns],
 'Stay_DOM_annual':        freq indic_to c_resid      month uni

In [14]:
# Inspect the length_of_stay_data dictionary (key -> cleaned DataFrame) built in the preparation cell
length_of_stay_data

{'LengthOfStay_DOM_monthly':        freq indic_to c_resid      month unit   GEO TIME_PERIOD  bookings
 0         A     LSTY     DOM 2018-01-01   NR    AT        2018   23783.0
 1         A     LSTY     DOM 2018-01-01   NR   AT1        2018    8096.0
 2         A     LSTY     DOM 2018-01-01   NR  AT11        2018     239.0
 3         A     LSTY     DOM 2018-01-01   NR  AT12        2018     790.0
 4         A     LSTY     DOM 2018-01-01   NR  AT13        2018    7067.0
 ...     ...      ...     ...        ...  ...   ...         ...       ...
 271420    A     LSTY     DOM 2024-03-01   NR   SK0        2024   32034.0
 271421    A     LSTY     DOM 2024-03-01   NR  SK01        2024    7696.0
 271422    A     LSTY     DOM 2024-03-01   NR  SK02        2024    3498.0
 271423    A     LSTY     DOM 2024-03-01   NR  SK03        2024   11497.0
 271424    A     LSTY     DOM 2024-03-01   NR  SK04        2024    9343.0
 
 [28848 rows x 8 columns],
 'LengthOfStay_DOM_annual':        freq indic_to c_resi

In [15]:
# Inspect the nights_spend_data dictionary (key -> cleaned DataFrame) built in the preparation cell
nights_spend_data

{'NightsSpend_DOM_monthly':        freq indic_to c_resid      month unit   GEO TIME_PERIOD  bookings
 15015     A   NGT_SP     DOM 2018-01-01   NR    AT        2018   69008.0
 15016     A   NGT_SP     DOM 2018-01-01   NR   AT1        2018   17237.0
 15017     A   NGT_SP     DOM 2018-01-01   NR  AT11        2018     761.0
 15018     A   NGT_SP     DOM 2018-01-01   NR  AT12        2018    1968.0
 15019     A   NGT_SP     DOM 2018-01-01   NR  AT13        2018   14508.0
 ...     ...      ...     ...        ...  ...   ...         ...       ...
 286435    A   NGT_SP     DOM 2024-03-01   NR   SK0        2024   91918.0
 286436    A   NGT_SP     DOM 2024-03-01   NR  SK01        2024   15951.0
 286437    A   NGT_SP     DOM 2024-03-01   NR  SK02        2024    9049.0
 286438    A   NGT_SP     DOM 2024-03-01   NR  SK03        2024   38936.0
 286439    A   NGT_SP     DOM 2024-03-01   NR  SK04        2024   27982.0
 
 [28848 rows x 8 columns],
 'NightsSpend_DOM_annual':        freq indic_to c_resid 