In [52]:
import pandas as pd
import eurostat

# Load the Eurostat dataset and reshape it into two frames used further down:
#   long_df    - one row per (geo, geo_layer, indic_to, c_resid, month, Year)
#   summary_df - 'Value' summed per (month, c_resid, indic_to, geo_layer, Year)

# Raw string: '\T' is not a valid escape sequence, so the non-raw literal
# triggers a SyntaxWarning on recent Python versions.
GEO_COLUMN = r'geo\TIME_PERIOD'
ID_COLUMNS = ['geo', 'geo_layer', 'indic_to', 'c_resid', 'month']
YEAR_COLUMNS = ['2018', '2019', '2020', '2021', '2022', '2023', '2024']
# Geo codes are labelled by their length; any other length gets '[EU27_2020]'.
GEO_LAYER_BY_CODE_LENGTH = {2: 'Country', 3: 'NUT1', 4: 'NUT2'}


def _geo_layer(code):
    """Return the geo_layer label for a geo code, based only on its length."""
    return GEO_LAYER_BY_CODE_LENGTH.get(len(code), '[EU27_2020]')


# Import data using the Eurostat API
data = eurostat.get_data_df('TOUR_CE_OMN12')

# Drop the 'freq' and 'unit' columns (ignored when absent).
data = data.drop(columns=['freq', 'unit'], errors='ignore')

# Add geo_layer, rename the geo column through the public API (instead of
# writing into df.columns.values), and drop the 'TOTAL' aggregate rows.
df = (
    data
    .assign(geo_layer=data[GEO_COLUMN].map(_geo_layer))
    .rename(columns={GEO_COLUMN: 'geo'})
)
df = df[(df['c_resid'] != 'TOTAL') & (df['month'] != 'TOTAL')]

# Put 'geo' and 'geo_layer' first, keeping the remaining columns in order.
leading_columns = ['geo', 'geo_layer']
df = df[leading_columns + [col for col in df.columns if col not in leading_columns]]

# Wide -> long: one row per year column, then drop missing values and the
# 2024 rows (the original analysis discards 2024).
long_df = (
    df.melt(id_vars=ID_COLUMNS, value_vars=YEAR_COLUMNS, var_name='Year', value_name='Value')
    .dropna()
    .loc[lambda frame: frame['Year'] != '2024']
)

# Summarize data by month, guest residence, indicator, geo_layer and year.
summary_df = (
    long_df
    .groupby(['month', 'c_resid', 'indic_to', 'geo_layer', 'Year'])['Value']
    .sum()
    .reset_index()
)
#summary_file = 'processed_data.xlsx'
#summary_df.to_excel(summary_file, index=False)
summary_df


Unnamed: 0,month,c_resid,indic_to,geo_layer,Year,Value
0,M01,DOM,LSTY,Country,2018,2076982.0
1,M01,DOM,LSTY,Country,2019,2517764.0
2,M01,DOM,LSTY,Country,2020,3342210.0
3,M01,DOM,LSTY,Country,2021,2425589.0
4,M01,DOM,LSTY,Country,2022,4108208.0
...,...,...,...,...,...,...
1723,M12,FOR,STY,[EU27_2020],2019,1563624.0
1724,M12,FOR,STY,[EU27_2020],2020,171960.0
1725,M12,FOR,STY,[EU27_2020],2021,893514.0
1726,M12,FOR,STY,[EU27_2020],2022,1548701.0


In [53]:
# Display the long-format frame built above (pandas truncates the rendering).
long_df

Unnamed: 0,geo,geo_layer,indic_to,c_resid,month,Year,Value
0,AT,Country,LSTY,DOM,M01,2018,23783.0
1,AT1,NUT1,LSTY,DOM,M01,2018,8096.0
2,AT11,NUT2,LSTY,DOM,M01,2018,239.0
3,AT12,NUT2,LSTY,DOM,M01,2018,790.0
4,AT13,NUT2,LSTY,DOM,M01,2018,7067.0
...,...,...,...,...,...,...,...
166315,SK0,NUT1,STY,FOR,M12,2023,18620.0
166316,SK01,NUT2,STY,FOR,M12,2023,8896.0
166317,SK02,NUT2,STY,FOR,M12,2023,1296.0
166318,SK03,NUT2,STY,FOR,M12,2023,4891.0


In [55]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px
import json

# Turn 'month' into an ordered categorical whose categories run M01 ... M12.
month_order = ["M%02d" % month_number for month_number in range(1, 13)]
month_dtype = pd.CategoricalDtype(categories=month_order, ordered=True)
summary_df['month'] = summary_df['month'].astype(month_dtype)

def _choices(values):
    """Build Dash dropdown options whose label and value are both the raw entry."""
    return [{'label': entry, 'value': entry} for entry in values]


def _column_dropdown(component_id, frame, column, placeholder=None):
    """Create a dropdown listing the distinct values of ``frame[column]``.

    The first distinct value is pre-selected. When ``placeholder`` is given the
    dropdown is additionally created with ``multi=False`` and that placeholder.
    """
    distinct = frame[column].unique()
    extra = {} if placeholder is None else {'multi': False, 'placeholder': placeholder}
    return dcc.Dropdown(
        id=component_id,
        options=_choices(distinct),
        value=distinct[0],
        **extra
    )


app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Interactive Charts"),

    # Line chart controls
    html.H2("Line Chart Controls"),
    _column_dropdown('geo_layer-dropdown', summary_df, 'geo_layer'),
    _column_dropdown('indic_to-dropdown', summary_df, 'indic_to'),
    # Residency selector with an extra leading 'All' entry, selected by default.
    dcc.Dropdown(
        id='c_resid-dropdown',
        options=[{'label': 'All', 'value': 'All'}] + _choices(summary_df['c_resid'].unique()),
        value='All'
    ),
    dcc.Graph(id='line-chart'),

    # Bar chart controls
    html.H2("Bar Chart Controls"),
    _column_dropdown('indic_to_filter', summary_df, 'indic_to', "Select Indicator"),
    _column_dropdown('year_filter', summary_df, 'Year', "Select Year"),
    _column_dropdown('geo_layer_filter', summary_df, 'geo_layer', "Select Geographical Layer"),
    dcc.Graph(id='bar_chart'),

    # Pie chart controls
    html.H2("Pie Chart Controls"),
    _column_dropdown('pie_chart_indic_to', summary_df, 'indic_to'),
    _column_dropdown('pie_chart_geo_layer', summary_df, 'geo_layer', "Select Geographical Layer"),
    _column_dropdown('pie_chart_year', summary_df, 'Year', "Select Year"),
    _column_dropdown('pie_chart_month', summary_df, 'month', "Select Month"),
    dcc.Graph(id='pie_chart'),  # graph that hosts the pie chart

    # Geographical heatmap controls (these read from long_df, not summary_df)
    html.H2("Geographical Heatmap Controls"),
    _column_dropdown('heatmap_indic_to', long_df, 'indic_to', "Select Indicator"),
    _column_dropdown('heatmap_year', long_df, 'Year', "Select Year"),
    _column_dropdown('heatmap_c_resid', long_df, 'c_resid', "Select Residency"),
    _column_dropdown('heatmap_month', long_df, 'month', "Select Month"),
    _column_dropdown('heatmap_geo_layer', long_df, 'geo_layer', "Select geo_layer"),
    dcc.Graph(id='geo_heatmap')  # component that displays the heatmap
])
@app.callback(
    Output('line-chart', 'figure'),
    [Input('geo_layer-dropdown', 'value'),
     Input('indic_to-dropdown', 'value'),
     Input('c_resid-dropdown', 'value')]
)
def update_line_chart(selected_geo_layer, selected_indic_to, selected_c_resid):
    """Build the monthly line chart (one line per Year) for the selected filters.

    Args:
        selected_geo_layer: value of the 'geo_layer-dropdown' control.
        selected_indic_to: value of the 'indic_to-dropdown' control.
        selected_c_resid: value of the 'c_resid-dropdown' control; the special
            value 'All' sums 'Value' over every residency category.

    Returns:
        A Plotly Express line figure with 'month' on x, 'Value' on y and one
        colour per 'Year'.
    """
    in_scope = (
        (summary_df['geo_layer'] == selected_geo_layer) &
        (summary_df['indic_to'] == selected_indic_to)
    )

    if selected_c_resid == 'All':
        # Sum only 'Value': a bare .sum() would also "sum" (concatenate) the
        # string columns. observed=True keeps just the month/Year pairs that
        # really occur, so a categorical 'month' does not add zero-valued
        # points for combinations with no data, matching the branch below.
        filtered_df = (
            summary_df[in_scope]
            .groupby(['month', 'Year'], observed=True)['Value']
            .sum()
            .reset_index()
        )
    else:
        filtered_df = summary_df[in_scope & (summary_df['c_resid'] == selected_c_resid)]

    fig = px.line(
        filtered_df, x='month', y='Value', color='Year',
        title=f"Line Chart for {selected_geo_layer}, {selected_indic_to}, {selected_c_resid}"
    )
    return fig

@app.callback(
    Output('bar_chart', 'figure'),
    Input('indic_to_filter', 'value'),
    Input('year_filter', 'value'),
    Input('geo_layer_filter', 'value')
)
def update_bar_chart(selected_indicator, selected_year, selected_geo_layer):
    """Build the monthly bar chart, coloured by residency, for the selected filters.

    Each bar is labelled with its share of the summed 'Value' of all rows that
    match the three filters, formatted as a percentage with two decimals.
    """
    matches_indicator = summary_df['indic_to'] == selected_indicator
    matches_year = summary_df['Year'] == selected_year
    matches_layer = summary_df['geo_layer'] == selected_geo_layer
    bars_df = summary_df[matches_indicator & matches_year & matches_layer].copy()

    # Share of each row in the total of the filtered selection.
    grand_total = bars_df['Value'].sum()
    share = (bars_df['Value'] / grand_total * 100).round(2)
    bars_df['Percentage'] = share.astype(str) + '%'

    # Bar chart with c_resid as the colour category; the percentage is drawn
    # as the text label of every bar.
    fig = px.bar(
        bars_df,
        x='month',
        y='Value',
        color='c_resid',
        title='Interactive Bar Chart by Month',
        text='Percentage'
    )

    # Place the data labels outside the bars with a fixed font size.
    fig.update_traces(textposition='outside', textfont=dict(size=12))

    return fig

@app.callback(
    Output('pie_chart', 'figure'),
    [Input('pie_chart_indic_to', 'value'),
     Input('pie_chart_geo_layer', 'value'),
     Input('pie_chart_year', 'value'),
     Input('pie_chart_month', 'value')]
)
def update_pie_chart(selected_indicator, selected_geo_layer, selected_year, selected_month):
    """Build the pie chart of 'Value' split by 'c_resid' for the selected filters."""
    # Keep only the rows matching all four dropdown selections.
    keep = (
        (summary_df['indic_to'] == selected_indicator)
        & (summary_df['geo_layer'] == selected_geo_layer)
        & (summary_df['Year'] == selected_year)
        & (summary_df['month'] == selected_month)
    )
    slice_df = summary_df[keep].copy()

    # One slice per residency category, sized by 'Value'.
    chart_title = f'Pie Chart for {selected_indicator} - {selected_geo_layer} - {selected_year} - {selected_month}'
    return px.pie(slice_df, names='c_resid', values='Value', title=chart_title)

# Load the GeoJSON file used by the choropleth callback.
# GeoJSON is UTF-8 encoded, so decode it explicitly instead of relying on the
# platform's default locale encoding.
# NOTE(review): this is an absolute, machine-specific path; consider making it configurable.
with open('/Users/mac/NUTS_RG_60M_2024_4326.geojson', encoding='utf-8') as f:
    nuts_geojson = json.load(f)
@app.callback(
    Output('geo_heatmap', 'figure'),
    [Input('heatmap_indic_to', 'value'),
     Input('heatmap_year', 'value'),
     Input('heatmap_c_resid', 'value'),
     Input('heatmap_month', 'value'),
    Input('heatmap_geo_layer', 'value')]
)
def update_geo_heatmap(selected_indicator, selected_year, selected_c_resid, selected_month,selected_geo_layer):
    """Build the choropleth of 'Value' per 'geo' code for the selected filters.

    Rows of long_df matching all five selections are coloured on the regions
    of nuts_geojson whose 'properties.NUTS_ID' equals the row's 'geo' value.
    An empty choropleth is returned when no row matches.
    """
    # Filter the data down to the five dropdown selections.
    matches_indicator = long_df['indic_to'] == selected_indicator
    matches_year = long_df['Year'] == selected_year
    matches_resid = long_df['c_resid'] == selected_c_resid
    matches_month = long_df['month'] == selected_month
    matches_layer = long_df['geo_layer'] == selected_geo_layer
    filtered_heatmap_df = long_df[
        matches_indicator & matches_year & matches_resid & matches_month & matches_layer
    ].copy()

    # Nothing to draw: return an empty figure.
    if filtered_heatmap_df.empty:
        return px.choropleth()

    # Draw the heatmap; regions are matched to rows through the NUTS_ID property.
    heatmap_title = f'Geographical Heatmap for {selected_indicator} in {selected_year}, {selected_c_resid}, {selected_month}, {selected_geo_layer}'
    return px.choropleth(
        filtered_heatmap_df,
        geojson=nuts_geojson,
        locations='geo',  # geographic identifier column
        featureidkey='properties.NUTS_ID',
        color='Value',  # value being plotted
        hover_name='geo',  # shown when hovering over a region
        title=heatmap_title,
        color_continuous_scale=px.colors.sequential.Plasma  # colour scheme
    )
# Start the Dash development server when this code runs as the main module.
# `app.run` replaces the legacy `app.run_server` alias, which newer Dash
# releases have deprecated and removed.
if __name__ == '__main__':
    app.run(debug=True)

    geo geo_layer indic_to c_resid month  Year     Value
0    AT   Country     LSTY     DOM   M01  2018   23783.0
13   BE   Country     LSTY     DOM   M01  2018   17579.0
28   BG   Country     LSTY     DOM   M01  2018    7321.0
37   CH   Country     LSTY     DOM   M01  2018   41577.0
46   CY   Country     LSTY     DOM   M01  2018    3093.0
49   CZ   Country     LSTY     DOM   M01  2018   22926.0
59   DE   Country     LSTY     DOM   M01  2018  290107.0
114  DK   Country     LSTY     DOM   M01  2018   13428.0
121  EE   Country     LSTY     DOM   M01  2018    4443.0
124  EL   Country     LSTY     DOM   M01  2018   31305.0
142  ES   Country     LSTY     DOM   M01  2018  323844.0
170  FI   Country     LSTY     DOM   M01  2018   14205.0
178  FR   Country     LSTY     DOM   M01  2018  735352.0
220  HR   Country     LSTY     DOM   M01  2018   11817.0
226  HU   Country     LSTY     DOM   M01  2018   12452.0
238  IE   Country     LSTY     DOM   M01  2018   14572.0
243  IS   Country     LSTY     

      geo geo_layer indic_to c_resid month  Year   Value
2    AT11      NUT2     LSTY     DOM   M01  2018   239.0
3    AT12      NUT2     LSTY     DOM   M01  2018   790.0
4    AT13      NUT2     LSTY     DOM   M01  2018  7067.0
6    AT21      NUT2     LSTY     DOM   M01  2018  1738.0
7    AT22      NUT2     LSTY     DOM   M01  2018  4502.0
..    ...       ...      ...     ...   ...   ...     ...
378  SI04      NUT2     LSTY     DOM   M01  2018  2109.0
381  SK01      NUT2     LSTY     DOM   M01  2018  1937.0
382  SK02      NUT2     LSTY     DOM   M01  2018   448.0
383  SK03      NUT2     LSTY     DOM   M01  2018  3861.0
384  SK04      NUT2     LSTY     DOM   M01  2018  2708.0

[257 rows x 7 columns]
       geo geo_layer indic_to c_resid month  Year    Value
9242  AT11      NUT2   NGT_SP     DOM   M01  2018    761.0
9243  AT12      NUT2   NGT_SP     DOM   M01  2018   1968.0
9244  AT13      NUT2   NGT_SP     DOM   M01  2018  14508.0
9246  AT21      NUT2   NGT_SP     DOM   M01  2018   5614

        geo geo_layer indic_to c_resid month  Year    Value
157466  AT1      NUT1      STY     DOM   M02  2023   6239.0
157470  AT2      NUT1      STY     DOM   M02  2023   5204.0
157473  AT3      NUT1      STY     DOM   M02  2023   5218.0
157479  BE1      NUT1      STY     DOM   M02  2023   1823.0
157481  BE2      NUT1      STY     DOM   M02  2023  10467.0
...     ...       ...      ...     ...   ...   ...      ...
157829  SE1      NUT1      STY     DOM   M02  2023   1666.0
157832  SE2      NUT1      STY     DOM   M02  2023   2460.0
157836  SE3      NUT1      STY     DOM   M02  2023   5830.0
157841  SI0      NUT1      STY     DOM   M02  2023   3028.0
157845  SK0      NUT1      STY     DOM   M02  2023  11169.0

[96 rows x 7 columns]
       geo geo_layer indic_to c_resid month  Year     Value
157465  AT   Country      STY     DOM   M02  2023   16661.0
157478  BE   Country      STY     DOM   M02  2023   20947.0
157493  BG   Country      STY     DOM   M02  2023    9516.0
157502  CH   Coun