# Altair Example: Tourist Arrivals

## Read the dataset

In [50]:
import pandas as pd
from config import *

# Load the monthly arrivals table (one column per country code).
# 'Date' is parsed to datetime so that the `.dt` accessor and Altair's
# temporal ('Date:T') encoding used further down work on it.
df = pd.read_csv('source/tourist_arrivals_countries.csv', parse_dates=['Date'])
df.head()

Unnamed: 0,Date,IT,FR,DE,PT,ES,UK
0,1990-01-01,2543920.0,,3185877.0,325138.0,1723786.0,1776000.0
1,1990-02-01,2871632.0,,3588879.0,381539.0,1885718.0,2250000.0
2,1990-03-01,3774702.0,,4272437.0,493957.0,2337847.0,2662000.0
3,1990-04-01,5107712.0,,4689424.0,635822.0,3172302.0,2645000.0
4,1990-05-01,4738376.0,,6045278.0,609952.0,3072480.0,3096000.0


In [51]:
# Peek at the most recent months of the table.
df.tail()

Unnamed: 0,Date,IT,FR,DE,PT,ES,UK
353,2019-06-01,10555177.0,12472500.0,13910286.0,2143639.0,11344295.0,7525413.0
354,2019-07-01,11506828.0,13174390.0,14744389.0,2205705.0,12097382.0,8962949.0
355,2019-08-01,11649500.0,13692822.0,14570339.0,2531809.0,12893366.0,8889049.0
356,2019-09-01,9888817.0,11684845.0,14373815.0,2263748.0,,5858984.0
357,2019-10-01,7692388.0,10401793.0,13780441.0,1995942.0,,7455781.0


## Exploratory data analysis

In [20]:
from ydata_profiling import ProfileReport

# Build a profiling report for the raw table and export it as a standalone
# HTML file (eda.html) next to the notebook.
eda = ProfileReport(df)
eda.to_file(output_file='eda.html')

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

## Raw Graph

In [52]:
# Reshape from wide (one column per country) to long form: one row per
# (Date, Country) pair, which is the layout Altair encodings expect.
df2 = df.melt(
    id_vars='Date',
    var_name='Country',
    value_name='Tourist Arrivals',
)
df2.head()

Unnamed: 0,Date,Country,Tourist Arrivals
0,1990-01-01,IT,2543920.0
1,1990-02-01,IT,2871632.0
2,1990-03-01,IT,3774702.0
3,1990-04-01,IT,5107712.0
4,1990-05-01,IT,4738376.0


In [53]:
import altair as alt

# Raw view of the data: one line per country over the whole time range.
chart = (
    alt.Chart(df2)
    .mark_line()
    .encode(
        x='Date:T',
        y='Tourist Arrivals:Q',
        color=alt.Color('Country:N'),
    )
)

chart  # alternatively, use chart.save('chart.html')

We note an increasing trend in data. Our objective is to understand the gap between the last and the first year.

## From data to information

Keep only the rows (months) in which every country has a non-null value.

In [54]:
# Boolean row filters: True where the country reported a value that month.
mask_fr = df['FR'].notna()
mask_uk = df['UK'].notna()
mask_es = df['ES'].notna()

# Keep only the months in which FR, UK and ES are all present.
df = df[mask_fr & mask_uk & mask_es]

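Calculate the average value by year and keep only the first year (1994) and the last complete year (2018); 2019 is left out because it is only partially covered after the null filtering above.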

In [55]:
# Work on a new frame carrying the calendar year of each observation,
# leaving the filtered `df` untouched.
df2 = df.assign(Year=df['Date'].dt.year)

In [56]:
# Yearly mean per country, then keep two rows only:
#   position  0 -> the first year left after the null filtering
#   position -2 -> the second-to-last year; the last one is skipped because
#                  it is only partially covered (ES is missing for the final
#                  months of 2019 in the raw table).
# reset_index() turns 'Year' back into a regular column for the melt below.
df2 = (
    df2.groupby(by='Year')
    .mean(numeric_only=True)
    .iloc[[0, -2]]
    .reset_index()
)

In [57]:
# Long form of the two selected years: one row per (Year, Country).
df3 = df2.melt(
    id_vars='Year',
    var_name='Country',
    value_name='Tourist Arrivals',
)

In [59]:
# Slope chart: each country is a two-point line from the first to the last
# selected year. 'Year' is encoded as ordinal so only those two ticks appear.
chart = (
    alt.Chart(df3)
    .mark_line(point=alt.OverlayMarkDef())
    .encode(
        x=alt.X('Year:O', title='', axis=alt.Axis(labelAngle=0)),
        y='Tourist Arrivals:Q',
        color=alt.Color('Country:N'),
    )
    .properties(
        width=400,
        height=300,
        title='Countries Trend in Arrivals',
    )
)

chart

Select Portugal and calculate the average value for the other countries

In [75]:
# Rows belonging to Portugal; everything else is aggregated below.
mask = df3['Country'] == 'PT'

# Mean arrivals per year across all non-PT countries.
# numeric_only=True keeps the string 'Country' column out of the mean:
# pandas >= 2.0 raises a TypeError instead of silently dropping it.
df4 = (
    df3.loc[~mask]
    .groupby('Year')
    .mean(numeric_only=True)
    .reset_index()
)

In [82]:
# Tag the aggregated rows so they can be plotted as a single series
# alongside Portugal.
df4 = df4.assign(Country='Others (mean)')
df4

Unnamed: 0,Year,Tourist Arrivals,Country
0,1994,4503850.8,Others (mean)
1,2018,9082776.1,Others (mean)


In [83]:
# Stack Portugal's rows on top of the 'Others (mean)' rows.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent. Columns are aligned by name and the index is renumbered.
df5 = pd.concat([df3.loc[mask], df4], ignore_index=True)

df5

Unnamed: 0,Year,Country,Tourist Arrivals
0,1994,PT,563354.8
1,2018,PT,1703339.0
2,1994,Others (mean),4503851.0
3,2018,Others (mean),9082776.0


In [86]:
# Same slope chart as before, now with only two series:
# Portugal and the mean of the other countries.
chart = (
    alt.Chart(df5)
    .mark_line(point=alt.OverlayMarkDef())
    .encode(
        x=alt.X('Year:O', title='', axis=alt.Axis(labelAngle=0)),
        y='Tourist Arrivals:Q',
        color=alt.Color('Country:N'),
    )
    .properties(
        width=400,
        height=300,
        title='Countries Trend in Arrivals',
    )
)

chart

In [88]:
# Show the combined frame again: the percentage computation below reads it.
df5

Unnamed: 0,Year,Country,Tourist Arrivals
0,1994,PT,563354.8
1,2018,PT,1703339.0
2,1994,Others (mean),4503851.0
3,2018,Others (mean),9082776.0


In [104]:
# Row selectors for the two reference years and the two series in df5.
mask_first = df5['Year'] == 1994
mask_last = df5['Year'] == 2018
mask_pt = df5['Country'] == 'PT'
mask_ot = df5['Country'] == 'Others (mean)'


def _pct_increase(series_rows):
    """Return the percentage increase in arrivals from 1994 to 2018.

    `series_rows` is a boolean mask over df5 selecting one series (country).
    """
    start = df5[mask_first & series_rows]['Tourist Arrivals'].values[0]
    end = df5[mask_last & series_rows]['Tourist Arrivals'].values[0]
    return (end - start) / start * 100


pi_pt = _pct_increase(mask_pt)
pi_ot = _pct_increase(mask_ot)

In [106]:
# Percentage increase 1994 -> 2018: (Portugal, mean of the other countries).
pi_pt,pi_ot

(202.3563301873981, 101.66689580391962)

In [107]:
# Two-point series per country expressed as percentage increase:
# the first year is the 0% baseline, the last year carries the computed gain.
df_pi = pd.DataFrame(
    [
        (1994, 'PT', 0),
        (2018, 'PT', pi_pt),
        (1994, 'Others (mean)', 0),
        (2018, 'Others (mean)', pi_ot),
    ],
    columns=['Date', 'Country', 'Tourist Arrivals'],
)

In [108]:
# Inspect the percentage-increase table that feeds the next chart.
df_pi

Unnamed: 0,Date,Country,Tourist Arrivals
0,1994,PT,0.0
1,2018,PT,202.35633
2,1994,Others (mean),0.0
3,2018,Others (mean),101.666896


In [111]:
# Slope chart of the percentage increase: both series start at 0% in the
# first year, so the slope alone conveys the relative growth.
chart = (
    alt.Chart(df_pi)
    .mark_line(point=alt.OverlayMarkDef())
    .encode(
        x=alt.X('Date:O', title='', axis=alt.Axis(labelAngle=0)),
        y=alt.Y('Tourist Arrivals:Q', title='Percentage increase'),
        color=alt.Color('Country:N'),
    )
    .properties(
        width=400,
        height=300,
        title='Countries Trend Increase in Arrivals',
    )
)

chart

Add text

In [179]:
# One text label per series drawn by `chart`, placed at the 2018 end point.
# The previous version used `pi_de` / 'DE', but `pi_de` is never computed in
# this notebook (NameError on a fresh kernel) and `chart` plots 'PT' and
# 'Others (mean)'. The labels therefore use the already-computed `pi_ot`.
# NOTE(review): if a Portugal-vs-Germany comparison is intended instead,
# compute the DE increase explicitly and rebuild `chart` with a DE series.
pi_df = pd.DataFrame({
    'Text' : [f'Portugal: {pi_pt:.2f}%', f'Others (mean): {pi_ot:.2f}%'],
    'Y' : [pi_pt, pi_ot],
    'X' : [2018, 2018],
    'Country' : ['PT', 'Others (mean)']
})

# textFontSize and iColor2 are expected to come from `from config import *`.
# The colour domain lists the same series names as the base chart.
pi = alt.Chart(pi_df).mark_text(dx=100, fontSize=textFontSize).encode(
    text='Text:N',
    y='Y:Q',
    x='X:O',
    color=alt.Color(
        'Country:N',
        scale=alt.Scale(range=iColor2, domain=['PT', 'Others (mean)']),
        legend=None
    )
)

# Overlay the labels on the line chart and remove the view border.
total = (chart + pi).configure_view(strokeWidth=0)

configure_layout(total)

## From information to knowledge

Add a context

In [180]:
# Context paragraph shown to the right of the chart.
# The newlines in the string are the line breaks rendered by mark_text below.
# NOTE(review): the wording refers to Germany, while the base `chart` built
# from df_pi compares PT with 'Others (mean)' - confirm which comparison the
# story should tell.
annotation = """Thanks to the introduction of low-cost flights, 
Portugal has experienced an increase 
in tourist arrivals of over 200% in 25 years, 
even surpassing the increase in Germany, 
one of the favorite destinations for tourists ever."""
text_df = pd.DataFrame({'text': [annotation]})

# textFontSize and iColor are expected to come from `from config import *`.
text = (
    alt.Chart(text_df)
    .mark_text(lineBreak='\n', align='left', fontSize=textFontSize, y=100)
    .encode(
        text='text:N',
        color=alt.value(iColor),
    )
)

# Final layout: the labelled line chart on the left, the context text on the
# right ('+' layers, '|' concatenates horizontally).
total = ((chart + pi) | text).configure_view(strokeWidth=0)

configure_layout(total)


## From knowledge to wisdom

Add a call to action

In [181]:
# Call to action: put the take-away message in the title of the final chart.
call_to_action = 'Yes, you can build a new swimming pool!'
total = total.properties(title=call_to_action)

configure_layout(total)