# Altair Example: Tourist Arrivals

## Read the dataset

In [70]:
import pandas as pd

df = pd.read_csv('source/tourist_arrivals_countries.csv', parse_dates=['Date'])
df.head()

Unnamed: 0,Date,IT,FR,DE,PT,ES,UK
0,1990-01-01,2543920.0,,3185877.0,325138.0,1723786.0,1776000.0
1,1990-02-01,2871632.0,,3588879.0,381539.0,1885718.0,2250000.0
2,1990-03-01,3774702.0,,4272437.0,493957.0,2337847.0,2662000.0
3,1990-04-01,5107712.0,,4689424.0,635822.0,3172302.0,2645000.0
4,1990-05-01,4738376.0,,6045278.0,609952.0,3072480.0,3096000.0


## Exploratory data analysis

In [26]:
from pandas_profiling import ProfileReport

eda = ProfileReport(df)
eda.to_file(output_file='eda.html')

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

## From data to information

Select only data for which all the countries do not have null values

In [71]:
mask_fr = df['FR'].isnull() == False
mask_uk = df['UK'].isnull() == False
df = df[(mask_fr) & (mask_uk)]

In [72]:
df2 = pd.melt(df, id_vars='Date')

In [73]:
import altair as alt

alt.Chart(df2).mark_line().encode(
    x = 'Date:T',
    y = 'value:Q',
    color='variable:N'
)

We note an increasing trend in data. Our objective is to understand the gap between the last and the first year.

In [74]:
df['Date'].describe()

  df['Date'].describe()


count                     310
unique                    310
top       1994-01-01 00:00:00
freq                        1
first     1994-01-01 00:00:00
last      2019-10-01 00:00:00
Name: Date, dtype: object

In [75]:
mask_first = df['Date'] == '1994-01-01'
mask_last = df['Date'] == '2019-10-01'
df3 = df[(mask_first) | (mask_last)]

In [76]:
df4 = pd.melt(df3, id_vars='Date')

In [77]:
alt.Chart(df4).mark_line(point=alt.OverlayMarkDef()).encode(
    x = alt.X('Date:O', title='', axis=alt.Axis(labelAngle=0)),
    y = 'value:Q',
    color='variable:N'
).properties(width=400)

Select two countries, leave the others for context

In [106]:
alt.Chart(df4).mark_line(point=alt.OverlayMarkDef()).encode(
    x = alt.X('Date:O', title='', axis=alt.Axis(labelAngle=0)),
    y = 'value:Q',
    color=alt.Color('variable:N', scale=alt.Scale(range=['#F77F00','#003049','#EAE2B7', '#EAE2B7', '#EAE2B7', '#EAE2B7'], domain=['PT', 'DE','FR', 'IT','UK', 'ES']))
).properties(width=400)

In [109]:
pi_pt = (df3[mask_last]['PT'].values[0] - df3[mask_first]['PT'].values[0])/df3[mask_first]['PT'].values[0]*100
pi_de = (df3[mask_last]['DE'].values[0] - df3[mask_first]['DE'].values[0])/df3[mask_first]['DE'].values[0]*100

  pi_pt = (df3[mask_last]['PT'].values[0] - df3[mask_first]['PT'].values[0])/df3[mask_first]['PT'].values[0]*100
  pi_de = (df3[mask_last]['DE'].values[0] - df3[mask_first]['DE'].values[0])/df3[mask_first]['DE'].values[0]*100


In [110]:
pi_pt,pi_de

(532.3215683044356, 268.13704838870393)

In [113]:
df5 = pd.DataFrame({
    'Date'    : [1994,2019,1994,2019],
    'Country' : ['PT', 'PT', 'DE', 'DE'],
    'Tourist Arrivals' : [0, pi_pt, 0, pi_de]
})

In [114]:
df5

Unnamed: 0,Date,Country,Tourist Arrivals
0,1994,PT,0.0
1,2019,PT,532.321568
2,1994,DE,0.0
3,2019,DE,268.137048


In [183]:
chart = alt.Chart(df5).mark_line(point=alt.OverlayMarkDef()).encode(
    x = alt.X('Date:O', title='', axis=alt.Axis(labelAngle=0)),
    y = alt.Y('Tourist Arrivals:Q',title='Percentage increase in tourist arrivals'),
    color=alt.Color('Country:N', scale=alt.Scale(range=['#F77F00','#003049'], domain=['PT', 'DE']))
).properties(width=400)


chart

Add text

In [184]:
pi_df = pd.DataFrame({
    'Text' : ['Portugal: ' + '{:.2f}'.format(pi_pt) + '%', 'Germany: ' + '{:.2f}'.format(pi_de) + '%'],
    'Y' : [550,300],
    'X' : [2019,2019],
    'Country' : ['PT', 'DE']
})

pi = alt.Chart(pi_df).mark_text(dx=80,fontSize=15).encode(
    text='Text:N',
    y='Y:Q',
    x='X:O',
    color=alt.Color('Country:N', scale=alt.Scale(range=['#F77F00','#003049'], domain=['PT', 'DE']), legend=None)
)

(chart + pi).configure_view(strokeWidth=0)

## From information to knowledge

Add a context

In [185]:
# build the annotation 
annotation = """Thanks to the introduction of low cost flights, 
Portugal has experienced an increase in tourist arrivals of over 500% in 25 years, 
even surpassing the increase in Germany, one of the favorite destinations for tourists since ever."""
text_df = pd.DataFrame([{'text' : annotation}])

text = alt.Chart(text_df).mark_text(lineBreak='\n',align='left',fontSize=15,y=250).encode(
    text='text:N',
    color=alt.value('#F77F00')
)

# build the final chart
total = (chart + pi + text).configure_view(strokeWidth=0)
total


# From Knowledge to wisdom

Add a call to action

In [194]:
total.properties(title='Yes, continue investing in tourism in Portugal.'
).configure_title(fontSize=24,offset=25)