In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# loading in dataset
df = pd.read_csv('../data/raw/European_Ski_Resorts.csv')

In [3]:
# first 5 rows
df.head()

Unnamed: 0,#,Resort,Country,HighestPoint,LowestPoint,DayPassPriceAdult,BeginnerSlope,IntermediateSlope,DifficultSlope,TotalSlope,Snowparks,NightSki,SurfaceLifts,ChairLifts,GondolaLifts,TotalLifts,LiftCapacity,SnowCannons
0,1,Alpendorf (Ski amedé),Austria,1980,740,52,30,81,4,115,Yes,No,22,16,11,49,75398,600
1,2,Soldeu-Pas de la Casa/​Grau Roig/​El Tarter/​C...,Andorra,2640,1710,47,100,77,33,210,Yes,Yes,37,28,7,72,99017,1032
2,3,Oberau (Wildschönau),Austria,1130,900,30,1,0,1,2,No,No,2,0,0,2,1932,0
3,4,Dachstein West,Austria,1620,780,42,15,33,3,51,Yes,Yes,25,8,3,36,32938,163
4,5,Rosa Khutor,Southern Russia,2320,940,22,30,26,21,77,Yes,No,6,11,10,27,49228,450


In [4]:
# stats of dataset
df.describe()

Unnamed: 0,#,HighestPoint,LowestPoint,DayPassPriceAdult,BeginnerSlope,IntermediateSlope,DifficultSlope,TotalSlope,SurfaceLifts,ChairLifts,GondolaLifts,TotalLifts,LiftCapacity,SnowCannons
count,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0
mean,188.5,2051.68883,1055.098404,40.779255,35.869681,38.348404,11.776596,86.257979,13.662234,9.340426,3.954787,26.957447,34727.244681,218.361702
std,108.686092,776.719908,423.60431,12.474259,53.719832,49.691136,17.78578,113.740218,14.438661,12.595282,6.565411,31.038957,45955.042689,406.903529
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,94.75,1550.0,779.25,34.0,8.0,9.0,2.0,25.0,5.0,2.0,0.0,10.0,10642.5,0.0
50%,188.5,2104.0,1050.0,42.0,19.0,22.0,5.0,48.0,9.0,5.0,2.0,17.0,19031.5,54.0
75%,282.25,2567.5,1350.0,49.0,40.0,42.25,13.0,100.0,17.0,10.0,5.0,29.25,36555.5,252.0
max,376.0,3899.0,2180.0,81.0,312.0,239.0,103.0,600.0,89.0,74.0,40.0,174.0,252280.0,2383.0


In [5]:
df.shape

(376, 18)

In [6]:
df.columns

Index(['#', 'Resort', 'Country', 'HighestPoint', 'LowestPoint',
       'DayPassPriceAdult', 'BeginnerSlope', 'IntermediateSlope',
       'DifficultSlope', 'TotalSlope', 'Snowparks', 'NightSki', 'SurfaceLifts',
       'ChairLifts', 'GondolaLifts', 'TotalLifts', 'LiftCapacity',
       'SnowCannons'],
      dtype='object')

In [7]:
# countries

country_counts = df['Country'].value_counts()
country_counts

Country
Austria                   89
France                    83
Switzerland               61
Italy                     44
Germany                   24
Norway                    10
Spain                      8
Denmark                    8
Sweden                     6
Andorra                    5
Slovakia                   5
Slovenia                   4
United Kingdom             4
Bulgaria                   4
Southern Russia            4
Finland                    3
Poland                     2
Czech Republic             2
Romania                    2
Lithuania                  1
Liechtenstein              1
Serbia                     1
Greece                     1
Siberia                    1
Bosnia and Herzegovina     1
Ukraine                    1
Netherlands                1
Name: count, dtype: int64

# Day Pass Price

In [8]:
# the highest and cheapest Day Pass Price 

highest_price = df['DayPassPriceAdult'].max()
cheapest_price = df['DayPassPriceAdult'].min()

highest_price_resort = df.loc[df['DayPassPriceAdult'].idxmax(), 'Resort']
highest_price_country = df.loc[df['DayPassPriceAdult'].idxmax(), 'Country']

cheapest_price_resort = df.loc[df['DayPassPriceAdult'].idxmin(), 'Resort']
cheapest_price_country = df.loc[df['DayPassPriceAdult'].idxmin(), 'Country']

print(f"Highest Day Pass Price for an Adult: {highest_price} at {highest_price_resort} in {highest_price_country}.")
print(f"Cheapest Day Pass Price for an Adult: {cheapest_price} at {cheapest_price_resort} in {cheapest_price_country}.")

Highest Day Pass Price for an Adult: 81 at Cervinia in Switzerland.
Cheapest Day Pass Price for an Adult: 0 at Pragelato in Italy.


In [9]:
# day pass for adults by resorts and country
day_pass_resorts = df[['Country','Resort','DayPassPriceAdult']]
day_pass_resorts = day_pass_resorts.sort_values(by='DayPassPriceAdult',ascending=False)
day_pass_resorts

Unnamed: 0,Country,Resort,DayPassPriceAdult
295,Switzerland,Cervinia,81
335,Switzerland,Zermatt - Matterhorn,81
304,Switzerland,St. Moritz - Corviglia,69
35,Switzerland,Corvatsch-​Furtschellas,69
20,Switzerland,Laax-​Flims-​Falera,68
...,...,...,...
162,Southern Russia,Alpika Service,0
217,Denmark,Indoor ski area Skidome Denmark – Randers (pla...,0
123,Italy,Pragelato,0
131,Siberia,Sheregesh-Kemerovo,0


In [10]:
df[df['DayPassPriceAdult'] == highest_price]

Unnamed: 0,#,Resort,Country,HighestPoint,LowestPoint,DayPassPriceAdult,BeginnerSlope,IntermediateSlope,DifficultSlope,TotalSlope,Snowparks,NightSki,SurfaceLifts,ChairLifts,GondolaLifts,TotalLifts,LiftCapacity,SnowCannons
295,296,Cervinia,Switzerland,3899,1562,81,75,220,27,322,Yes,No,21,22,20,63,93464,1060
335,336,Zermatt - Matterhorn,Switzerland,3899,1562,81,75,220,27,322,Yes,No,21,22,20,63,93464,1060


In [11]:
df[df['DayPassPriceAdult'] == cheapest_price]

Unnamed: 0,#,Resort,Country,HighestPoint,LowestPoint,DayPassPriceAdult,BeginnerSlope,IntermediateSlope,DifficultSlope,TotalSlope,Snowparks,NightSki,SurfaceLifts,ChairLifts,GondolaLifts,TotalLifts,LiftCapacity,SnowCannons
123,124,Pragelato,Italy,2700,1335,0,14,32,4,50,No,No,4,1,0,5,3660,0
131,132,Sheregesh-Kemerovo,Siberia,1270,670,0,0,0,0,0,No,No,3,5,4,12,19919,0
162,163,Alpika Service,Southern Russia,2228,535,0,10,10,5,25,No,No,1,1,7,9,18120,0
217,218,Indoor ski area Skidome Denmark – Randers (pla...,Denmark,120,10,0,1,1,1,3,no report,No,0,0,0,0,0,0
218,219,Dry slopes Indoorski – Rødovre,Denmark,53,50,0,0,0,0,0,no report,No,0,0,0,0,0,0
219,220,Dry slopes Ski Arena – Aarhus,Denmark,23,20,0,0,0,0,0,no report,No,0,0,0,0,0,0
220,221,Copenhagen (planned),Denmark,0,0,0,0,0,0,0,no report,No,0,0,0,0,0,0
255,256,Puigmal,France,2700,1830,0,9,15,7,32,No,No,11,2,0,13,11865,0


# Amount of slopes

In [14]:
# total amount of slopes each country has
total_slope_country = df.groupby('Country')['TotalSlope'].sum()
total_slope_country = total_slope_country.sort_values(ascending=False)

total_slope_country

Country
France                    12546
Austria                    7005
Switzerland                5710
Italy                      3742
Andorra                     723
Spain                       708
Norway                      377
Germany                     321
Sweden                      289
Bulgaria                    155
Southern Russia             142
Finland                     110
United Kingdom               98
Slovenia                     94
Slovakia                     88
Ukraine                      65
Serbia                       55
Poland                       51
Greece                       36
Romania                      33
Czech Republic               32
Bosnia and Herzegovina       25
Liechtenstein                23
Denmark                       3
Lithuania                     1
Netherlands                   1
Siberia                       0
Name: TotalSlope, dtype: int64

In [15]:
# amount of beginner level slopes
beginner_slopes = df.groupby('Country')['BeginnerSlope'].sum()
beginner_slopes = beginner_slopes.sort_values(ascending=False)
beginner_slopes

Country
France                    6123
Austria                   2751
Switzerland               1852
Italy                     1192
Andorra                    344
Spain                      326
Norway                     225
Sweden                     148
Germany                     89
Bulgaria                    73
Southern Russia             51
Slovenia                    49
Finland                     45
United Kingdom              43
Poland                      34
Slovakia                    33
Serbia                      30
Ukraine                     26
Greece                      18
Liechtenstein               11
Czech Republic              11
Romania                      8
Bosnia and Herzegovina       3
Lithuania                    1
Denmark                      1
Siberia                      0
Netherlands                  0
Name: BeginnerSlope, dtype: int64

In [16]:
# amount of intermediate level slopes
intermediate_slopes = df.groupby('Country')['IntermediateSlope'].sum()
intermediate_slopes = intermediate_slopes.sort_values(ascending=False)
intermediate_slopes

Country
France                    4821
Austria                   3386
Switzerland               2879
Italy                     1978
Spain                      280
Andorra                    269
Germany                    181
Sweden                     110
Norway                      87
Bulgaria                    65
Southern Russia             61
Finland                     54
Slovakia                    42
United Kingdom              39
Slovenia                    37
Ukraine                     28
Serbia                      19
Bosnia and Herzegovina      19
Czech Republic              16
Greece                      15
Romania                     13
Poland                      10
Liechtenstein                9
Denmark                      1
Lithuania                    0
Siberia                      0
Netherlands                  0
Name: IntermediateSlope, dtype: int64

In [17]:
# amount of difficult level slopes
difficult_slopes = df.groupby('Country')['DifficultSlope'].sum()
difficult_slopes = difficult_slopes.sort_values(ascending=False)
difficult_slopes

Country
France                    1595
Switzerland                965
Austria                    840
Italy                      554
Andorra                    110
Spain                       99
Norway                      62
Germany                     42
Sweden                      31
Southern Russia             29
United Kingdom              16
Bulgaria                    15
Ukraine                     11
Finland                     10
Slovakia                    10
Romania                      9
Slovenia                     8
Serbia                       6
Poland                       4
Liechtenstein                3
Greece                       3
Bosnia and Herzegovina       3
Czech Republic               2
Denmark                      1
Lithuania                    0
Siberia                      0
Netherlands                  0
Name: DifficultSlope, dtype: int64

# Night Ski and Snow Parks

In [18]:
# NightSki and SnowParks by country 
night_ski_country = df.groupby('Country')['NightSki'].sum()
snow_parks_country = df.groupby('Country')['Snowparks'].sum()

In [19]:
night_ski_country = night_ski_country.sort_values(ascending=False)
night_ski_country

Country
Austria                   32
France                    31
Switzerland               21
Italy                     17
Germany                   13
Norway                     7
Sweden                     4
Bulgaria                   4
Andorra                    3
Finland                    3
Spain                      2
Czech Republic             2
Slovenia                   2
Slovakia                   2
Serbia                     1
Southern Russia            1
Ukraine                    1
Netherlands                1
Romania                    1
Poland                     1
Lithuania                  1
Bosnia and Herzegovina     1
United Kingdom             1
Siberia                    0
Liechtenstein              0
Greece                     0
Denmark                    0
Name: NightSki, dtype: int64

In [20]:
snow_parks_country = snow_parks_country.sort_values(ascending=False)
snow_parks_country

Country
Austria                   69.0
France                    67.0
Switzerland               46.0
Italy                     37.0
Germany                   15.0
Norway                     9.0
Spain                      5.0
Andorra                    5.0
Sweden                     4.0
Slovakia                   4.0
Finland                    3.0
Bulgaria                   3.0
Czech Republic             2.0
Serbia                     1.0
Southern Russia            1.0
Ukraine                    1.0
Slovenia                   1.0
Netherlands                1.0
Poland                     1.0
Lithuania                  1.0
Liechtenstein              1.0
United Kingdom             1.0
Siberia                    0.0
Romania                    0.0
Greece                     0.0
Denmark                    0.0
Bosnia and Herzegovina     0.0
Name: Snowparks, dtype: float64

In [21]:
# most and the least NightSki
max_night_country = night_ski_country.idxmax()
max_night_ski = night_ski_country.max()

least_night_country = night_ski_country.idxmin()
least_night_ski = night_ski_country.min()

print(f'Country with the most Night Skiing resorts: {max_night_country} with {max_night_ski} resorts.')
print(f'Country with the least Night Skiing resorts: {least_night_country} with {least_night_ski} resorts.')

Country with the most Night Skiing resorts: Austria with 32 resorts.
Country with the least Night Skiing resorts: Siberia with 0 resorts.


In [22]:
# most and the least Snow Parks
max_snowparks_country = snow_parks_country.idxmax()
max_snow_parks = snow_parks_country.max()

least_snowparks_country = snow_parks_country.idxmin()
least_snow_parks = snow_parks_country.min()

print(f'Country with the most Snow Parks: {max_snowparks_country} with {max_snow_parks} resorts.')
print(f'Country with the least Snow Parks: {least_night_country} with {least_snow_parks} resorts.')

Country with the most Snow Parks: Austria with 69.0 resorts.
Country with the least Snow Parks: Siberia with 0.0 resorts.


# Lifts

In [23]:
# Total Lifts by Country

total_lifts_country = df.groupby('Country')['TotalLifts'].sum()
total_lifts_country = total_lifts_country.sort_values(ascending=False)
total_lifts_country

Country
France                    3583
Austria                   2513
Switzerland               1285
Italy                     1204
Andorra                    262
Sweden                     196
Germany                    193
Spain                      175
Norway                     145
Finland                     80
Slovakia                    74
Bulgaria                    59
Slovenia                    59
Southern Russia             58
United Kingdom              54
Czech Republic              35
Serbia                      27
Poland                      24
Romania                     19
Netherlands                 19
Ukraine                     18
Greece                      17
Siberia                     12
Bosnia and Herzegovina      11
Liechtenstein                9
Lithuania                    5
Denmark                      0
Name: TotalLifts, dtype: int64

In [24]:
# Gondola Lifts by Country

total_gondola_country = df.groupby('Country')['GondolaLifts'].sum()
total_gondola_country = total_gondola_country.sort_values(ascending=False)
total_gondola_country

Country
France                    448
Austria                   431
Switzerland               282
Italy                     206
Andorra                    26
Southern Russia            23
Germany                    15
Slovakia                    8
Spain                       7
Romania                     7
Bulgaria                    6
Greece                      5
Slovenia                    5
Siberia                     4
Sweden                      4
Norway                      4
Finland                     3
United Kingdom              1
Poland                      1
Bosnia and Herzegovina      1
Serbia                      0
Lithuania                   0
Liechtenstein               0
Denmark                     0
Czech Republic              0
Ukraine                     0
Netherlands                 0
Name: GondolaLifts, dtype: int64

# Highest and Lowest Point

In [25]:
# highest point
highest_point = df.loc[df.groupby('Country')['HighestPoint'].idxmax(), ['Country', 'Resort', 'HighestPoint']]
highest_point = highest_point.sort_values(by='HighestPoint', ascending=False)
highest_point

Unnamed: 0,Country,Resort,HighestPoint
295,Switzerland,Cervinia,3899
142,France,Aiguille du Midi-Chamonix-,3842
366,Austria,Sölden,3340
358,Spain,Sierra Nevada-Pradollano,3282
373,Italy,Gressoney - La-Trinite (Monterosa Ski),3275
1,Andorra,Soldeu-Pas de la Casa/​Grau Roig/​El Tarter/​C...,2640
97,Bulgaria,Borovets,2560
4,Southern Russia,Rosa Khutor,2320
171,Greece,Mount Parnassos-Fterolakka-​Kellaria,2300
207,Germany,Nebelhorn-Oberstdorf,2224


In [26]:
# lowest point
lowest_point = df.loc[df.groupby('Country')['LowestPoint'].idxmax(), ['Country', 'Resort', 'LowestPoint']]
lowest_point = lowest_point.sort_values(by='LowestPoint', ascending=True)
lowest_point

Unnamed: 0,Country,Resort,LowestPoint
216,Denmark,Dry slopes Dayz Søhøjlandets Skicenter,80
193,Lithuania,Indoor ski area Snow Arena-Druskininkai,97
225,Netherlands,Indoor ski area SnowWorld Landgraaf,150
340,Finland,Ruka,291
174,United Kingdom,Glenshee,650
131,Siberia,Sheregesh-Kemerovo,670
5,Poland,Białka Tatrzańska-Kotelnica-​Kaniówka-​Bania,680
180,Sweden,Tänndalen,743
324,Slovenia,Kranjska Gora,800
187,Czech Republic,Špičák,854


In [27]:
df.head()

Unnamed: 0,#,Resort,Country,HighestPoint,LowestPoint,DayPassPriceAdult,BeginnerSlope,IntermediateSlope,DifficultSlope,TotalSlope,Snowparks,NightSki,SurfaceLifts,ChairLifts,GondolaLifts,TotalLifts,LiftCapacity,SnowCannons
0,1,Alpendorf (Ski amedé),Austria,1980,740,52,30,81,4,115,1.0,0,22,16,11,49,75398,600
1,2,Soldeu-Pas de la Casa/​Grau Roig/​El Tarter/​C...,Andorra,2640,1710,47,100,77,33,210,1.0,1,37,28,7,72,99017,1032
2,3,Oberau (Wildschönau),Austria,1130,900,30,1,0,1,2,0.0,0,2,0,0,2,1932,0
3,4,Dachstein West,Austria,1620,780,42,15,33,3,51,1.0,1,25,8,3,36,32938,163
4,5,Rosa Khutor,Southern Russia,2320,940,22,30,26,21,77,1.0,0,6,11,10,27,49228,450
