In [7]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
# loading in dataset
df = pd.read_csv('../data/raw/European_Ski_Resorts.csv')

In [9]:
# first 5 rows
df.head()

Unnamed: 0,#,Resort,Country,HighestPoint,LowestPoint,DayPassPriceAdult,BeginnerSlope,IntermediateSlope,DifficultSlope,TotalSlope,Snowparks,NightSki,SurfaceLifts,ChairLifts,GondolaLifts,TotalLifts,LiftCapacity,SnowCannons
0,1,Alpendorf (Ski amedé),Austria,1980,740,52,30,81,4,115,Yes,No,22,16,11,49,75398,600
1,2,Soldeu-Pas de la Casa/​Grau Roig/​El Tarter/​C...,Andorra,2640,1710,47,100,77,33,210,Yes,Yes,37,28,7,72,99017,1032
2,3,Oberau (Wildschönau),Austria,1130,900,30,1,0,1,2,No,No,2,0,0,2,1932,0
3,4,Dachstein West,Austria,1620,780,42,15,33,3,51,Yes,Yes,25,8,3,36,32938,163
4,5,Rosa Khutor,Southern Russia,2320,940,22,30,26,21,77,Yes,No,6,11,10,27,49228,450


In [10]:
# stats of dataset
df.describe()

Unnamed: 0,#,HighestPoint,LowestPoint,DayPassPriceAdult,BeginnerSlope,IntermediateSlope,DifficultSlope,TotalSlope,SurfaceLifts,ChairLifts,GondolaLifts,TotalLifts,LiftCapacity,SnowCannons
count,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0,376.0
mean,188.5,2051.68883,1055.098404,40.779255,35.869681,38.348404,11.776596,86.257979,13.662234,9.340426,3.954787,26.957447,34727.244681,218.361702
std,108.686092,776.719908,423.60431,12.474259,53.719832,49.691136,17.78578,113.740218,14.438661,12.595282,6.565411,31.038957,45955.042689,406.903529
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,94.75,1550.0,779.25,34.0,8.0,9.0,2.0,25.0,5.0,2.0,0.0,10.0,10642.5,0.0
50%,188.5,2104.0,1050.0,42.0,19.0,22.0,5.0,48.0,9.0,5.0,2.0,17.0,19031.5,54.0
75%,282.25,2567.5,1350.0,49.0,40.0,42.25,13.0,100.0,17.0,10.0,5.0,29.25,36555.5,252.0
max,376.0,3899.0,2180.0,81.0,312.0,239.0,103.0,600.0,89.0,74.0,40.0,174.0,252280.0,2383.0


In [18]:
df.shape

(376, 18)

In [12]:
df.columns

Index(['#', 'Resort', 'Country', 'HighestPoint', 'LowestPoint',
       'DayPassPriceAdult', 'BeginnerSlope', 'IntermediateSlope',
       'DifficultSlope', 'TotalSlope', 'Snowparks', 'NightSki', 'SurfaceLifts',
       'ChairLifts', 'GondolaLifts', 'TotalLifts', 'LiftCapacity',
       'SnowCannons'],
      dtype='object')

In [17]:
# countries

country_counts = df['Country'].value_counts()
country_counts

Country
Austria                   89
France                    83
Switzerland               61
Italy                     44
Germany                   24
Norway                    10
Spain                      8
Denmark                    8
Sweden                     6
Andorra                    5
Slovakia                   5
Slovenia                   4
United Kingdom             4
Bulgaria                   4
Southern Russia            4
Finland                    3
Poland                     2
Czech Republic             2
Romania                    2
Lithuania                  1
Liechtenstein              1
Serbia                     1
Greece                     1
Siberia                    1
Bosnia and Herzegovina     1
Ukraine                    1
Netherlands                1
Name: count, dtype: int64

In [14]:
# finding the amount of slopes each country has
slopes = df[df['TotalSlope'] > 0]
slopes_by_country = slopes.groupby('Country')['Resort'].count()
slopes_by_country

Country
Andorra                    5
Austria                   89
Bosnia and Herzegovina     1
Bulgaria                   4
Czech Republic             2
Denmark                    1
Finland                    3
France                    82
Germany                   23
Greece                     1
Italy                     44
Liechtenstein              1
Lithuania                  1
Netherlands                1
Norway                    10
Poland                     2
Romania                    2
Serbia                     1
Slovakia                   5
Slovenia                   4
Southern Russia            4
Spain                      8
Sweden                     6
Switzerland               60
Ukraine                    1
United Kingdom             4
Name: Resort, dtype: int64

In [21]:
# day pass for adults by resorts
day_pass_resorts = df[['Resort','DayPassPriceAdult']]
day_pass_resorts

Unnamed: 0,Resort,DayPassPriceAdult
0,Alpendorf (Ski amedé),52
1,Soldeu-Pas de la Casa/​Grau Roig/​El Tarter/​C...,47
2,Oberau (Wildschönau),30
3,Dachstein West,42
4,Rosa Khutor,22
...,...,...
371,Montgenèvre (Via Lattea),48
372,Sauze d’Oulx (Via Lattea),48
373,Gressoney - La-Trinite (Monterosa Ski),43
374,Champoluc (Monterosa Ski),43


In [32]:
# the highest and cheapest Day Pass Price 

highest_price = df['DayPassPriceAdult'].max()
cheapest_price = df['DayPassPriceAdult'].min()

highest_price_resort = df.loc[df['DayPassPriceAdult'].idxmax(), 'Resort']
highest_price_country = df.loc[df['DayPassPriceAdult'].idxmax(), 'Country']

cheapest_price_resort = df.loc[df['DayPassPriceAdult'].idxmin(), 'Resort']
cheapest_price_country = df.loc[df['DayPassPriceAdult'].idxmin(), 'Country']

print(f"Highest Day Pass Price for an Adult: {highest_price} at {highest_price_resort} in {highest_price_country}.")
print(f"Cheapest Day Pass Price for an Adult: {cheapest_price} at {cheapest_price_resort} in {cheapest_price_country}.")

Highest Day Pass Price for an Adult: 81 at Cervinia in Switzerland.
Cheapest Day Pass Price for an Adult: 0 at Pragelato in Italy.


In [41]:
# cheapest resorts of day
cheapest_resorts = df[df['DayPassPriceAdult'] == cheapest_price]
cheapest_resorts

Unnamed: 0,#,Resort,Country,HighestPoint,LowestPoint,DayPassPriceAdult,BeginnerSlope,IntermediateSlope,DifficultSlope,TotalSlope,Snowparks,NightSki,SurfaceLifts,ChairLifts,GondolaLifts,TotalLifts,LiftCapacity,SnowCannons
123,124,Pragelato,Italy,2700,1335,0,14,32,4,50,No,No,4,1,0,5,3660,0
131,132,Sheregesh-Kemerovo,Siberia,1270,670,0,0,0,0,0,No,No,3,5,4,12,19919,0
162,163,Alpika Service,Southern Russia,2228,535,0,10,10,5,25,No,No,1,1,7,9,18120,0
217,218,Indoor ski area Skidome Denmark – Randers (pla...,Denmark,120,10,0,1,1,1,3,no report,No,0,0,0,0,0,0
218,219,Dry slopes Indoorski – Rødovre,Denmark,53,50,0,0,0,0,0,no report,No,0,0,0,0,0,0
219,220,Dry slopes Ski Arena – Aarhus,Denmark,23,20,0,0,0,0,0,no report,No,0,0,0,0,0,0
220,221,Copenhagen (planned),Denmark,0,0,0,0,0,0,0,no report,No,0,0,0,0,0,0
255,256,Puigmal,France,2700,1830,0,9,15,7,32,No,No,11,2,0,13,11865,0


In [37]:
df[df['DayPassPriceAdult'] == highest_price]

Unnamed: 0,#,Resort,Country,HighestPoint,LowestPoint,DayPassPriceAdult,BeginnerSlope,IntermediateSlope,DifficultSlope,TotalSlope,Snowparks,NightSki,SurfaceLifts,ChairLifts,GondolaLifts,TotalLifts,LiftCapacity,SnowCannons
295,296,Cervinia,Switzerland,3899,1562,81,75,220,27,322,Yes,No,21,22,20,63,93464,1060
335,336,Zermatt - Matterhorn,Switzerland,3899,1562,81,75,220,27,322,Yes,No,21,22,20,63,93464,1060
