# Web Scraping Skipass Prices with Python and BeautifulSoup

Data from: https://www.skiinfo.de/schweiz/skipaesse

## Libraries and settings

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

## Web Scraper Skipass Data

In [None]:
# Option (1): Send an HTTP request to the URL (gets first 50 ski resorts)
# url = 'https://www.skiinfo.de/schweiz/skipaesse'
# response = requests.get(url)
# html_content = response.content

# Option (2): Read the saved HTML page from file (gets all ski resorts).
# The file is read as raw bytes (like response.content in option 1) so that
# BeautifulSoup detects the document's encoding itself; opening in text mode
# without an explicit encoding would decode with the platform's locale default.
with open('./data/skipass.html', 'rb') as file:
    html_content = file.read()

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Locate the first table of the page; fail with a clear message if there is none
table = soup.find('table')
if table is None:
    raise ValueError("No <table> element found in './data/skipass.html'")

# Extract table headers (non-breaking spaces are removed from the text)
headers = [header.get_text().replace('\xa0', '') for header in table.find_all('th')]

# Extract table rows; the first <tr> is skipped (header row)
rows = []
for row in table.find_all('tr')[1:]:
    cells = row.find_all('td')
    if not cells:
        # A row without any <td> cell carries no data
        continue
    rows.append([cell.get_text().replace('\xa0', '').strip() for cell in cells])

# Create a DataFrame with one row per extracted table row
df = pd.DataFrame(rows, columns=headers)

# Change column names
df.columns = [
    'ski_resort',
    'season_ticket_adults',
    'season_ticket_children',
    'day_ticket_adults',
    'day_ticket_children',
    'buy_online'
]

# Show DataFrame dimensions
print('Number of rows and columns:', df.shape)

# Show data types of the columns (as scraped, before the numeric conversion below)
print('\nData types of the columns:\n', df.dtypes)

# Change data types and remove special characters.
# Commas are stripped before parsing (presumably thousands separators --
# confirm against the page); values that still cannot be parsed become NaN
# because of errors='coerce'.
price_columns = [
    'season_ticket_adults',
    'season_ticket_children',
    'day_ticket_adults',
    'day_ticket_children',
]
for column in price_columns:
    without_commas = df[column].str.replace(',', '', regex=False)
    df[column] = pd.to_numeric(without_commas, errors='coerce')

# Show the first few rows
df.head(10)

## Summary Statistics

In [None]:
# Summary statistics of the DataFrame (displayed as the cell output)
df.describe()

## Histograms of Skipass Prices

In [None]:
# Histograms: one panel per price column in a 2 x 2 grid
fig, axes = plt.subplots(2, 2, figsize=(7, 6))

# (column, bar colour) pairs, listed in the order the panels are filled:
# top-left, top-right, bottom-left, bottom-right
histogram_specs = [
    ('season_ticket_adults', 'greenyellow'),
    ('season_ticket_children', 'orange'),
    ('day_ticket_adults', 'darkred'),
    ('day_ticket_children', 'blue'),
]

# axes.flat walks the grid row by row, matching the order of the specs above
for ax, (column, color) in zip(axes.flat, histogram_specs):
    df[column].plot(
        kind='hist',
        bins=20,
        edgecolor='black',
        color=color,
        ax=ax
    )
    ax.set_title(f'Histogram of {column}', fontsize=11)
    ax.set_xlabel('Price')
    ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()


### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
# Footer: print OS name, platform, current timestamp and Python version
import os
import platform
import socket
from platform import python_version
from datetime import datetime

separator = '-----------------------------------'
footer_lines = [
    separator,
    os.name.upper(),
    f'{platform.system()} | {platform.release()}',
    'Datetime: ' + datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    f'Python Version: {python_version()}',
    separator,
]

# One print call; joined with newlines so the output matches line-by-line printing
print('\n'.join(footer_lines))