In [27]:
import pandas as pd
import numpy as np

In [28]:
# Load the predicted 2026 visitor table; every scoring step below adds columns to it.
df_score = pd.read_csv(filepath_or_buffer="output/2026_visitors_pred.csv")

In [29]:
# --- 1. Snow Reliability Score ---
# Linear proxy for snow conditions: both temperature readings are penalised and
# total precipitation is rewarded, each with a magnitude of 0.5.
df_score["snow_score"] = (
    df_score["avg_max_temp"].mul(-0.5)         # colder max temp = better
    .sub(df_score["avg_min_temp"].mul(0.5))    # colder min temp = better
    .add(df_score["sum_precip"].mul(0.5))      # more precipitation (treated as snow) = better
)

In [30]:
# --- 2. Normalize Crowding & Snow (0–1 scaling) ---
def minmax_norm(s: pd.Series) -> pd.Series:
    """Rescale a numeric Series linearly onto the [0, 1] range.

    Parameters
    ----------
    s : pd.Series
        Numeric values to rescale. Missing values are skipped when the
        minimum and maximum are computed and stay missing in the result.

    Returns
    -------
    pd.Series
        ``(s - min) / (max - min)``, indexed like ``s``. If every non-missing
        value is identical (zero range), each non-missing entry becomes 0.0
        instead of the NaN that a 0/0 division would produce.
    """
    lo = s.min()
    span = s.max() - lo
    if span == 0:
        # Constant column: there is no spread to scale, so report the floor
        # value rather than letting 0/0 turn the whole column into NaN.
        return (s - lo).astype(float)
    # An empty or all-missing Series gives a NaN span and falls through here,
    # yielding an empty / all-NaN result.
    return (s - lo) / span

# Scale both raw signals onto 0-1; a higher crowd_norm means a busier (worse) week.
for norm_col, raw_col in (("snow_norm", "snow_score"), ("crowd_norm", "visitors_pred")):
    df_score[norm_col] = minmax_norm(df_score[raw_col])

In [31]:
# --- 3. Add Price Index ---
# Season pass price per resort, keyed by the exact spelling used in df_score["Resort"].
season_pass_prices = {
    "Charlotte Pass": 1759,
    "Falls Creek": 1379,
    "Mt. Baw Baw": 555,
    "Mt. Buller": 1649,
    "Mt. Hotham": 1379,
    "Perisher": 1379,
    "Selwyn": 799,
    "Thredbo": 1850,
    "Mt. Stirling": 959
}

df_score["season_pass_price"] = df_score["Resort"].map(season_pass_prices)

# A resort missing from the dict maps to NaN, which would silently propagate
# into price_norm and the final score; fail loudly instead.
unpriced_resorts = (
    df_score.loc[df_score["season_pass_price"].isna(), "Resort"].unique().tolist()
)
assert not unpriced_resorts, f"No season pass price for: {unpriced_resorts}"

# Normalize (0 = cheapest, 1 = most expensive) with the same helper used for
# the snow and crowd columns, rather than repeating the min-max formula.
df_score["price_norm"] = minmax_norm(df_score["season_pass_price"])


In [32]:
# Read the per-resort sentiment ratios.
sentiment_df = pd.read_csv(filepath_or_buffer="../data/resort_sentiment_ratio.csv")

# Eyeball the resort names: they must be spelled exactly as in df_score["Resort"]
# for the merge on "Resort" to match.
print(sentiment_df.head(n=20))

           Resort  sentiment_ratio
0  Charlotte Pass         1.416667
1     Falls Creek         4.000000
2     Mt. Baw Baw         2.000000
3      Mt. Hotham         5.200000
4      Mt. Buller        29.000000
5    Mt. Stirling         6.500000
6        Perisher         1.142857
7          Selwyn         1.000000
8         Thredbo         4.000000


In [33]:
# Merge per-resort sentiment into the 2026 holiday score dataframe.
# - Dropping a "sentiment_ratio" column left by a previous run keeps this cell
#   re-runnable; otherwise the merge would emit sentiment_ratio_x / _y and the
#   normalization below would raise KeyError.
# - validate="many_to_one" makes pandas raise a MergeError if a resort appears
#   more than once in sentiment_df, instead of silently duplicating score rows.
df_score = (
    df_score
    .drop(columns=["sentiment_ratio"], errors="ignore")
    .merge(sentiment_df, on="Resort", how="left", validate="many_to_one")
)

# Normalize sentiment to 0-1 (0 = lowest ratio, 1 = highest) with the same
# helper used for the snow and crowd columns.
df_score["sentiment_norm"] = minmax_norm(df_score["sentiment_ratio"])

In [34]:
# Sanity check: print each column's dtype and non-null count so any column that
# picked up missing values in the steps above is easy to spot.
df_score.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 135 entries, 0 to 134
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Year               135 non-null    int64  
 1   Week               135 non-null    int64  
 2   Resort             135 non-null    object 
 3   avg_max_temp       135 non-null    float64
 4   avg_min_temp       135 non-null    float64
 5   sum_precip         135 non-null    float64
 6   visitors_pred      135 non-null    float64
 7   snow_score         135 non-null    float64
 8   snow_norm          135 non-null    float64
 9   crowd_norm         135 non-null    float64
 10  season_pass_price  135 non-null    int64  
 11  price_norm         135 non-null    float64
 12  sentiment_ratio    135 non-null    float64
 13  sentiment_norm     135 non-null    float64
dtypes: float64(10), int64(3), object(1)
memory usage: 14.9+ KB


In [36]:
# --- 4. Holiday Score ---
# Tunable weight magnitudes for the four normalized components. Snow and
# sentiment raise the score; crowding and price lower it (see the signs below).
W_SNOW = 0.3
W_CROWD = 0.3
W_PRICE = 0.1
W_SENTIMENT = 0.3

df_score["holiday_score"] = (
    W_SNOW * df_score["snow_norm"]
    - W_CROWD * df_score["crowd_norm"]
    - W_PRICE * df_score["price_norm"]
    + W_SENTIMENT * df_score["sentiment_norm"]
)

# --- 5. Rank ---
# Best holiday score first; the index is rebuilt as 0..n-1 so it reflects rank order.
df_score = df_score.sort_values(by="holiday_score", ascending=False, ignore_index=True)

# Preview the 20 highest-scoring resort-weeks.
df_score.head(n=20)


Unnamed: 0,Year,Week,Resort,avg_max_temp,avg_min_temp,sum_precip,visitors_pred,snow_score,snow_norm,crowd_norm,season_pass_price,price_norm,sentiment_ratio,sentiment_norm,holiday_score
0,2026,1,Mt. Buller,2.844732,-1.196906,264.021696,25941.07,131.186936,0.820565,0.412968,1649,0.844788,29.0,1.0,0.3378
1,2026,11,Mt. Stirling,3.402935,-1.523439,290.578079,1702.3113,144.349292,1.0,0.0271,959,0.311969,6.5,0.196429,0.319602
2,2026,11,Mt. Buller,3.402935,-1.523439,290.578079,43546.215,144.349292,1.0,0.693232,1649,0.844788,29.0,1.0,0.307552
3,2026,11,Mt. Baw Baw,3.402935,-1.523439,290.578079,8090.06,144.349292,1.0,0.128789,555,0.0,2.0,0.035714,0.272077
4,2026,1,Mt. Stirling,2.844732,-1.196906,264.021696,2570.7961,131.186936,0.820565,0.040926,959,0.311969,6.5,0.196429,0.261623
5,2026,15,Mt. Buller,6.523578,0.415611,210.955337,17541.146,102.008074,0.422786,0.279246,1649,0.844788,29.0,1.0,0.258583
6,2026,11,Selwyn,3.402935,-1.523439,290.578079,8911.561,144.349292,1.0,0.141867,799,0.188417,1.0,0.0,0.238598
7,2026,1,Mt. Baw Baw,2.844732,-1.196906,264.021696,7587.4307,131.186936,0.820565,0.120788,555,0.0,2.0,0.035714,0.220647
8,2026,1,Selwyn,2.844732,-1.196906,264.021696,5809.8774,131.186936,0.820565,0.09249,799,0.188417,1.0,0.0,0.199581
9,2026,5,Mt. Stirling,2.300878,-1.843534,218.32785,1469.0986,108.935253,0.51722,0.023387,959,0.311969,6.5,0.196429,0.175882


In [None]:
# df_score.to_csv("output/2026_holiday_score_pred.csv", index=False)