In [18]:
import pandas as pd

ski_resort = pd.read_csv('../European_Ski_Resorts.csv')

# Raggruppa per paese e calcola le medie richieste
aggregated_df = ski_resort.groupby("Country").agg({
    "TotalSlope": "mean",
    "TotalLifts": "mean",
    "HighestPoint": "mean",
    "DayPassPriceAdult": "mean",
    "SnowCannons": "mean"
}).reset_index()

# Rinomina le colonne per chiarezza
aggregated_df.columns = [
    "Country",
    "Avg_Total_Slopes",
    "Avg_Total_Lifts",
    "Avg_Highest_Point",
    "Avg_Day_Pass_Price",
    "Avg_Snow_Cannons"
]



In [19]:
import pycountry 

# Funzione per ottenere il country code ISO alpha-2 da pycountry
def get_country_code(country_name):
    try:
        return pycountry.countries.lookup(country_name).alpha_2
    except LookupError:
        return None

# Applica la funzione al dataset media_df
aggregated_df['country_code'] = aggregated_df['Country'].apply(get_country_code)

# Visualizza i paesi per cui non è stato trovato un codice
missing_codes = aggregated_df[aggregated_df['country_code'].isna()]
print("Paesi non riconosciuti da pycountry:\n", missing_codes)

Paesi non riconosciuti da pycountry:
             Country  Avg_Total_Slopes  Avg_Total_Lifts  Avg_Highest_Point  \
18          Siberia               0.0             12.0            1270.00   
21  Southern Russia              35.5             14.5            1614.25   

    Avg_Day_Pass_Price  Avg_Snow_Cannons country_code  
18                 0.0               0.0         None  
21                19.0             112.5         None  


In [20]:
# Cerca la riga che contiene 'Southern Russia' nel campo 'Country'
southern_russia_row = aggregated_df[aggregated_df['Country'].str.contains('Southern Russia', case=False, na=False)]

southern_russia_row

Unnamed: 0,Country,Avg_Total_Slopes,Avg_Total_Lifts,Avg_Highest_Point,Avg_Day_Pass_Price,Avg_Snow_Cannons,country_code
21,Southern Russia,35.5,14.5,1614.25,19.0,112.5,


In [21]:
# Assegna 'RU' alla colonna country_code per la riga corrispondente a Southern Russia
aggregated_df.at[21, 'country_code'] = 'RU'

# Verifica la modifica
aggregated_df.loc[21]

Country               Southern Russia
Avg_Total_Slopes                 35.5
Avg_Total_Lifts                  14.5
Avg_Highest_Point             1614.25
Avg_Day_Pass_Price               19.0
Avg_Snow_Cannons                112.5
country_code                       RU
Name: 21, dtype: object

In [22]:
# Identifica la riga che contiene 'Siberia' nel campo 'Country'
siberia_index = aggregated_df[aggregated_df['Country'].str.contains('Siberia', case=False, na=False)].index

# Rimuovi la riga
aggregated_df.drop(index=siberia_index, inplace=True)

# Verifica che la riga sia stata rimossa
aggregated_df[aggregated_df['Country'].str.contains('Siberia', case=False, na=False)]

Unnamed: 0,Country,Avg_Total_Slopes,Avg_Total_Lifts,Avg_Highest_Point,Avg_Day_Pass_Price,Avg_Snow_Cannons,country_code


In [23]:
aggregated_df.drop(columns=['Country'], inplace=True)

In [24]:
aggregated_df.head()

Unnamed: 0,Avg_Total_Slopes,Avg_Total_Lifts,Avg_Highest_Point,Avg_Day_Pass_Price,Avg_Snow_Cannons,country_code
0,144.6,52.4,2419.0,43.0,699.6,AD
1,78.707865,28.235955,2089.247191,44.247191,310.134831,AT
2,25.0,11.0,1889.0,20.0,0.0,BA
3,38.75,14.75,2334.0,26.0,167.5,BG
4,16.0,17.5,1218.5,28.5,15.0,CZ


In [25]:
aggregated_df.to_csv("media_per_country.csv", index=False)