# Pandas class

### Basics

### [pandas docs](https://pandas.pydata.org/docs/index.html)

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd

<module 'pandas' from '/Users/vigadam/.local/share/virtualenvs/da-coding-python-zosgduYD/lib/python3.9/site-packages/pandas/__init__.py'>

In [3]:
pd.DataFrame

pandas.core.frame.DataFrame

In [4]:
help(pd.DataFrame)

Help on class DataFrame in module pandas.core.frame:

class DataFrame(pandas.core.generic.NDFrame, pandas.core.arraylike.OpsMixin)
 |  DataFrame(data=None, index: 'Axes | None' = None, columns: 'Axes | None' = None, dtype: 'Dtype | None' = None, copy: 'bool | None' = None)
 |  
 |  Two-dimensional, size-mutable, potentially heterogeneous tabular data.
 |  
 |  Data structure also contains labeled axes (rows and columns).
 |  Arithmetic operations align on both row and column labels. Can be
 |  thought of as a dict-like container for Series objects. The primary
 |  pandas data structure.
 |  
 |  Parameters
 |  ----------
 |  data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
 |      Dict can contain Series, arrays, constants, dataclass or list-like objects. If
 |      data is a dict, column order follows insertion-order.
 |  
 |      .. versionchanged:: 0.25.0
 |         If data is a list of dicts, column order follows insertion-order.
 |  
 |  index : Index or ar

In [5]:
d = {
    "A": [1, 2, 3, 4],
    "B": [3, 4, 5, 6],
    "C": ["a", "b", "c", "d"],
    "d": [3.2, 4.3, 5, 6],
}

In [6]:
d

{'A': [1, 2, 3, 4],
 'B': [3, 4, 5, 6],
 'C': ['a', 'b', 'c', 'd'],
 'd': [3.2, 4.3, 5, 6]}

In [7]:
df = pd.DataFrame(data=d)

In [8]:
df

Unnamed: 0,A,B,C,d
0,1,3,a,3.2
1,2,4,b,4.3
2,3,5,c,5.0
3,4,6,d,6.0


In [9]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [10]:
df.columns

Index(['A', 'B', 'C', 'd'], dtype='object')

In [11]:
df.values

array([[1, 3, 'a', 3.2],
       [2, 4, 'b', 4.3],
       [3, 5, 'c', 5.0],
       [4, 6, 'd', 6.0]], dtype=object)

In [12]:
df = df.set_index("A")
df

Unnamed: 0_level_0,B,C,d
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,3,a,3.2
2,4,b,4.3
3,5,c,5.0
4,6,d,6.0


In [13]:
df = df.reset_index()
df

Unnamed: 0,A,B,C,d
0,1,3,a,3.2
1,2,4,b,4.3
2,3,5,c,5.0
3,4,6,d,6.0


In [14]:
df.dtypes

A      int64
B      int64
C     object
d    float64
dtype: object

--------

### renaming

In [15]:
df = pd.DataFrame(
    {k: np.random.randint(0, 10, size=5) for k in ["CoRvInUs", "ElTE", "bmE"]}
)

In [16]:
df

Unnamed: 0,CoRvInUs,ElTE,bmE
0,8,5,6
1,2,9,7
2,1,9,7
3,8,7,3
4,0,9,1


In [17]:
df.columns = ["corvinus","elte","bme"]

In [18]:
df

Unnamed: 0,corvinus,elte,bme
0,8,5,6
1,2,9,7
2,1,9,7
3,8,7,3
4,0,9,1


In [19]:
df = df.rename(columns={'CoRvInUs': 'corvinus', 'ElTE': 'elte', 'bmE': 'bme'})
df

Unnamed: 0,corvinus,elte,bme
0,8,5,6
1,2,9,7
2,1,9,7
3,8,7,3
4,0,9,1


In [20]:
df.rename(columns=lambda c: c.upper())

Unnamed: 0,CORVINUS,ELTE,BME
0,8,5,6
1,2,9,7
2,1,9,7
3,8,7,3
4,0,9,1


---------

### add columns

In [21]:
df["bge"] = np.random.randint(0, 10, size=df.shape[0])

In [22]:
df.assign(bge=np.random.randint(0, 10, size=df.shape[0]))

Unnamed: 0,corvinus,elte,bme,bge
0,8,5,6,8
1,2,9,7,0
2,1,9,7,8
3,8,7,3,7
4,0,9,1,1


### add rows

In [23]:
df.append({"corvinus": 1, "elte": 3, "bme": 4}, ignore_index=True)

Unnamed: 0,corvinus,elte,bme,bge
0,8.0,5.0,6.0,1.0
1,2.0,9.0,7.0,2.0
2,1.0,9.0,7.0,4.0
3,8.0,7.0,3.0,1.0
4,0.0,9.0,1.0,1.0
5,1.0,3.0,4.0,


----------

### merging

In [24]:
df2 = pd.DataFrame({k: np.random.randint(0, 10, size=5) for k in ["szfe", "corvinus"]})

In [25]:
df

Unnamed: 0,corvinus,elte,bme,bge
0,8,5,6,1
1,2,9,7,2
2,1,9,7,4
3,8,7,3,1
4,0,9,1,1


In [26]:
df2

Unnamed: 0,szfe,corvinus
0,7,3
1,3,5
2,9,8
3,1,1
4,7,9


In [27]:
df.merge(df2, on="corvinus",how="inner")

Unnamed: 0,corvinus,elte,bme,bge,szfe
0,8,5,6,1,9
1,8,7,3,1,9
2,1,9,7,4,1


In [28]:
df.merge(df2, on="corvinus",how="outer")

Unnamed: 0,corvinus,elte,bme,bge,szfe
0,8,5.0,6.0,1.0,9.0
1,8,7.0,3.0,1.0,9.0
2,2,9.0,7.0,2.0,
3,1,9.0,7.0,4.0,1.0
4,0,9.0,1.0,1.0,
5,3,,,,7.0
6,5,,,,3.0
7,9,,,,7.0


In [29]:
df.merge(df2, on="corvinus",how="left")

Unnamed: 0,corvinus,elte,bme,bge,szfe
0,8,5,6,1,9.0
1,2,9,7,2,
2,1,9,7,4,1.0
3,8,7,3,1,9.0
4,0,9,1,1,


In [30]:
df.merge(df2, on="corvinus",how="right")

Unnamed: 0,corvinus,elte,bme,bge,szfe
0,3,,,,7
1,5,,,,3
2,8,5.0,6.0,1.0,9
3,8,7.0,3.0,1.0,9
4,1,9.0,7.0,4.0,1
5,9,,,,7


----------

### imputing

In [31]:
df = pd.DataFrame(
    {
        "A": [1, None, 3, 4],
        "B": [3, None, None, 6],
        "C": ["a", "b", "c", "d"],
        "D": None,
    }
)

In [32]:
df.isna().any()

A     True
B     True
C    False
D     True
dtype: bool

In [33]:
df.isna().sum()

A    1
B    2
C    0
D    4
dtype: int64

In [34]:
df

Unnamed: 0,A,B,C,D
0,1.0,3.0,a,
1,,,b,
2,3.0,,c,
3,4.0,6.0,d,


In [35]:
df.dropna()

Unnamed: 0,A,B,C,D


In [36]:
df.dropna(subset=["A"])

Unnamed: 0,A,B,C,D
0,1.0,3.0,a,
2,3.0,,c,
3,4.0,6.0,d,


In [37]:
df.fillna(0)

Unnamed: 0,A,B,C,D
0,1.0,3.0,a,0
1,0.0,0.0,b,0
2,3.0,0.0,c,0
3,4.0,6.0,d,0


----------

### duplicates

In [38]:
df = pd.DataFrame(
    {
        "brand": ["Yum Yum", "Yum Yum", "Indomie", "Indomie", "Indomie"],
        "style": ["cup", "cup", "cup", "pack", "pack"],
        "rating": [4, 4, 3.5, 15, 5],
    }
)
df

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
1,Yum Yum,cup,4.0
2,Indomie,cup,3.5
3,Indomie,pack,15.0
4,Indomie,pack,5.0


In [39]:
df.drop_duplicates(subset=["brand", "style"])

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
2,Indomie,cup,3.5
3,Indomie,pack,15.0


-----------

### indexing, filtering

In [40]:
df

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
1,Yum Yum,cup,4.0
2,Indomie,cup,3.5
3,Indomie,pack,15.0
4,Indomie,pack,5.0


In [41]:
df.loc[4,"rating"]

5.0

In [42]:
df.iloc[4,1]

'pack'

In [43]:
df.query("rating == 4")

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
1,Yum Yum,cup,4.0


In [44]:
df.query("style == 'cup'")

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
1,Yum Yum,cup,4.0
2,Indomie,cup,3.5


In [45]:
df["style"] == "cup"

0     True
1     True
2     True
3    False
4    False
Name: style, dtype: bool

In [46]:
df.loc[df["style"] == "cup"]

Unnamed: 0,brand,style,rating
0,Yum Yum,cup,4.0
1,Yum Yum,cup,4.0
2,Indomie,cup,3.5


this is the most pythonic way of filtering by xvar (used in case studies in most cases)

In [47]:
df.loc[lambda x: x["brand"] == "Indomie"]

Unnamed: 0,brand,style,rating
2,Indomie,cup,3.5
3,Indomie,pack,15.0
4,Indomie,pack,5.0


multiple conditions

In [48]:
df.loc[lambda x: (x["brand"] == "Indomie") & (x["style"] == "pack")]

Unnamed: 0,brand,style,rating
3,Indomie,pack,15.0
4,Indomie,pack,5.0


In [49]:
df.loc[lambda x: (x["brand"] == "Indomie") & (x["rating"] < 6)]

Unnamed: 0,brand,style,rating
2,Indomie,cup,3.5
4,Indomie,pack,5.0


------

### sorting by x var

In [50]:
df.sort_values(by=["rating"])

Unnamed: 0,brand,style,rating
2,Indomie,cup,3.5
0,Yum Yum,cup,4.0
1,Yum Yum,cup,4.0
4,Indomie,pack,5.0
3,Indomie,pack,15.0


In [51]:
df.sort_values(by=["rating"], ascending=False)

Unnamed: 0,brand,style,rating
3,Indomie,pack,15.0
4,Indomie,pack,5.0
0,Yum Yum,cup,4.0
1,Yum Yum,cup,4.0
2,Indomie,cup,3.5


---------

### unique values

In [52]:
df["brand"].unique()

array(['Yum Yum', 'Indomie'], dtype=object)

In [53]:
df["brand"].nunique()

2

In [54]:
df["style"].value_counts()

cup     3
pack    2
Name: style, dtype: int64

--------

### metrics

In [55]:
df = pd.DataFrame(
    {
        "A": [12, 4, 5, 44, 1],
        "B": [5, 2, 54, 3, 2],
        "C": [20, 16, 7, 3, 8],
        "D": [14, 3, 17, 2, 6],
    }
)

In [56]:
df

Unnamed: 0,A,B,C,D
0,12,5,20,14
1,4,2,16,3
2,5,54,7,17
3,44,3,3,2
4,1,2,8,6


### by columns

In [57]:
df.max()

A    44
B    54
C    20
D    17
dtype: int64

In [58]:
df.min()

A    1
B    2
C    3
D    2
dtype: int64

In [59]:
df.count()

A    5
B    5
C    5
D    5
dtype: int64

In [60]:
df.mean()

A    13.2
B    13.2
C    10.8
D     8.4
dtype: float64

### easier displayed in a new dataframe

In [61]:
pd.DataFrame(
    {
        "max": df.max(),
        "min": df.min(),
        "n": df.count(),
        "mean": df.mean(),
        "median": df.median(),
        "sum": df.sum(),
        "std": df.std(),
    }
)

Unnamed: 0,max,min,n,mean,median,sum,std
A,44,1,5,13.2,5.0,66,17.683325
B,54,2,5,13.2,3.0,66,22.840753
C,20,3,5,10.8,8.0,54,6.978539
D,17,2,5,8.4,6.0,42,6.730527


### we can do the same by columns

In [62]:
df["B"].sum()

66

In [63]:
df["A"].max()

44

In [64]:
df["C"].std()

6.97853852894716

------------

### grouping

In [65]:
arrays = [
    ["Falcon", "Falcon", "Parrot", "Parrot"],
    ["Captive", "Wild", "Captive", "Wild"],
]
index = pd.MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
df = pd.DataFrame(
    {"Max Speed": [390.0, 350.0, 30.0, 20.0], "Weight": [1200, 1400, 700, 800]},
    index=index,
).reset_index()

In [66]:
df

Unnamed: 0,Animal,Type,Max Speed,Weight
0,Falcon,Captive,390.0,1200
1,Falcon,Wild,350.0,1400
2,Parrot,Captive,30.0,700
3,Parrot,Wild,20.0,800


In [67]:
df.groupby("Animal").mean()

Unnamed: 0_level_0,Max Speed,Weight
Animal,Unnamed: 1_level_1,Unnamed: 2_level_1
Falcon,370.0,1300.0
Parrot,25.0,750.0


In [68]:
df.groupby("Type")["Max Speed"].max().reset_index()

Unnamed: 0,Type,Max Speed
0,Captive,390.0
1,Wild,350.0


In [69]:
df.groupby("Type")["Max Speed"].std().reset_index()

Unnamed: 0,Type,Max Speed
0,Captive,254.558441
1,Wild,233.345238


In [70]:
df.groupby("Animal")["Max Speed"].std().reset_index()

Unnamed: 0,Animal,Max Speed
0,Falcon,28.284271
1,Parrot,7.071068


### a more general way: agg

In [71]:
help(df.groupby("Animal").agg)

Help on method aggregate in module pandas.core.groupby.generic:

aggregate(func=None, *args, engine=None, engine_kwargs=None, **kwargs) method of pandas.core.groupby.generic.DataFrameGroupBy instance
    Aggregate using one or more operations over the specified axis.
    
    Parameters
    ----------
    func : function, str, list or dict
        Function to use for aggregating the data. If a function, must either
        work when passed a DataFrame or when passed to DataFrame.apply.
    
        Accepted combinations are:
    
        - function
        - string function name
        - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
        - dict of axis labels -> functions, function names or list of such.
    
        Can also accept a Numba JIT function with
        ``engine='numba'`` specified. Only passing a single function is supported
        with this engine.
    
        If the ``'numba'`` engine is chosen, the function must be
        a user defined func

In [72]:
df.groupby("Animal").agg(
    max_speed=("Max Speed", "max"),
    mean_spead=("Max Speed", "mean"),
    max_weight=("Weight", "max"),
    mean_weight=("Weight", "mean"),
)

Unnamed: 0_level_0,max_speed,mean_spead,max_weight,mean_weight
Animal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Falcon,390.0,370.0,1400,1300.0
Parrot,30.0,25.0,800,750.0


--------

###   practice excercises with hotel data

In [101]:

df = pd.read_csv("../data/hotels.csv")

In [103]:
df

Unnamed: 0,hotel_id,price,year,month,weekend,country,city,stars
0,1,172,2017,11,1,Netherlands,Amsterdam,4.0
1,1,122,2018,1,1,Netherlands,Amsterdam,4.0
2,1,122,2017,12,0,Netherlands,Amsterdam,4.0
3,1,552,2017,12,0,Netherlands,Amsterdam,4.0
4,1,122,2018,2,1,Netherlands,Amsterdam,4.0
...,...,...,...,...,...,...,...,...
121620,22902,119,2017,11,0,Croatia,Zagreb,4.0
121621,22902,109,2018,4,1,Croatia,Zagreb,4.0
121622,22902,109,2018,3,1,Croatia,Zagreb,4.0
121623,22902,446,2017,12,0,Croatia,Zagreb,4.0


In [104]:
df.head()

Unnamed: 0,hotel_id,price,year,month,weekend,country,city,stars
0,1,172,2017,11,1,Netherlands,Amsterdam,4.0
1,1,122,2018,1,1,Netherlands,Amsterdam,4.0
2,1,122,2017,12,0,Netherlands,Amsterdam,4.0
3,1,552,2017,12,0,Netherlands,Amsterdam,4.0
4,1,122,2018,2,1,Netherlands,Amsterdam,4.0


In [105]:
df.head(10)

Unnamed: 0,hotel_id,price,year,month,weekend,country,city,stars
0,1,172,2017,11,1,Netherlands,Amsterdam,4.0
1,1,122,2018,1,1,Netherlands,Amsterdam,4.0
2,1,122,2017,12,0,Netherlands,Amsterdam,4.0
3,1,552,2017,12,0,Netherlands,Amsterdam,4.0
4,1,122,2018,2,1,Netherlands,Amsterdam,4.0
5,1,114,2017,11,0,Netherlands,Amsterdam,4.0
6,3,118,2017,12,0,Netherlands,Amsterdam,4.0
7,3,217,2017,11,1,Netherlands,Amsterdam,4.0
8,3,114,2018,1,1,Netherlands,Amsterdam,4.0
9,3,737,2017,12,0,Netherlands,Amsterdam,4.0


In [106]:
df.tail()

Unnamed: 0,hotel_id,price,year,month,weekend,country,city,stars
121620,22902,119,2017,11,0,Croatia,Zagreb,4.0
121621,22902,109,2018,4,1,Croatia,Zagreb,4.0
121622,22902,109,2018,3,1,Croatia,Zagreb,4.0
121623,22902,446,2017,12,0,Croatia,Zagreb,4.0
121624,22902,117,2017,12,0,Croatia,Zagreb,4.0


In [107]:
df.head().T

Unnamed: 0,0,1,2,3,4
hotel_id,1,1,1,1,1
price,172,122,122,552,122
year,2017,2018,2017,2017,2018
month,11,1,12,12,2
weekend,1,1,0,0,1
country,Netherlands,Netherlands,Netherlands,Netherlands,Netherlands
city,Amsterdam,Amsterdam,Amsterdam,Amsterdam,Amsterdam
stars,4.0,4.0,4.0,4.0,4.0


In [133]:
df.describe()

Unnamed: 0,hotel_id,price,year,month,weekend,stars
count,121625.0,121625.0,121625.0,121625.0,121625.0,121625.0
mean,10429.775457,181.957755,2017.579683,6.837081,0.670249,3.337883
std,6758.461551,263.278473,0.493612,4.201129,0.470125,0.854411
min,1.0,5.0,2017.0,1.0,0.0,1.0
25%,4478.0,76.0,2017.0,3.0,0.0,3.0
50%,9640.0,116.0,2018.0,6.0,1.0,3.0
75%,14562.0,196.0,2018.0,11.0,1.0,4.0
max,22902.0,39996.0,2018.0,12.0,1.0,5.0


In [135]:
from pandas_profiling import ProfileReport

In [136]:
profile = ProfileReport(df, title="Pandas Profiling Report", explorative=True)

In [137]:
profile

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



Read more about the original dataframe here: https://gabors-data-analysis.com/datasets/hotels-europe/

-------

### Practice questions

#### easy
1. [How many hotels are in the database?](#1)
2. [How many cities are in the database?](#2)
3. [Find the city with the highest amount of sold rooms!](#3)
4. [Which is the most frequent year in the database?](#4)
5. [How mutch for the most expensive and the cheapest room?](#5)
6. [Find the months with the highest number of purchases!](#6)

#### medium
7. [In which city you can find the most expensive room? the cheapest?](#7)
8. [Check wheter staying in Portugal or in the Netherlands is cheaper on average!](#8)
9. [Is it cheaper staying on weekdays or in the weekand? By how mutch?](#9)
10. [Check question 9. but only on London!](#10)
11. [Find the (on average) most expensive three months!](#11)
12. [In which country you can find the most hotels?](#12)

#### hard
13. [Find the city with the most 5 stars hotels!](#13)
14. [In which hotel you can find the highest difference between room prices? ](#14)
15. [Find the country with the highest standard deviation in city level average room prices!](#15)

-------

### Answers

### Easy

#### 1. How many hotels are in the database? <a id="1">

In [123]:
df["hotel_id"].nunique()

17186

#### 2. How many cities are in the database? <a id="2">

In [124]:
df["city"].nunique()

46

#### 3. Find the city with the highest amount of sold rooms! <a id="3">

In [125]:
df["city"].value_counts().head(2)

Paris     15950
London     9681
Name: city, dtype: int64

#### 4. Which is the most frequent year in the database? <a id="4">

In [112]:
df["year"].value_counts()

2018    70504
2017    51121
Name: year, dtype: int64

#### 5. How mutch for the most expensive and the cheapest room? <a id="5">

In [113]:
df["price"].max()

39996

In [114]:
df["price"].min()

5

#### 6. Find the months with the highest number of purchases! <a id="6">

In [115]:
df["month"].value_counts().head(4)

12    26201
11    24920
2     12426
1     12181
Name: month, dtype: int64

### Medium

#### 7. In which city you can find the most expensive room? the cheapest? <a id="7">

In [121]:
df.loc[lambda x: x["price"] == x["price"].max(), "city"].drop_duplicates()

66230    Minsk
Name: city, dtype: object

In [122]:
df.loc[lambda x: x["price"] == x["price"].min(), "city"].drop_duplicates()

68012    Moscow
Name: city, dtype: object

#### 8. Check wheter staying in Portugal or in the Netherlands is cheaper on average!  <a id="8">

In [127]:
df.groupby("country")["price"].mean().loc[["Portugal", "Netherlands"]]

country
Portugal       181.458524
Netherlands    270.028184
Name: price, dtype: float64

#### 9.  Is it cheaper staying on weekdays or in the weekand? By how mutch? <a id="9">

In [128]:
df.groupby("weekend")["price"].mean()

weekend
0    263.682791
1    141.750389
Name: price, dtype: float64

In [129]:
df.groupby("weekend")["price"].mean().diff(-1)[0]

121.93240112514385

#### 10. Check question 9. but only on London! <a id="10">

In [130]:
df.loc[lambda x: x["city"] == "London"].groupby("weekend")["price"].mean()

weekend
0    439.370191
1    211.100157
Name: price, dtype: float64

In [131]:
df.loc[lambda x: x["city"] == "London"].groupby("weekend")["price"].mean().diff(-1)[0]

228.27003411144526

#### 11. Find the (on average) most expensive three months! <a id="11">

In [108]:
df.groupby("month")["price"].mean().sort_values(ascending=False)

month
12    337.769169
6     162.852218
5     161.200812
4     150.461360
3     143.015777
2     127.364961
11    126.446188
1     120.313932
Name: price, dtype: float64

#### 12. In which country you can find the most hotels? <a id="12">

In [138]:
df.groupby("country")["city"].nunique().loc[lambda s: s == s.max()]

country
France            3
Germany           3
Italy             3
Russia            3
Spain             3
United Kingdom    3
Name: city, dtype: int64

### Hard

#### 13. Find the city with the most 5 stars hotels! <a id="13">

In [140]:
(
    df.loc[df["stars"] == 5]
    .drop_duplicates("hotel_id")["city"]
    .value_counts(ascending=False)
    .head()
)

London      126
Istanbul     96
Paris        83
Rome         74
Lisbon       52
Name: city, dtype: int64

#### 14. In which hotel you can find the highest difference between room prices? <a id="14">

In [141]:
(
    df.groupby("hotel_id")
    .agg(max_price=("price", max), min_price=("price", min), n=("price", "count"))
    .assign(diff=lambda df: df["max_price"] - df["min_price"])
    .sort_values(by="diff", ascending=False)
    .iloc[:2]
)

Unnamed: 0_level_0,max_price,min_price,n,diff
hotel_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10921,39996,110,4,39886
14797,14859,3714,10,11145


#### 15. Find the country with the highest standard deviation in city level average room prices! <a id="15">

In [153]:
(
    df.groupby("city")[["price"]]
    .mean()
    .merge(df[["country", "city"]].drop_duplicates(), on="city")
    .groupby("country")["price"]
    .std()
    .fillna(0)
    .sort_values(ascending=False)
    .head()
)

country
Netherlands       126.140275
United Kingdom     79.786691
France             48.548906
Croatia            39.097915
Spain              31.179669
Name: price, dtype: float64