# Hotel Reviews in Europe

__Season analysis__

__Import libraries and dataset__

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Load the hotel reviews CSV into a DataFrame; the path is relative to the
# current working directory.
df = pd.read_csv("data/Hotel_Reviews.csv")

__Explore review date__

In [None]:
# Preview the first values of the Review_Date column to inspect its format.
df['Review_Date'].head()

__Create new season feature from the review date__

__Note__: meteorological seasons for the northern hemisphere are used here (Winter: December–February, Spring: March–May, Summer: June–August, Autumn: September–November).

In [None]:
def calc_season(row):
    """Return the season name for the month found in ``row['Review_Date']``.

    The month is the integer in front of the first '/' of the date string.
    Months 12, 1 and 2 map to 'Winter', 3-5 to 'Spring' and 6-8 to 'Summer';
    any other month number falls through to 'Autumn'.
    """
    month_text, _, _ = row['Review_Date'].partition('/')
    season_by_month = {
        12: 'Winter', 1: 'Winter', 2: 'Winter',
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
    }
    # Everything not listed above (including 9, 10, 11) is treated as autumn.
    return season_by_month.get(int(month_text), 'Autumn')
    
# Run calc_season once per row (row-wise apply) and store the result as the
# new 'Season' column, then preview it next to the source date.
df['Season'] = df.apply(calc_season, axis='columns')
df[['Review_Date', 'Season']].head()

__Create new hotel country feature from the hotel address__

In [None]:
# Take the last space-separated token of each address as the country name.
# Vectorized string methods replace the row-wise apply: they avoid one Python
# call per row, leave missing addresses as NaN instead of raising, and also
# work on an empty frame.
df['Hotel_Country'] = df['Hotel_Address'].str.rsplit(' ', n=1).str[-1]
# 'United Kingdom' is two words, so its last token is only 'Kingdom';
# restore the full country name for those rows.
df['Hotel_Country'] = df['Hotel_Country'].mask(
    df['Hotel_Country'].str.contains('Kingdom', regex=False, na=False),
    'United Kingdom',
)
df[['Hotel_Country', 'Hotel_Address']].head()

In [None]:
# Collect the distinct values of the Hotel_Country column and display them.
countries = df['Hotel_Country'].unique()
countries

__Group by hotel country and season__

In [None]:
# Count the rows (one per review) for each (country, season) pair and expose
# that count as the 'Guest Amount' column of a flat DataFrame.
df_country_by_season = (
    df.groupby(['Hotel_Country', 'Season'])
    .size()
    .reset_index(name='Guest Amount')
)
df_country_by_season

__Plot guest distribution by country and season__

In [None]:
# Define order of seasons
seasons = ['Spring', 'Summer', 'Autumn', 'Winter']
# Store the season as a categorical with the categories listed in calendar
# order instead of plain strings.
df_country_by_season['Season'] = pd.Categorical(
    df_country_by_season['Season'], categories=seasons
)

x = np.arange(len(seasons))  # label locations
# Bars are 0.1 wide by default, but shrink when there are many countries so
# that one season's cluster never spills into the neighbouring season.
max_group_width = 0.8
width = min(0.1, max_group_width / max(len(countries), 1))  # width of the bars

fig, ax = plt.subplots()

# Offsets (in units of one bar width) that centre each cluster on its tick:
# for n countries they run from -(n - 1) / 2 to +(n - 1) / 2.
x_coords = np.arange(len(countries)) - (len(countries) - 1) / 2

for country, x_coord in zip(countries, x_coords):
    country_rows = df_country_by_season.loc[
        df_country_by_season['Hotel_Country'] == country
    ]
    amount_by_season = dict(
        zip(country_rows['Season'], country_rows['Guest Amount'])
    )
    # Always produce one height per season, using 0 where a country has no
    # rows for that season, so the heights line up with the tick positions.
    country_guests = [amount_by_season.get(season, 0) for season in seasons]
    ax.bar(x + x_coord * width, country_guests, width, label=country)

ax.set_ylabel('Number of guests')
ax.set_title('Hotel guests by country and season')
ax.set_xticks(x)
ax.set_xticklabels(seasons)
ax.legend()

plt.show()