In [1]:
import numpy as np
import pandas as pd

# Single place to repoint the notebook at another copy of the dataset.
DATA_DIR = '/kaggle/input/international-hotel-booking-analytics'

users = pd.read_csv(f'{DATA_DIR}/users.csv')
hotels = pd.read_csv(f'{DATA_DIR}/hotels.csv')
reviews = pd.read_csv(f'{DATA_DIR}/reviews.csv')

# Inner joins (the pandas default): rows without a match on the key are dropped.
# Column names that exist on both sides of a merge get the default `_x`/`_y`
# suffixes; clean_data() later renames `country_x`/`country_y`.
tmp_df = hotels.merge(reviews, on='hotel_id')  # attach every review to its hotel
df = tmp_df.merge(users, on='user_id')         # attach the reviewing user's profile

# Data Cleaning

In [2]:
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Rename, type-convert and clean the merged frame in place, then return it.

    Steps, in order:

    1. Rename ``country_x`` -> ``hotel_country`` and ``country_y`` ->
       ``user_country`` (names that are absent are silently skipped by
       ``DataFrame.rename``).
    2. Parse ``review_date`` and ``join_date`` with ``pd.to_datetime``.
    3. Print ``df.info()``, the total number of missing cells and the number
       of fully duplicated rows.
    4. If anything is missing or duplicated: drop duplicate rows, fill NaNs in
       numeric columns with the column mean, and fill NaNs in object (text)
       columns with ``'Unknown'``.

    Every step modifies ``df`` itself, so the caller's frame is fully cleaned
    even when the return value is ignored; the returned object is that same
    frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. Must contain ``review_date`` and ``join_date``
        columns (a ``KeyError`` is raised otherwise).

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, after cleaning.

    Notes
    -----
    * The index is not reset after duplicates are dropped.
    * Only numeric and object columns are filled; missing values in other
      dtypes (e.g. ``NaT`` in the parsed date columns) are counted in the
      report but left as they are.
    """
    # In-place on purpose: callers may rely on the passed-in frame being updated.
    df.rename(columns={'country_x': 'hotel_country', 'country_y': 'user_country'}, inplace=True)

    # Real datetimes are required for any later date arithmetic / .dt accessors.
    df['review_date'] = pd.to_datetime(df['review_date'])
    df['join_date'] = pd.to_datetime(df['join_date'])

    missing_cells = df.isnull().sum().sum()  # total NaN/NaT cells across all columns
    duplicates = df.duplicated().sum()       # rows identical to an earlier row

    print("- Info:")
    df.info()  # info() prints its own report and returns None, so do not wrap it in print()
    print(f"- Missing Values: {missing_cells}")
    print(f"- Duplicate Rows: {duplicates}")

    if missing_cells > 0 or duplicates > 0:
        print("Cleaning...")
        # inplace=True keeps working on the caller's object; rebinding the local
        # name here would leave the caller's frame renamed but not cleaned.
        df.drop_duplicates(inplace=True)

        # Means are computed after de-duplication so repeated rows do not skew them.
        numeric_cols = df.select_dtypes(include=['number']).columns
        df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())

        text_cols = df.select_dtypes(include=['object']).columns
        df[text_cols] = df[text_cols].fillna('Unknown')

        print("Done cleaning.")
    else:
        print("No cleaning needed.")

    return df
# Keep the returned frame so later cells are guaranteed to work on the cleaned
# data, whether clean_data modified its argument or built a new frame.
df = clean_data(df)
df

- Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 29 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   hotel_id               50000 non-null  int64         
 1   hotel_name             50000 non-null  object        
 2   city                   50000 non-null  object        
 3   hotel_country          50000 non-null  object        
 4   star_rating            50000 non-null  int64         
 5   lat                    50000 non-null  float64       
 6   lon                    50000 non-null  float64       
 7   cleanliness_base       50000 non-null  float64       
 8   comfort_base           50000 non-null  float64       
 9   facilities_base        50000 non-null  float64       
 10  location_base          50000 non-null  float64       
 11  staff_base             50000 non-null  float64       
 12  value_for_money_base   50000 non-null  float64      

Unnamed: 0,hotel_id,hotel_name,city,hotel_country,star_rating,lat,lon,cleanliness_base,comfort_base,facilities_base,...,score_facilities,score_location,score_staff,score_value_for_money,review_text,user_gender,user_country,age_group,traveller_type,join_date
0,1,The Azure Tower,New York,United States,5,40.7580,-73.9855,9.1,8.8,8.9,...,8.5,9.0,8.8,8.7,Practice reduce young our because machine. Rec...,Female,New Zealand,25-34,Solo,2021-03-21
1,1,The Azure Tower,New York,United States,5,40.7580,-73.9855,9.1,8.8,8.9,...,8.4,8.9,8.7,8.4,Summer leader sea. Civil morning operation amo...,Female,Spain,55+,Solo,2023-10-05
2,1,The Azure Tower,New York,United States,5,40.7580,-73.9855,9.1,8.8,8.9,...,9.1,9.5,9.1,7.5,Apply need explain movement wait above bar. Tu...,Male,Australia,25-34,Business,2021-12-18
3,1,The Azure Tower,New York,United States,5,40.7580,-73.9855,9.1,8.8,8.9,...,8.2,9.1,8.3,8.4,Soldier job southern back check. Fall check pu...,Other,United Kingdom,18-24,Couple,2023-02-02
4,1,The Azure Tower,New York,United States,5,40.7580,-73.9855,9.1,8.8,8.9,...,8.7,9.4,9.0,7.4,Various get next environment. This them positi...,Female,South Africa,35-44,Business,2024-12-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,25,The Kiwi Grand,Wellington,New Zealand,5,-41.2865,174.7762,9.3,9.2,9.1,...,8.3,9.1,8.5,8.9,National travel race trouble nor whole bank. M...,Female,France,35-44,Couple,2023-09-05
49996,25,The Kiwi Grand,Wellington,New Zealand,5,-41.2865,174.7762,9.3,9.2,9.1,...,9.0,8.7,8.8,8.8,Natural fish body seem home. Check book carry ...,Male,China,18-24,Family,2020-09-14
49997,25,The Kiwi Grand,Wellington,New Zealand,5,-41.2865,174.7762,9.3,9.2,9.1,...,8.3,8.6,8.8,8.5,Interesting outside question question forget. ...,Male,Germany,25-34,Couple,2022-07-12
49998,25,The Kiwi Grand,Wellington,New Zealand,5,-41.2865,174.7762,9.3,9.2,9.1,...,8.1,9.3,9.0,8.7,Dinner apply or mention various.\nSince defens...,Female,United Kingdom,55+,Couple,2022-06-13


# Data-Engineering Questions

# Predictive Modeling Task

# Model Explainability