In [48]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as st
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
import statsmodels.api as sm

In [34]:
airbnb = pd.read_csv("../data/clean/airbnb_merged.csv")
airbnb

Unnamed: 0,realSum,room_type,person_capacity,host_is_superhost,multi,biz,cleanliness_rating,guest_satisfaction_overall,bedrooms,dist,metro_dist,attr_index,rest_index,weekend,city
0,194.033698,Private room,2.0,False,1,0,10.0,93.0,1,5.022964,2.539380,78.690379,98.253896,False,Amsterdam
1,344.245776,Private room,4.0,False,0,0,8.0,85.0,1,0.488389,0.239404,631.176378,837.280757,False,Amsterdam
2,264.101422,Private room,2.0,False,0,1,9.0,87.0,1,5.748312,3.651621,75.275877,95.386955,False,Amsterdam
3,433.529398,Private room,4.0,False,0,1,9.0,90.0,2,0.384862,0.439876,493.272534,875.033098,False,Amsterdam
4,485.552926,Private room,2.0,True,0,0,10.0,98.0,1,0.544738,0.318693,552.830324,815.305740,False,Amsterdam
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51702,715.938574,Entire home/apt,6.0,False,0,1,10.0,100.0,3,0.530181,0.135447,219.402478,438.756874,True,Vienna
51703,304.793960,Entire home/apt,2.0,False,0,0,8.0,86.0,1,0.810205,0.100839,204.970121,342.182813,True,Vienna
51704,637.168969,Entire home/apt,2.0,False,0,0,10.0,93.0,1,0.994051,0.202539,169.073402,282.296424,True,Vienna
51705,301.054157,Private room,2.0,False,0,0,10.0,87.0,1,3.044100,0.287435,109.236574,158.563398,True,Vienna


# PRICE

## City Hypothesis (ANOVA)

In [16]:
amsterdam = airbnb[airbnb["city"] == "Amsterdam"]["realSum"]
athens = airbnb[airbnb["city"] == "Athens"]["realSum"]
barcelona = airbnb[airbnb["city"] == "Barcelona"]["realSum"]
berlin = airbnb[airbnb["city"] == "Berlin"]["realSum"]
budapest = airbnb[airbnb["city"] == "Budapest"]["realSum"]
lisbon = airbnb[airbnb["city"] == "Lisbon"]["realSum"]
london = airbnb[airbnb["city"] == "London"]["realSum"]
paris = airbnb[airbnb["city"] == "Paris"]["realSum"]
rome = airbnb[airbnb["city"] == "Rome"]["realSum"]
vienna = airbnb[airbnb["city"] == "Vienna"]["realSum"]


#H0: mu realSum amsterdam = mu realSum athens  = mu realSum barcelona = mu realSum berlin = mu realSum budapest  = mu realSum lisbon = mu realSum london = mu realSum paris
    # = mu realSum rome = mu realSum vienna
#H1: mu realSum amsterdam != mu realSum athens  != mu realSum barcelona != mu realSum berlin != mu realSum budapest  != mu realSum lisbon != mu realSum london != mu realSum paris
    # != mu realSum rome != mu realSum vienna

alpha = 0.05
f_stat, p_value = st.f_oneway(amsterdam, athens, barcelona, berlin, budapest, lisbon, london, paris, rome, vienna)

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in prices between cities")
else:
    print("Not Reject Ho. There are not significant differences in prices between cities")

P-valor: 0.0000
Reject Ho. There are significant differences in prices between cities


## Weekend Hypothesis (Two Sample t-test)

In [21]:
weekend = airbnb[airbnb["weekend"] == True]["realSum"]
weekdays = airbnb[airbnb["weekend"] == False]["realSum"]

#H0: mu realSum weekend = mu realSum weekdays
#H1: mu realSum weekend != mu realSum weekdays

alpha = 0.05
f_stat, p_value = st.ttest_ind(weekend, weekdays, equal_var=False, alternative="two-sided")

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in prices between weekend and weekdays")
else:
    print("Not Reject Ho. There are not significant differences in prices between weekend and weekdays")

P-valor: 0.0042
Reject Ho. There are significant differences in prices between weekend and weekdays


In [23]:
#H0: mu realSum weekend  <= mu realSum weekdays
#H1: mu realSum weekend > mu realSum weekdays

alpha = 0.05
f_stat, p_value = st.ttest_ind(weekend, weekdays, equal_var=False, alternative="greater")

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. Prices during the weekend are higher than during weekdays")
else:
    print("Not Reject Ho. Prices during the weekend are smaller or equal than during weekdays")

P-valor: 0.0021
Reject Ho. There are significant differences in prices between weekend and weekdays


## Room Type Hypothesis (ANOVA)

In [25]:
private_room = airbnb[airbnb["room_type"] == "Private room"]["realSum"]
entire_apartment = airbnb[airbnb["room_type"] == "Entire home/apt"]["realSum"]
shared_room = airbnb[airbnb["room_type"] == "Shared room"]["realSum"]

#H0: mu realSum private_room = mu realSum entire_apartment  = mu realSum shared_room   
#H1: mu realSum private_room != mu realSum entire_apartment != mu realSum shared_room  

alpha = 0.05
f_stat, p_value = st.f_oneway(private_room, entire_apartment, shared_room)

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in prices between room types")
else:
    print("Not Reject Ho. There are not significant differences in prices between room types")

P-valor: 0.0000
Reject Ho. There are significant differences in prices between cities


## Superhost Hypothesis (Two Sample t-test)

In [27]:
superhost = airbnb[airbnb["host_is_superhost"] == True]["realSum"]
no_superhost = airbnb[airbnb["host_is_superhost"] == False]["realSum"]

#H0: mu realSum superhost = mu realSum no_superhost
#H1: mu realSum superhost != mu realSum no_superhost

alpha = 0.05
f_stat, p_value = st.ttest_ind(superhost, no_superhost, equal_var=False, alternative="two-sided")

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in prices between superhosts and non superhosts")
else:
    print("Not Reject Ho. There are not significant differences in prices between superhosts and non superhosts")

P-valor: 0.0000
Reject Ho. There are significant differences in prices between superhosts and non superhosts


In [28]:
#H0: mu realSum superhost  <= mu realSum no superhost
#H1: mu realSum superhost > mu realSum no superhost

alpha = 0.05
f_stat, p_value = st.ttest_ind(superhost, no_superhost, equal_var=False, alternative="greater")

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. Prices when the host is a superhost are higher than when the host is not a superhost")
else:
    print("Not Reject Ho. Prices when the host is a superhost are smaller or equal than when the host is not a superhost")

P-valor: 1.0000
Not Reject Ho. Prices when the host is a superhost are smaller or equal than when the host is not a superhost


## Cleanliness Hypothesis (ANOVA)

In [37]:
airbnb["cleanliness_rating"] = airbnb["cleanliness_rating"].astype(int)
#airbnb["cleanliness_rating"].unique()

cleanliness_2 = airbnb[airbnb["cleanliness_rating"] == 2]["realSum"]
cleanliness_3 = airbnb[airbnb["cleanliness_rating"] == 3]["realSum"]
cleanliness_4 = airbnb[airbnb["cleanliness_rating"] == 4]["realSum"]
cleanliness_5 = airbnb[airbnb["cleanliness_rating"] == 5]["realSum"]
cleanliness_6 = airbnb[airbnb["cleanliness_rating"] == 6]["realSum"]
cleanliness_7 = airbnb[airbnb["cleanliness_rating"] == 7]["realSum"]
cleanliness_8 = airbnb[airbnb["cleanliness_rating"] == 8]["realSum"]
cleanliness_9 = airbnb[airbnb["cleanliness_rating"] == 9]["realSum"]
cleanliness_10 = airbnb[airbnb["cleanliness_rating"] == 10]["realSum"]

#H0: mu realSum cleanliness_2 = mu realSum cleanliness_3  = mu realSum cleanliness_4 = mu realSum cleanliness_5 = mu realSum cleanliness_6 = mu realSum cleanliness_7
# = mu realSum cleanliness_8 = = mu realSum cleanliness_9 = mu realSum cleanliness_10
#H1: mu realSum cleanliness_2 != mu realSum cleanliness_3  != mu realSum cleanliness_4 != mu realSum cleanliness_5 != mu realSum cleanliness_6 != mu realSum cleanliness_7
# = mu realSum cleanliness_8 != mu realSum cleanliness_9 != mu realSum cleanliness_10

alpha = 0.05
f_stat, p_value = st.f_oneway(cleanliness_2, cleanliness_3, cleanliness_4, cleanliness_5, cleanliness_6, cleanliness_7, cleanliness_8, cleanliness_9, cleanliness_10)

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in prices between different cleanliness ratings")
else:
    print("Not Reject Ho. There are not significant differences in prices between different cleanliness ratings")

P-valor: 0.0009
Reject Ho. There are significant differences in prices between different cleanliness ratings


## Bedrooms Hypothesis (ANOVA)

In [47]:
#airbnb["bedrooms"].unique()

bedrooms_0 = airbnb[airbnb["bedrooms"] == 2]["realSum"]
bedrooms_1 = airbnb[airbnb["bedrooms"] == 1]["realSum"]
bedrooms_2 = airbnb[airbnb["bedrooms"] == 2]["realSum"]
bedrooms_3 = airbnb[airbnb["bedrooms"] == 3]["realSum"]
bedrooms_4 = airbnb[airbnb["bedrooms"] == 4]["realSum"]
bedrooms_5 = airbnb[airbnb["bedrooms"] == 5]["realSum"]
bedrooms_6 = airbnb[airbnb["bedrooms"] == 6]["realSum"]
bedrooms_8 = airbnb[airbnb["bedrooms"] == 8]["realSum"]
bedrooms_9 = airbnb[airbnb["bedrooms"] == 9]["realSum"]
bedrooms_10 = airbnb[airbnb["bedrooms"] == 10]["realSum"]

#H0: mu realSum bedrooms_0 = mu realSum bedrooms_1  = mu realSum bedrooms_2 = mu realSum bedrooms_3 = mu realSum bedrooms_4 = mu realSum bedrooms_5
# = mu realSum bedrooms_6 = mu realSum bedrooms_7 = mu realSum bedrooms_8 = mu realSum bedrooms_9 = mu realSum bedrooms_10
#H1: mu realSum bedrooms_0 != mu realSum bedrooms_1 != mu realSum bedrooms_2 != mu realSum bedrooms_3 != mu realSum bedrooms_4 != mu realSum bedrooms_5
# != mu realSum bedrooms_6 != mu realSum bedrooms_7 != mu realSum bedrooms_8 != mu realSum bedrooms_9 != mu realSum bedrooms_10

alpha = 0.05
f_stat, p_value = st.f_oneway(bedrooms_0, bedrooms_1, bedrooms_2, bedrooms_3, bedrooms_4, bedrooms_5, bedrooms_6, bedrooms_8, bedrooms_9, bedrooms_10)

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in prices between different total number of bedrooms")
else:
    print("Not Reject Ho. There are not significant differences in prices between total number of bedrooms")

P-valor: 0.0000
Reject Ho. There are significant differences in prices between different total number of bedrooms


## Metro Dist Hypothesis 

In [59]:
X = airbnb['metro_dist']
y = np.log1p(airbnb['realSum'])

X = sm.add_constant(X)

metro_model = sm.OLS(y, X).fit()


print(metro_model.summary())
#To interpret metro_dist coefficient we should do the exponential np.exp(result)

                            OLS Regression Results                            
Dep. Variable:                realSum   R-squared:                       0.012
Model:                            OLS   Adj. R-squared:                  0.012
Method:                 Least Squares   F-statistic:                     609.2
Date:                Fri, 07 Mar 2025   Prob (F-statistic):          1.01e-133
Time:                        11:32:55   Log-Likelihood:                -46134.
No. Observations:               51707   AIC:                         9.227e+04
Df Residuals:                   51705   BIC:                         9.229e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.4756      0.003   1650.931      0.0

## Attr Index Hypothesis

In [60]:
X = airbnb['attr_index'] 
y = np.log1p(airbnb['realSum']) 

X = sm.add_constant(X)
attr_model = sm.OLS(y, X).fit()

print(attr_model.summary())
#To interpret metro_dist coefficient we should do the exponential np.exp(result)

                            OLS Regression Results                            
Dep. Variable:                realSum   R-squared:                       0.104
Model:                            OLS   Adj. R-squared:                  0.104
Method:                 Least Squares   F-statistic:                     5981.
Date:                Fri, 07 Mar 2025   Prob (F-statistic):               0.00
Time:                        11:45:43   Log-Likelihood:                -43607.
No. Observations:               51707   AIC:                         8.722e+04
Df Residuals:                   51705   BIC:                         8.724e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          5.1743      0.004   1270.071      0.0

# GUEST SATISFACTION

## City Hypothesis (ANOVA)

In [65]:
amsterdam = airbnb[airbnb["city"] == "Amsterdam"]["guest_satisfaction_overall"]
athens = airbnb[airbnb["city"] == "Athens"]["guest_satisfaction_overall"]
barcelona = airbnb[airbnb["city"] == "Barcelona"]["guest_satisfaction_overall"]
berlin = airbnb[airbnb["city"] == "Berlin"]["guest_satisfaction_overall"]
budapest = airbnb[airbnb["city"] == "Budapest"]["guest_satisfaction_overall"]
lisbon = airbnb[airbnb["city"] == "Lisbon"]["guest_satisfaction_overall"]
london = airbnb[airbnb["city"] == "London"]["guest_satisfaction_overall"]
paris = airbnb[airbnb["city"] == "Paris"]["guest_satisfaction_overall"]
rome = airbnb[airbnb["city"] == "Rome"]["guest_satisfaction_overall"]
vienna = airbnb[airbnb["city"] == "Vienna"]["guest_satisfaction_overall"]


#H0: mu guest_satisfaction_overall amsterdam = mu guest_satisfaction_overall athens  = mu guest_satisfaction_overall barcelona = mu guest_satisfaction_overall berlin = mu guest_satisfaction_overall budapest  = mu guest_satisfaction_overall lisbon = mu guest_satisfaction_overall london = mu guest_satisfaction_overall paris
    # = mu guest_satisfaction_overall rome = mu guest_satisfaction_overall vienna
#H1: mu guest_satisfaction_overall amsterdam != mu guest_satisfaction_overall athens  != mu guest_satisfaction_overall barcelona != mu guest_satisfaction_overall berlin != mu guest_satisfaction_overall budapest  != mu guest_satisfaction_overall lisbon != mu guest_satisfaction_overall london != mu guest_satisfaction_overall paris
    # != mu guest_satisfaction_overall rome != mu guest_satisfaction_overall vienna

alpha = 0.05
f_stat, p_value = st.f_oneway(amsterdam, athens, barcelona, berlin, budapest, lisbon, london, paris, rome, vienna)

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in guest_satisfaction_overall between cities")
else:
    print("Not Reject Ho. There are not significant differences in guest_satisfaction_overall between cities")

P-valor: 0.0000
Reject Ho. There are significant differences in guest_satisfaction_overall between cities


## Room Type Hypothesis (ANOVA)

In [66]:
private_room = airbnb[airbnb["room_type"] == "Private room"]["guest_satisfaction_overall"]
entire_apartment = airbnb[airbnb["room_type"] == "Entire home/apt"]["guest_satisfaction_overall"]
shared_room = airbnb[airbnb["room_type"] == "Shared room"]["guest_satisfaction_overall"]

#H0: mu guest_satisfaction_overall private_room = mu guest_satisfaction_overall entire_apartment  = mu guest_satisfaction_overall shared_room   
#H1: mu guest_satisfaction_overall private_room != mu guest_satisfaction_overall entire_apartment != mu guest_satisfaction_overall shared_room  

alpha = 0.05
f_stat, p_value = st.f_oneway(private_room, entire_apartment, shared_room)

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in guest_satisfaction_overall between room types")
else:
    print("Not Reject Ho. There are not significant differences in guest_satisfaction_overall between room types")

P-valor: 0.0000
Reject Ho. There are significant differences in guest_satisfaction_overall between room types


## Superhost Hypothesis (Two Sample t-test)

In [67]:
superhost = airbnb[airbnb["host_is_superhost"] == True]["guest_satisfaction_overall"]
no_superhost = airbnb[airbnb["host_is_superhost"] == False]["guest_satisfaction_overall"]

#H0: mu guest_satisfaction_overall superhost = mu guest_satisfaction_overall no_superhost
#H1: mu guest_satisfaction_overall superhost != mu guest_satisfaction_overall no_superhost

alpha = 0.05
f_stat, p_value = st.ttest_ind(superhost, no_superhost, equal_var=False, alternative="two-sided")

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in guest_satisfaction_overall between superhosts and non superhosts")
else:
    print("Not Reject Ho. There are not significant differences in guest_satisfaction_overall between superhosts and non superhosts")

P-valor: 0.0000
Reject Ho. There are significant differences in guest_satisfaction_overall between superhosts and non superhosts


In [68]:
#H0: mu guest_satisfaction_overall superhost  <= mu guest_satisfaction_overall no superhost
#H1: mu guest_satisfaction_overall superhost > mu guest_satisfaction_overall no superhost

alpha = 0.05
f_stat, p_value = st.ttest_ind(superhost, no_superhost, equal_var=False, alternative="greater")

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. guest_satisfaction_overall when the host is a superhost are higher than when the host is not a superhost")
else:
    print("Not Reject Ho. guest_satisfaction_overall when the host is a superhost are smaller or equal than when the host is not a superhost")

P-valor: 0.0000
Reject Ho. guest_satisfaction_overall when the host is a superhost are higher than when the host is not a superhost


## Cleanliness Hypothesis (ANOVA)

In [69]:
cleanliness_2 = airbnb[airbnb["cleanliness_rating"] == 2]["guest_satisfaction_overall"]
cleanliness_3 = airbnb[airbnb["cleanliness_rating"] == 3]["guest_satisfaction_overall"]
cleanliness_4 = airbnb[airbnb["cleanliness_rating"] == 4]["guest_satisfaction_overall"]
cleanliness_5 = airbnb[airbnb["cleanliness_rating"] == 5]["guest_satisfaction_overall"]
cleanliness_6 = airbnb[airbnb["cleanliness_rating"] == 6]["guest_satisfaction_overall"]
cleanliness_7 = airbnb[airbnb["cleanliness_rating"] == 7]["guest_satisfaction_overall"]
cleanliness_8 = airbnb[airbnb["cleanliness_rating"] == 8]["guest_satisfaction_overall"]
cleanliness_9 = airbnb[airbnb["cleanliness_rating"] == 9]["guest_satisfaction_overall"]
cleanliness_10 = airbnb[airbnb["cleanliness_rating"] == 10]["guest_satisfaction_overall"]

#H0: mu guest_satisfaction_overall cleanliness_2 = mu guest_satisfaction_overall cleanliness_3  = mu guest_satisfaction_overall cleanliness_4 = mu guest_satisfaction_overall cleanliness_5 = mu guest_satisfaction_overall cleanliness_6 = mu guest_satisfaction_overall cleanliness_7
# = mu guest_satisfaction_overall cleanliness_8 = = mu guest_satisfaction_overall cleanliness_9 = mu guest_satisfaction_overall cleanliness_10
#H1: mu guest_satisfaction_overall cleanliness_2 != mu guest_satisfaction_overall cleanliness_3  != mu guest_satisfaction_overall cleanliness_4 != mu guest_satisfaction_overall cleanliness_5 != mu guest_satisfaction_overall cleanliness_6 != mu guest_satisfaction_overall cleanliness_7
# = mu guest_satisfaction_overall cleanliness_8 != mu guest_satisfaction_overall cleanliness_9 != mu guest_satisfaction_overall cleanliness_10

alpha = 0.05
f_stat, p_value = st.f_oneway(cleanliness_2, cleanliness_3, cleanliness_4, cleanliness_5, cleanliness_6, cleanliness_7, cleanliness_8, cleanliness_9, cleanliness_10)

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in guest_satisfaction_overall between different cleanliness ratings")
else:
    print("Not Reject Ho. There are not significant differences in guest_satisfaction_overall between different cleanliness ratings")

P-valor: 0.0000
Reject Ho. There are significant differences in guest_satisfaction_overall between different cleanliness ratings


## Bedroom Hypothesis (ANOVA)

In [70]:
bedrooms_0 = airbnb[airbnb["bedrooms"] == 2]["guest_satisfaction_overall"]
bedrooms_1 = airbnb[airbnb["bedrooms"] == 1]["guest_satisfaction_overall"]
bedrooms_2 = airbnb[airbnb["bedrooms"] == 2]["guest_satisfaction_overall"]
bedrooms_3 = airbnb[airbnb["bedrooms"] == 3]["guest_satisfaction_overall"]
bedrooms_4 = airbnb[airbnb["bedrooms"] == 4]["guest_satisfaction_overall"]
bedrooms_5 = airbnb[airbnb["bedrooms"] == 5]["guest_satisfaction_overall"]
bedrooms_6 = airbnb[airbnb["bedrooms"] == 6]["guest_satisfaction_overall"]
bedrooms_8 = airbnb[airbnb["bedrooms"] == 8]["guest_satisfaction_overall"]
bedrooms_9 = airbnb[airbnb["bedrooms"] == 9]["guest_satisfaction_overall"]
bedrooms_10 = airbnb[airbnb["bedrooms"] == 10]["guest_satisfaction_overall"]

#H0: mu guest_satisfaction_overall bedrooms_0 = mu guest_satisfaction_overall bedrooms_1  = mu guest_satisfaction_overall bedrooms_2 = mu guest_satisfaction_overall bedrooms_3 = mu guest_satisfaction_overall bedrooms_4 = mu guest_satisfaction_overall bedrooms_5
# = mu guest_satisfaction_overall bedrooms_6 = mu guest_satisfaction_overall bedrooms_7 = mu guest_satisfaction_overall bedrooms_8 = mu guest_satisfaction_overall bedrooms_9 = mu guest_satisfaction_overall bedrooms_10
#H1: mu guest_satisfaction_overall bedrooms_0 != mu guest_satisfaction_overall bedrooms_1 != mu guest_satisfaction_overall bedrooms_2 != mu guest_satisfaction_overall bedrooms_3 != mu guest_satisfaction_overall bedrooms_4 != mu guest_satisfaction_overall bedrooms_5
# != mu guest_satisfaction_overall bedrooms_6 != mu guest_satisfaction_overall bedrooms_7 != mu guest_satisfaction_overall bedrooms_8 != mu guest_satisfaction_overall bedrooms_9 != mu guest_satisfaction_overall bedrooms_10

alpha = 0.05
f_stat, p_value = st.f_oneway(bedrooms_0, bedrooms_1, bedrooms_2, bedrooms_3, bedrooms_4, bedrooms_5, bedrooms_6, bedrooms_8, bedrooms_9, bedrooms_10)

print(f"P-valor: {p_value:.4f}")

if p_value < alpha:
    print("Reject Ho. There are significant differences in guest_satisfaction_overall between different total number of bedrooms")
else:
    print("Not Reject Ho. There are not significant differences in guest_satisfaction_overall between total number of bedrooms")

P-valor: 0.0023
Reject Ho. There are significant differences in guest_satisfaction_overall between different total number of bedrooms


## Metro District Hypothesis

In [71]:
X = airbnb['metro_dist']
y = np.log1p(airbnb['guest_satisfaction_overall'])

X = sm.add_constant(X)

model_log = sm.OLS(y, X).fit()


print(model_log.summary())
#To interpret metro_dist coefficient we should do the exponential np.exp(result)

                                OLS Regression Results                                
Dep. Variable:     guest_satisfaction_overall   R-squared:                       0.000
Model:                                    OLS   Adj. R-squared:                  0.000
Method:                         Least Squares   F-statistic:                     21.87
Date:                        Fri, 07 Mar 2025   Prob (F-statistic):           2.92e-06
Time:                                12:19:06   Log-Likelihood:                 33090.
No. Observations:                       51707   AIC:                        -6.618e+04
Df Residuals:                           51705   BIC:                        -6.616e+04
Df Model:                                   1                                         
Covariance Type:                    nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------

## Attr Index Hypothesis

In [72]:
X = airbnb['attr_index'] 
y = np.log1p(airbnb['guest_satisfaction_overall']) 

X = sm.add_constant(X)
attr_model = sm.OLS(y, X).fit()

print(attr_model.summary())
#To interpret metro_dist coefficient we should do the exponential np.exp(result)

                                OLS Regression Results                                
Dep. Variable:     guest_satisfaction_overall   R-squared:                       0.001
Model:                                    OLS   Adj. R-squared:                  0.001
Method:                         Least Squares   F-statistic:                     72.64
Date:                        Fri, 07 Mar 2025   Prob (F-statistic):           1.60e-17
Time:                                12:20:27   Log-Likelihood:                 33116.
No. Observations:                       51707   AIC:                        -6.623e+04
Df Residuals:                           51705   BIC:                        -6.621e+04
Df Model:                                   1                                         
Covariance Type:                    nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------

## Price/ Client Satisfaction

In [74]:
X = airbnb['guest_satisfaction_overall'] 
y = np.log1p(airbnb['realSum']) 

X = sm.add_constant(X)
satisfaction_model = sm.OLS(y, X).fit()

print(satisfaction_model.summary())

                            OLS Regression Results                            
Dep. Variable:                realSum   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     8.961
Date:                Fri, 07 Mar 2025   Prob (F-statistic):            0.00276
Time:                        12:22:15   Log-Likelihood:                -46432.
No. Observations:               51707   AIC:                         9.287e+04
Df Residuals:                   51705   BIC:                         9.289e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------
const               