In [28]:
# ================================
#  AIRBNB DYNAMIC PRICING ENGINE
#  FULL GOOGLE COLAB PIPELINE
# ================================

import pandas as pd
import numpy as np

# ---- STEP 1: LOAD DATA ----
df = pd.read_csv("/content/Airbnb_Data.csv", on_bad_lines='skip', engine='python')
print("Data Loaded Successfully!")
print(df.head())

# ---- STEP 2: CREATE PRICE COLUMN FROM log_price ----
df["price"] = np.exp(df["log_price"]).round(2)
print("\nPrice column created from log_price:")
print(df[["log_price", "price"]].head())

# ---- STEP 3: DROP COLUMNS NOT SUITABLE FOR MODELING ----
drop_cols = [
    "id", "description", "name", "first_review", "last_review",
    "amenities", "thumbnail_url", "host_since",
    "host_response_rate", "zipcode"
]

df = df.drop(columns=drop_cols, errors="ignore")
print("\nColumns after dropping:")
print(df.columns)

# ---- STEP 4: HANDLE MISSING VALUES ----
num_cols = ['bathrooms', 'bedrooms', 'beds', 'review_scores_rating']

for col in num_cols:
    df[col] = df[col].fillna(df[col].median())

df["neighbourhood"] = df["neighbourhood"].fillna("Unknown")

print("\nMissing values handled!")

# ---- STEP 5: ONE-HOT ENCODING OF CATEGORICAL COLUMNS ----
categorical = [
    "property_type", "room_type", "bed_type", "cancellation_policy",
    "city", "host_has_profile_pic", "host_identity_verified",
    "instant_bookable", "neighbourhood", "cleaning_fee"
]

df = pd.get_dummies(df, columns=categorical, drop_first=True)
print("\nCategorical variables encoded!")
print(df.head())



Data Loaded Successfully!
         id  log_price property_type        room_type  \
0   6901257   5.010635     Apartment  Entire home/apt   
1   6304928   5.129899     Apartment  Entire home/apt   
2   7919400   4.976734     Apartment  Entire home/apt   
3  13418779   6.620073         House  Entire home/apt   
4   3808709   4.744932     Apartment  Entire home/apt   

                                           amenities  accommodates  bathrooms  \
0  {"Wireless Internet","Air conditioning",Kitche...             3        1.0   
1  {"Wireless Internet","Air conditioning",Kitche...             7        1.0   
2  {TV,"Cable TV","Wireless Internet","Air condit...             5        1.0   
3  {TV,"Cable TV",Internet,"Wireless Internet",Ki...             4        1.0   
4  {TV,Internet,"Wireless Internet","Air conditio...             2        1.0   

   bed_type cancellation_policy  cleaning_fee  ...   latitude   longitude  \
0  Real Bed              strict          True  ...  40.696524  -73.

In [29]:
# ---- STEP 6: TRAIN TEST SPLIT ----
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

X = df.drop("price", axis=1)
y = df["price"]

# Ensure no NaNs remain in X before splitting
X = X.fillna(0)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ---- STEP 7: TRAIN MODEL ----
model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("\nMODEL TRAINED!")
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
print("R2 Score:", r2_score(y_test, y_pred))

# ---- STEP 8: PRICE SUGGESTION FUNCTION ----
def suggest_price(input_dict):
    temp = pd.DataFrame([input_dict])

    # Add missing columns
    for col in X.columns:
        if col not in temp:
            temp[col] = 0

    return round(model.predict(temp[X.columns])[0], 2)

print("\nPrice suggestion function is ready!")




MODEL TRAINED!
RMSE: 81.55801814869898
R2 Score: 0.7630404990398665

Price suggestion function is ready!

File exported for Tableau: Airbnb_Tableau_Final.csv


In [30]:
# ---- STEP 9: EXPORT CLEANED DATA FOR TABLEAU ----
output_path = "Airbnb_Tableau_Final.csv"
df.to_csv(output_path, index=False)

print("\nFile exported for Tableau:", output_path)


File exported for Tableau: Airbnb_Tableau_Final.csv


In [32]:
from google.colab import files
files.download("Airbnb_Tableau_Final.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>