# 📈 Airbnb Price Prediction Project

In [None]:

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder


In [None]:

# Read the listings CSV into a DataFrame (point data_path at the real file)
data_path = "Airbnb_data.csv"
df = pd.read_csv(data_path)
# Preview the first rows as the cell output
df.head()


In [None]:

# Print the column/dtype summary, then show the per-column count of nulls
df.info()
missing_per_column = df.isnull().sum()
missing_per_column


In [None]:

# Drop rows with missing target or essential features
df = df.dropna(subset=['log_price', 'property_type', 'room_type'])

# Fill the remaining gaps from neighbouring rows.
# `fillna(method='ffill')` is deprecated (pandas >= 2.1) and the `method`
# keyword is removed in pandas 3.0, so use the dedicated `ffill()` method.
# Forward fill cannot fill NaNs in the leading rows (there is no previous
# value to copy), so follow with a backward fill to cover them.
# NOTE(review): row-neighbour imputation is order dependent; a per-column
# median/mode fill may be more appropriate for this data -- confirm.
df = df.ffill().bfill()


In [None]:

# Encode categorical columns as integer codes.
# One LabelEncoder is fitted per column and kept in `label_encoders`, so every
# column's mapping stays available afterwards (e.g. for inverse_transform).
# Refitting a single shared encoder would overwrite its classes_ on each
# iteration and keep only the last column's mapping.
label_encoders = {}
label_enc = LabelEncoder()  # rebound per column below; ends as the last fitted encoder
for col in ['property_type', 'room_type', 'bed_type', 'cancellation_policy', 'city', 'neighbourhood']:
    if col not in df.columns:
        # Column absent from this dataset: skip it
        continue
    label_enc = LabelEncoder()
    # Cast to str first so every value (including missing ones) is encoded as a string label
    df[col] = label_enc.fit_transform(df[col].astype(str))
    label_encoders[col] = label_enc


In [None]:

# Columns used as model inputs, and the column to predict
feature_columns = [
    'property_type',
    'room_type',
    'accommodates',
    'bathrooms',
    'bedrooms',
    'beds',
    'number_of_reviews',
]
target_column = 'log_price'

X = df[feature_columns]
y = df[target_column]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:

# Fit a linear regression on the training split (fit() returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out rows
y_pred = model.predict(X_test)


In [None]:

# Score the predictions against the held-out targets
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # root of the mean squared error

# Print one metric per line, label first
for label, value in (("R²:", r2), ("MAE:", mae), ("RMSE:", rmse)):
    print(label, value)


In [None]:

# Scatter the predicted values against the actual held-out values
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=y_test, y=y_pred, ax=ax)

# Label the axes and title, then overlay a grid
ax.set_xlabel("Actual Price (Log)")
ax.set_ylabel("Predicted Price (Log)")
ax.set_title("Actual vs Predicted Prices")
ax.grid(True)

plt.show()
