## Airbnb Listing Price Prediction and Recommendation
Internship Project

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

# --------------------------------------------------
# Sample Airbnb dataset (offline, no download)
# --------------------------------------------------
# The sample is written one listing per row so each record can be read at a
# glance; it is then transposed into the column-oriented dict that `data`
# exposes (column name -> list of values, in the order given below).
listing_columns = [
    "Listing_ID", "Location", "Room_Type",
    "Reviews", "Availability_365", "Base_Price",
]
listing_rows = [
    (101, "New York", "Entire home",  120, 180, 175),
    (102, "Paris",    "Private room",  45, 250,  90),
    (103, "London",   "Entire home",   78, 200, 150),
    (104, "Berlin",   "Shared room",   30, 300,  55),
    (105, "Tokyo",    "Entire home",  210, 150, 190),
    (106, "New York", "Private room",  60, 220, 110),
    (107, "Paris",    "Entire home",   95, 190, 160),
    (108, "London",   "Private room",  40, 270,  85),
]

data = {
    column: list(values)
    for column, values in zip(listing_columns, zip(*listing_rows))
}

df = pd.DataFrame(data)

# --------------------------------------------------
# Encode categorical variables
# --------------------------------------------------
# One encoder per categorical column; each is kept under its own name so the
# integer codes can be mapped back to the original labels later.
# NOTE(review): scikit-learn documents LabelEncoder as a target encoder; the
# integer codes it yields imply an ordering between categories. Consider a
# feature encoder (e.g. one-hot) if this model is taken further.
le_location, le_room = LabelEncoder(), LabelEncoder()

for source_col, encoded_col, encoder in (
    ("Location", "Location_Enc", le_location),
    ("Room_Type", "Room_Type_Enc", le_room),
):
    df[encoded_col] = encoder.fit_transform(df[source_col])

# --------------------------------------------------
# Feature set and target
# --------------------------------------------------
X = df[["Location_Enc", "Room_Type_Enc", "Reviews", "Availability_365"]]
y = df["Base_Price"]

# --------------------------------------------------
# Train-test split
# --------------------------------------------------
# Split BEFORE any preprocessing is fitted: fitting the scaler on the full
# matrix would let the held-out rows influence the mean/variance used to
# transform the training data (data leakage). The split itself depends only on
# the number of rows and random_state, so the same rows land in train/test as
# when splitting an already-scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# --------------------------------------------------
# Scale features (statistics learned from training rows only)
# --------------------------------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix pushed through the train-fitted scaler; kept under the
# name `X_scaled` because later code scores every listing with it.
X_scaled = scaler.transform(X)

# --------------------------------------------------
# Train pricing model
# --------------------------------------------------
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42  # fixed seed so repeated runs build the same forest
)

model.fit(X_train, y_train)

# --------------------------------------------------
# Evaluation
# --------------------------------------------------
# Score the held-out rows and report absolute error, root-mean-squared error
# and the coefficient of determination.
pred = model.predict(X_test)

mae, rmse, r2 = (
    mean_absolute_error(y_test, pred),
    np.sqrt(mean_squared_error(y_test, pred)),  # RMSE = sqrt(MSE)
    r2_score(y_test, pred),
)

print("Model Performance")
for metric_label, metric_value in (("MAE :", mae), ("RMSE:", rmse), ("R2  :", r2)):
    print(metric_label, metric_value)

# --------------------------------------------------
# Final price recommendation
# --------------------------------------------------
# Score every listing in the frame (training and held-out rows alike).
df["Predicted_Price"] = model.predict(X_scaled)

# simple business adjustment: recommend 5% above the model's prediction
price_uplift = 1.05
df["Recommended_Price"] = df["Predicted_Price"].mul(price_uplift)

# --------------------------------------------------
# Save output
# --------------------------------------------------
# Columns exported to the CSV, in display order (encoded helper columns and
# Availability_365 are intentionally left out, as in the column list below).
output_columns = [
    "Listing_ID",
    "Location",
    "Room_Type",
    "Reviews",
    "Base_Price",
    "Predicted_Price",
    "Recommended_Price",
]
final_output = df.loc[:, output_columns]

final_output.to_csv("Airbnb_Pricing_Output.csv", index=False)

print("\nProject completed successfully")
final_output.head()


Model Performance
MAE : 15.125000000000007
RMSE: 15.565065049655278
R2  : -1.4227125000000025

Project completed successfully


Unnamed: 0,Listing_ID,Location,Room_Type,Reviews,Base_Price,Predicted_Price,Recommended_Price
0,101,New York,Entire home,120,175,166.6,174.93
1,102,Paris,Private room,45,90,101.45,106.5225
2,103,London,Entire home,78,150,144.45,151.6725
3,104,Berlin,Shared room,30,55,68.55,71.9775
4,105,Tokyo,Entire home,210,190,182.2,191.31
