In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
import joblib

# 🚀 Load the dataset (path is relative to the current working directory)
df = pd.read_csv('rent_data_csv.csv')
print("📊 Data Loaded!")

# ✅ Rename the raw CSV headers to the short names used by the rest of the
# pipeline. rename() skips mapping keys that are not present in the frame
# without raising, so the outcome is verified right below.
df = df.rename(columns={
    'District': 'location',
    'HouseType': 'category',
    'Bedrooms': 'bedrooms',
    'Bathrooms': 'bathrooms',
    'Amenity': 'amenity',
    'EstimatedRent(RWF)': 'price',
})

# 🛑 Fail fast on schema drift: every column the encoding, scaling and
# training steps read must exist now. Otherwise the mismatch would only show
# up later as a bare KeyError that does not say which CSV header is wrong.
required_columns = [
    'location', 'category', 'bedrooms', 'bathrooms', 'amenity', 'price',
]
missing_columns = [name for name in required_columns if name not in df.columns]
if missing_columns:
    raise KeyError(
        "rent_data_csv.csv is missing expected column(s) after renaming: "
        f"{missing_columns}; columns found: {list(df.columns)}"
    )

# 🧼 Encode categorical data
# One LabelEncoder per categorical column, keyed by column name. The fitted
# encoders are kept in `label_encoders` (they are written to disk further
# down in this script).
categorical_columns = ('location', 'category', 'amenity')
label_encoders = {column: LabelEncoder() for column in categorical_columns}

# Fit each encoder on its column and overwrite the column with the resulting
# integer codes.
for column, le in label_encoders.items():
    df[column] = le.fit_transform(df[column])

# ⚖️ Scale features
# Feature matrix (encoded categoricals + room counts) and the rent target.
feature_columns = ['location', 'category', 'bedrooms', 'bathrooms', 'amenity']
X = df[feature_columns]
y = df['price']

# The scaler is fitted on the full dataset; no train/test split is made here.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 🤖 Train model
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_scaled, y)
print("✅ Model trained!")

import joblib

# Save the model and encoders
# Each fitted object goes to its own pickle file in the current working
# directory, written in this order: model, label encoders, scaler.
artifacts = (
    (model, "model.pkl"),
    (label_encoders, "label_encoders.pkl"),
    (scaler, "scaler.pkl"),
)
for artifact, filename in artifacts:
    joblib.dump(artifact, filename)

print("✅ Model and preprocessing files saved!")


📊 Data Loaded!
✅ Model trained!
✅ Model and preprocessing files saved!
