# In [None]:
# -----------------------------
# Step 1: Import Libraries
# -----------------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# -----------------------------
# Step 2: Create Sample Dataset
# -----------------------------
# Column names, in the order the values appear in each project record below.
_columns = (
    'Area_sqft',
    'Material_Index',
    'Labor_Rate',
    'Design_Complexity',
    'Region',
    'Cost_PKR',
)

# One tuple per sample project (ten in total).
_rows = (
    (1200, 1.0, 1200, 2, 'Rural', 2500000),
    (1500, 1.2, 1500, 3, 'Semi-Urban', 3100000),
    (1800, 1.3, 1600, 4, 'Urban', 4000000),
    (2400, 1.1, 1300, 2, 'Rural', 3200000),
    (3000, 1.4, 1700, 4, 'Urban', 4500000),
    (3500, 1.3, 1650, 5, 'Urban', 5000000),
    (4000, 1.5, 1800, 3, 'Semi-Urban', 4800000),
    (4500, 1.2, 1550, 4, 'Rural', 4200000),
    (5000, 1.4, 1750, 5, 'Urban', 5200000),
    (5500, 1.5, 1900, 5, 'Semi-Urban', 5600000),
)

# Transpose the records into a {column name: list of values} mapping.
data = {
    name: list(values)
    for name, values in zip(_columns, zip(*_rows))
}

df = pd.DataFrame(data)

# Show the first rows so the table can be checked by eye.
print("Dataset Preview:\n", df.head())

# -----------------------------
# Step 3: Preprocessing (Convert Region to Numeric)
# -----------------------------
# Integer code assigned to each Region label.
region_codes = {'Rural': 0, 'Semi-Urban': 1, 'Urban': 2}

# Series.map() replaces any label that is missing from the mapping with NaN
# without raising, and the NaN would only surface later as an unrelated error
# when the model is fitted. Check up front and fail with a clear message.
unknown_regions = set(df['Region'].unique()) - set(region_codes)
if unknown_regions:
    raise ValueError(
        f"Unmapped Region value(s): {unknown_regions!r}; "
        f"expected one of {list(region_codes)}"
    )

df['Region'] = df['Region'].map(region_codes)

# -----------------------------
# Step 4: Define Features (X) and Target (y)
# -----------------------------
# X holds the five predictor columns; y is the cost column to be predicted.
X = df[['Area_sqft', 'Material_Index', 'Labor_Rate', 'Design_Complexity', 'Region']]
y = df['Cost_PKR']

# -----------------------------
# Step 5: Split Dataset (Train/Test)
# -----------------------------
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# -----------------------------
# Step 6: Train the Model
# -----------------------------
# fit() returns the estimator itself, so construction and training chain.
model = LinearRegression().fit(X_train, y_train)

# -----------------------------
# Step 7: Make Predictions
# -----------------------------
# Predicted costs for the held-out rows, compared against y_test below.
y_pred = model.predict(X_test)

# -----------------------------
# Step 8: Evaluate the Model
# -----------------------------
print("\nModel Evaluation Metrics:")

# Each entry pairs the printed label with the scoring function; every
# function is called as metric(y_test, y_pred).
for _label, _metric in (
    ("Mean Absolute Error:", mean_absolute_error),
    ("Mean Squared Error:", mean_squared_error),
    ("R2 Score:", r2_score),
):
    print(_label, _metric(y_test, y_pred))

# -----------------------------
# Step 9: Visualize Predictions
# -----------------------------
# Draw on the current axes through explicit Axes methods rather than the
# pyplot state-machine wrappers.
ax = plt.gca()

# One point per held-out project: x = actual cost, y = predicted cost.
ax.scatter(y_test, y_pred, color='blue', label='Predicted vs Actual')
ax.set_xlabel('Actual Cost (PKR)')
ax.set_ylabel('Predicted Cost (PKR)')
ax.set_title('Actual vs Predicted Project Costs')
ax.legend()

plt.show()

# -----------------------------
# Step 10: Predict for a New Project
# -----------------------------
# Example new project: 3800 sqft, Material Index 1.3, Labor Rate 1600, Complexity 4, Region = Urban (2)
#
# The model is fitted on a DataFrame, so the new sample is passed as a
# DataFrame with the same column names in the same order. A bare NumPy array
# makes scikit-learn warn that X does not have valid feature names, and it
# gives no protection against values being listed in the wrong order.
new_data = pd.DataFrame(
    [[3800, 1.3, 1600, 4, 2]],
    columns=['Area_sqft', 'Material_Index', 'Labor_Rate', 'Design_Complexity', 'Region'],
)

# predict() returns one value per input row; there is a single row here.
predicted_cost = model.predict(new_data)
print(f"\nPredicted Project Cost: {predicted_cost[0]:,.0f} PKR")
