In [None]:
# prompt: forecasting house prices accurately using a smart regression technique in data science

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Read the housing data (swap 'house_prices.csv' for the path of your own file).
# df starts as None and stays None when the file is missing, so the
# `if df is not None:` guard further down skips the pipeline.
df = None
try:
  df = pd.read_csv('house_prices.csv')
except FileNotFoundError:
  print("Error: 'house_prices.csv' not found. Please upload your dataset.")

if df is not None:
  # End-to-end pipeline: clean the data, encode categoricals, split, impute,
  # scale, fit a gradient-boosting regressor and report hold-out metrics.
  target_col = 'SalePrice'  # Assuming 'SalePrice' is the target
  if target_col not in df.columns:
    raise KeyError(f"Target column '{target_col}' not found in the dataset.")

  # Rows without a target carry no supervision signal. Filling the target
  # with its mean would fabricate labels, so drop those rows instead.
  # Work on a new frame so the loaded `df` itself is left untouched.
  data = df.dropna(subset=[target_col])

  # Separate features (X) and target variable (y) first so the target is
  # never imputed or encoded together with the features.
  X = data.drop(target_col, axis=1)
  y = data[target_col]

  # One-hot encode categorical features. Done before the split so the train
  # and test matrices share the same columns; no statistics are learned here.
  categorical_cols = X.select_dtypes(exclude=np.number).columns
  X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)

  # Feature selection (e.g. keeping the most important columns according to
  # a fitted model) can be added here if needed.

  # Split data into training and testing sets
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

  # Impute missing numerical values with means learned from the training
  # split only; using whole-dataset means would leak test-set information.
  numerical_cols = X_train.select_dtypes(include=np.number).columns
  # A column that is entirely NaN in the training split has a NaN mean;
  # fall back to 0 so no NaN reaches the model (which does not accept NaN).
  train_means = X_train[numerical_cols].mean().fillna(0)
  X_train = X_train.fillna(train_means)
  X_test = X_test.fillna(train_means)

  # Feature Scaling (fit on the training split, reuse on the test split).
  # Tree ensembles do not require scaling; it is kept so the pipeline still
  # works if the model is swapped for a scale-sensitive one.
  scaler = StandardScaler()
  X_train = scaler.fit_transform(X_train)
  X_test = scaler.transform(X_test)

  # Model Training (Gradient Boosting Regressor as an example)
  model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
  model.fit(X_train, y_train)

  # Make predictions on the test set
  y_pred = model.predict(X_test)

  # Evaluate the model
  mse = mean_squared_error(y_test, y_pred)
  rmse = np.sqrt(mse)
  r2 = r2_score(y_test, y_pred)

  print(f"Mean Squared Error: {mse}")
  print(f"Root Mean Squared Error: {rmse}")
  print(f"R-squared: {r2}")

  # Further analysis, hyperparameter tuning, and model selection can be done.




Error: 'house_prices.csv' not found. Please upload your dataset.
