**Task-1: House Price Prediction**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
# Load the housing dataset and preview its first rows.
df = pd.read_csv('data.csv')
print(df.head())

# Numeric predictors and the regression target.
X = df[['sqft_living', 'bedrooms', 'bathrooms']]
y = df['price']

# Optionally extend X with one-hot encoded city indicators. When the dataset
# has no 'city' column, `encoder` stays None and `encoded_features` stays empty.
encoder = None
encoded_features = []
if 'city' in df.columns:
    # scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the
    # old keyword. Try the current spelling first and fall back so the script
    # also runs on older releases.
    try:
        encoder = OneHotEncoder(drop='first', sparse_output=False)
    except TypeError:
        encoder = OneHotEncoder(drop='first', sparse=False)
    city_encoded = encoder.fit_transform(df[['city']])
    encoded_features = encoder.get_feature_names_out(['city'])
    # Reuse df's index so the column-wise concat lines up row for row even if
    # df does not carry the default 0..n-1 index.
    city_encoded_df = pd.DataFrame(
        city_encoded,
        columns=encoded_features,
        index=df.index,
    )
    X = pd.concat([X, city_encoded_df], axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate regressors, keyed by the label used in the printed reports.
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest Regressor': RandomForestRegressor(
        n_estimators=100,
        random_state=42,
    ),
    'Decision Tree Regressor': DecisionTreeRegressor(random_state=42),
    'Gradient Boosting Regressor': GradientBoostingRegressor(
        n_estimators=100,
        random_state=42,
    ),
}

# Fit each model on the training split and report its RMSE on the held-out
# split (square root of the mean squared error).
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = mse ** 0.5
    print(f'{name} RMSE: {rmse:.2f}')

# Example: price one hypothetical listing with every fitted model.
example = pd.DataFrame({
    'sqft_living': [1500],
    'bedrooms': [3],
    'bathrooms': [2],
    'city': ['Seattle'],
})
if encoder is not None and 'city' in df.columns:
    # Replace the raw city label with the indicator columns produced by the
    # encoder that was fitted on the dataset.
    city_onehot = pd.DataFrame(
        encoder.transform(example[['city']]),
        columns=encoded_features,
    )
    example = pd.concat([example.drop(columns='city'), city_onehot], axis=1)

# Put the columns in the same order as the training matrix. Columns the
# example lacks are filled with 0; columns X does not have are dropped.
example = example.reindex(columns=X.columns, fill_value=0)

print("Example data for prediction:")
print(example)

for name, model in models.items():
    prediction = model.predict(example)
    print(f'{name} prediction: ${prediction[0]:,.2f}')


                  date      price  bedrooms  bathrooms  sqft_living  sqft_lot  \
0  2014-05-02 00:00:00   313000.0       3.0       1.50         1340      7912   
1  2014-05-02 00:00:00  2384000.0       5.0       2.50         3650      9050   
2  2014-05-02 00:00:00   342000.0       3.0       2.00         1930     11947   
3  2014-05-02 00:00:00   420000.0       3.0       2.25         2000      8030   
4  2014-05-02 00:00:00   550000.0       4.0       2.50         1940     10500   

   floors  waterfront  view  condition  sqft_above  sqft_basement  yr_built  \
0     1.5           0     0          3        1340              0      1955   
1     2.0           0     4          5        3370            280      1921   
2     1.0           0     0          4        1930              0      1966   
3     1.0           0     0          4        1000           1000      1963   
4     1.0           0     0          4        1140            800      1976   

   yr_renovated                    str



Random Forest Regressor RMSE: 991988.65
Decision Tree Regressor RMSE: 1025599.70
Gradient Boosting Regressor RMSE: 991811.31
Example data for prediction:
   sqft_living  bedrooms  bathrooms  city_Auburn  city_Beaux Arts Village  \
0         1500         3          2          0.0                      0.0   

   city_Bellevue  city_Black Diamond  city_Bothell  city_Burien  \
0            0.0                 0.0           0.0          0.0   

   city_Carnation  ...  city_SeaTac  city_Seattle  city_Shoreline  \
0             0.0  ...          0.0           1.0             0.0   

   city_Skykomish  city_Snoqualmie  city_Snoqualmie Pass  city_Tukwila  \
0             0.0              0.0                   0.0           0.0   

   city_Vashon  city_Woodinville  city_Yarrow Point  
0          0.0               0.0                0.0  

[1 rows x 46 columns]
Linear Regression prediction: $486,779.68
Random Forest Regressor prediction: $435,526.56
Decision Tree Regressor prediction: $300,000.00