In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

In [None]:
# Name of the column the models are trained to predict.
target_col = 'median_sale_price'

# Load the dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="charlotte_cleaned_data.csv")

In [None]:
# Summarize the loaded frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Discard columns that hold no data at all (every value missing).
df_cleaned_cols = df.dropna(how='all', axis='columns')

# Keep only the rows in which both model features are present.
df_final = df_cleaned_cols.dropna(
    axis='index',
    subset=['median_list_price', 'median_ppsf'],
)

In [None]:
# Re-check dtypes and non-null counts after dropping the empty columns and
# the rows that were missing a feature value.
df_final.info()

In [None]:
# Parse the period start dates and use them as a chronologically sorted index,
# so that the positional train/test split further down is a time-based split.
#
# The conversion is done with .assign() on df_final's own column instead of
# assigning into df_final from the un-filtered `df`: writing a column into the
# result of dropna() can raise SettingWithCopyWarning, and reading from `df`
# relied on implicit index alignment.
#
# The index-name check makes the cell safe to re-run: once 'period_begin' is
# already the index there is nothing left to convert. A frame that never had
# the column still fails loudly with a KeyError.
if df_final.index.name != 'period_begin':
    df_final = (
        df_final
        .assign(period_begin=lambda frame: pd.to_datetime(frame['period_begin']))
        .set_index('period_begin')
        .sort_index()
    )


In [None]:
# Rows without a target value cannot be used for training or evaluation.
has_target = df_final[target_col].notna()
df_final = df_final[has_target]

In [None]:
# Target vector. Uses `target_col` (the same name the NaN filter above uses)
# rather than repeating the column literal, so the column being modelled is
# always the one that was checked for missing values.
y = df_final[target_col]

# Feature matrix: the two predictors the models are trained on.
X = df_final[['median_list_price', 'median_ppsf']]

In [None]:
# Hold out the last 20% of the rows (at least one row) as the test set.
# There is no shuffling: the frame was sorted by its date index earlier, so
# the test set is the most recent slice of the data.
test_size_ratio = 0.2
test_size = max(1, int(len(X) * test_size_ratio))

# Fail early with a clear message instead of letting model.fit() choke on an
# empty training set when the dataset is too small to split.
if len(X) <= test_size:
    raise ValueError(
        f"Need more than {test_size} row(s) to leave a non-empty training set; got {len(X)}."
    )

# .iloc makes the positional slicing explicit; the index holds dates, not
# row numbers, so bare [] slicing is easy to misread as label-based.
X_train, X_test = X.iloc[:-test_size], X.iloc[-test_size:]
y_train, y_test = y.iloc[:-test_size], y.iloc[-test_size:]

In [None]:
# Sanity check: dimensions of the training features, then the training target.
print(X_train.shape, y_train.shape, sep='\n')


In [None]:
# Random forest regressor with a fixed seed; n_jobs=-1 lets scikit-learn use
# every available core. fit() returns the estimator itself, so the fitted
# model can be bound in a single expression.
model = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    min_samples_split=5,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)

# Predictions on the training rows and on the held-out rows, in that order.
y_pred_train, y_pred_test = (
    model.predict(features) for features in (X_train, X_test)
)

In [None]:
def evaluate_model(y_true, y_pred, set_name):
    """Score a set of predictions against the observed values.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, aligned with ``y_true``.
        set_name: Label stored under the ``'Set'`` key of the result.

    Returns:
        dict with the keys ``'Set'``, ``'R-squared (R^2)'``, ``'RMSE'`` and
        ``'MAE'``.
    """
    metrics = {'Set': set_name}
    metrics['R-squared (R^2)'] = r2_score(y_true, y_pred)
    # RMSE is the square root of the mean squared error.
    squared_error = mean_squared_error(y_true, y_pred)
    metrics['RMSE'] = np.sqrt(squared_error)
    metrics['MAE'] = mean_absolute_error(y_true, y_pred)
    return metrics

In [None]:
# Random forest metrics on the held-out rows.
evaluate_model(y_true=y_test, y_pred=y_pred_test, set_name='test_set')

In [None]:
# Random forest metrics on the training rows, for comparison with the
# held-out metrics.
evaluate_model(y_true=y_train, y_pred=y_pred_train, set_name='train_set')

In [None]:
# Gradient boosting regressor trained on the same split, with a fixed seed.
# fit() returns the estimator, so it can be chained onto the constructor.
gbr_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

# Predictions for the held-out rows.
gbr_pred_test = gbr_model.predict(X_test)

In [None]:
# Gradient boosting metrics on the held-out rows.
evaluate_model(y_true=y_test, y_pred=gbr_pred_test, set_name='test_set')