## Import Modules

In [83]:
import pandas as pd
import numpy as np
import os
from prophet import Prophet
from prophet.utilities import regressor_index

## Load Dataset

In [84]:
# Read property sales data.
# The path is assembled with os.path.join so the notebook also runs on
# Linux/macOS, where a backslash is not a directory separator.
property_sales_df = pd.read_csv(os.path.join("Dataset", "raw_sales.csv"))

## Prepare Dataset for Prophet.

In [85]:
# Preview the first five rows of the raw sales data.
property_sales_df.head(5)

Unnamed: 0,datesold,postcode,price,propertyType,bedrooms
0,2007-02-07 00:00:00,2607,525000,house,4
1,2007-02-27 00:00:00,2906,290000,house,3
2,2007-03-07 00:00:00,2905,328000,house,3
3,2007-03-09 00:00:00,2905,380000,house,4
4,2007-03-21 00:00:00,2906,310000,house,3


In [86]:
# Bring the frame into Prophet's input layout and aggregate it:
#   1. rename the date and price columns to "ds" and "y",
#   2. drop the postcode column,
#   3. parse "ds" as datetimes,
#   4. sum "y" and "bedrooms" for every (ds, propertyType) pair.
property_sales_df = (
    property_sales_df
    .rename(columns={"datesold": "ds", "price": "y"})
    .drop(columns=["postcode"])
    .assign(ds=lambda frame: pd.to_datetime(frame["ds"]))
    .groupby(["ds", "propertyType"])
    .agg({"y": "sum", "bedrooms": "sum"})
    .reset_index()
)

## Forecast Sales for 10 time steps into the future using Prophet.

In [90]:
# Define the forecast function that is applied to each property-type group of the dataframe.

def forecast(grouped_data: pd.DataFrame, periods: int = 10) -> pd.DataFrame:
    """Forecast sales for a single property type with "bedrooms" as a regressor.

    Two Prophet models are fitted on the group:

    1. a univariate model on "bedrooms", used to project the regressor over
       the forecast horizon;
    2. a sales model on "y" with "bedrooms" added as an extra regressor, which
       is then evaluated on the projected regressor values.

    Parameters
    ----------
    grouped_data : pd.DataFrame
        Rows of one property type with the columns "ds", "y", "bedrooms" and
        "propertyType".
    periods : int, default 10
        Number of future time steps to forecast (passed to
        ``make_future_dataframe``, which is called with its default frequency).

    Returns
    -------
    pd.DataFrame
        One row per forecast step with the columns "property_type", "ds",
        "bedrooms", "yhat", "y_scale" and "beta".

        NOTE: "bedrooms" is taken from the output of ``Prophet.predict``, where
        the column named after a regressor holds that regressor's contribution
        to the forecast, not the bedroom count that was fed in.
    """
    # Get the property type of the group.
    property_type = grouped_data["propertyType"].unique()[0]

    # Last observed date; everything after it is the forecast period.
    max_date = grouped_data["ds"].max()

    # Training frames: sales with the regressor, and the regressor on its own
    # (renamed to "y" because Prophet always models a column called "y").
    train_df_sales = grouped_data[["ds", "y", "bedrooms"]]
    train_df_regressor = grouped_data[["ds", "bedrooms"]].rename(columns={"bedrooms": "y"})

    # Fit the regressor model and forecast it `periods` steps ahead.
    regressor_forecast_obj = Prophet().fit(train_df_regressor)
    future_dataframe_reg = regressor_forecast_obj.make_future_dataframe(periods=periods)
    reg_forecast_df = regressor_forecast_obj.predict(future_dataframe_reg)

    # Keep only the forecast period and cast the projected regressor to integers.
    # Selecting with .loc and chaining returns a new frame, so no value is ever
    # assigned onto a slice (the original pattern raised SettingWithCopyWarning).
    reg_forecast_df = (
        reg_forecast_df.loc[reg_forecast_df["ds"] > max_date, ["ds", "yhat"]]
        .rename(columns={"yhat": "bedrooms"})
        .astype({"bedrooms": int})
    )

    # Carry out the multivariate forecast for sales.
    sales_forecast_obj = Prophet()
    sales_forecast_obj.add_regressor("bedrooms")
    sales_forecast_obj.fit(train_df_sales)

    # The projected regressor frame doubles as the future dataframe.
    sales_forecast = sales_forecast_obj.predict(reg_forecast_df)

    # Add property type to the dataframe as an identifier.
    sales_forecast["property_type"] = property_type

    # Data used to compute the regressor impact. params["beta"] has one row per
    # posterior draw; averaging yields a scalar for a single draw as well as for
    # several, without relying on float() accepting a 1-D array.
    beta_draws = sales_forecast_obj.params["beta"][:, regressor_index(sales_forecast_obj, "bedrooms")]
    sales_forecast["beta"] = float(beta_draws.mean())
    sales_forecast["y_scale"] = sales_forecast_obj.y_scale

    # Filter dataframe on required columns.
    return sales_forecast[["property_type", "ds", "bedrooms", "yhat", "y_scale", "beta"]]


# Run the forecast once per property type and stack the per-group results.
# The groups are iterated explicitly instead of going through groupby.apply:
# forecast() reads the "propertyType" column of each group, and recent pandas
# versions deprecate apply() operating on the grouping columns.
results = pd.concat(
    [forecast(group_df) for _, group_df in property_sales_df.groupby("propertyType")],
    ignore_index=True,
)



05:58:51 - cmdstanpy - INFO - Chain [1] start processing
05:58:51 - cmdstanpy - INFO - Chain [1] done processing
05:58:52 - cmdstanpy - INFO - Chain [1] start processing
05:58:53 - cmdstanpy - INFO - Chain [1] done processing
05:58:54 - cmdstanpy - INFO - Chain [1] start processing
05:58:54 - cmdstanpy - INFO - Chain [1] done processing
05:58:55 - cmdstanpy - INFO - Chain [1] start processing
05:58:56 - cmdstanpy - INFO - Chain [1] done processing


In [91]:
# Display the stacked forecasts returned by forecast() for the property-type groups.
results

Unnamed: 0,property_type,ds,bedrooms,yhat,y_scale,beta
0,house,2019-07-28,-6059739.0,-531745.3,41606600.0,0.09264
1,house,2019-07-29,-1147086.0,3714332.0,41606600.0,0.09264
2,house,2019-07-30,-1556473.0,3320741.0,41606600.0,0.09264
3,house,2019-07-31,-737697.8,4091151.0,41606600.0,0.09264
4,house,2019-08-01,-1147086.0,3723588.0,41606600.0,0.09264
5,house,2019-08-02,-533003.9,4213941.0,41606600.0,0.09264
6,house,2019-08-03,899853.4,6778371.0,41606600.0,0.09264
7,house,2019-08-04,-5855045.0,-418621.4,41606600.0,0.09264
8,house,2019-08-05,-942391.7,3822819.0,41606600.0,0.09264
9,house,2019-08-06,-1351780.0,3426503.0,41606600.0,0.09264
