## Import Modules

In [30]:
import pandas as pd
import numpy as np
import os
from prophet import Prophet

## Load Dataset

In [31]:
# Read property sales data.
# The path is assembled with os.path.join so the separator is correct on
# every OS; a literal backslash only acts as a path separator on Windows.
property_sales_df = pd.read_csv(os.path.join("Dataset", "raw_sales.csv"))

## Prepare Dataset for Prophet.

In [32]:
# Preview the top five rows to check the columns that were loaded.
property_sales_df.head(n=5)

Unnamed: 0,datesold,postcode,price,propertyType,bedrooms
0,2007-02-07 00:00:00,2607,525000,house,4
1,2007-02-27 00:00:00,2906,290000,house,3
2,2007-03-07 00:00:00,2905,328000,house,3
3,2007-03-09 00:00:00,2905,380000,house,4
4,2007-03-21 00:00:00,2906,310000,house,3


In [33]:
# Reshape the sales data into the layout Prophet expects and aggregate it
# to one row per (date, property type).
#
# The whole transformation is a single chain so the cell can be re-run
# safely: on a second run the renames are no-ops, the already-removed
# "postcode" column is skipped, and re-aggregating unique
# (ds, propertyType) rows yields the same frame.
property_sales_df = (
    property_sales_df
    # Prophet requires the timestamp column to be "ds" and the target "y".
    .rename(columns={"datesold": "ds", "price": "y"})
    # errors="ignore" avoids a KeyError when the column is already gone.
    .drop(columns=["postcode"], errors="ignore")
    # Convert the date column to datetime.
    .assign(ds=lambda frame: pd.to_datetime(frame["ds"]))
    # Sum price and bedrooms for each date and property type.
    .groupby(["ds", "propertyType"])
    .agg({"y": "sum", "bedrooms": "sum"})
    .reset_index()
)

## Forecast Sales for 10 time steps into the future using Prophet.

In [43]:
# Define the forecast function that is applied to each property-type group.

def forecast(grouped_data: pd.DataFrame) -> pd.DataFrame:
    """Forecast the ``bedrooms`` regressor 10 periods past the group's last date.

    A Prophet model is fitted on the group's ``bedrooms`` series (renamed to
    ``y``), a future frame with 10 extra periods is generated, and only the
    predictions dated after the last observed ``ds`` are returned.

    NOTE(review): only the regressor is forecast here; no model is fitted on
    the sales column ``y`` of ``grouped_data`` in this function.

    Parameters
    ----------
    grouped_data : pd.DataFrame
        Rows belonging to a single property type. Must contain the columns
        ``ds``, ``bedrooms`` and ``propertyType``.

    Returns
    -------
    pd.DataFrame
        Prophet's prediction frame restricted to ``ds`` values later than the
        group's maximum ``ds``, with a ``property_type`` column identifying
        the group.
    """
    # Identify the group so its rows stay distinguishable after the
    # per-group results are stacked and the index is discarded.
    property_type = grouped_data["propertyType"].unique()[0]

    # Last observed date; everything after it is the forecast horizon.
    max_date = grouped_data["ds"].max()

    # Prophet expects the series to model in a column named "y".
    train_df_regressor = grouped_data[["ds", "bedrooms"]].rename(columns={"bedrooms": "y"})

    # Fit a Prophet model on the regressor series.
    regressor_forecast_obj = Prophet().fit(train_df_regressor)

    # Create the future dataframe with 10 additional timesteps.
    future_dataframe_reg = regressor_forecast_obj.make_future_dataframe(periods=10)

    # Generate the forecast for the regressor.
    reg_forecast_df = regressor_forecast_obj.predict(future_dataframe_reg)

    # Keep only the rows beyond the training range and tag them with the
    # property type (assign returns a new frame, so no view is mutated).
    future_only = reg_forecast_df["ds"] > max_date
    return reg_forecast_df.loc[future_only].assign(property_type=property_type)


# Run the forecast once per property type and stack the per-group results.
# The groups are iterated explicitly instead of using GroupBy.apply, so each
# call receives the group's full frame (including "propertyType", which
# forecast() reads). GroupBy.apply operating on the grouping columns is
# deprecated in recent pandas releases.
result = pd.concat(
    [
        forecast(group_df)
        for _, group_df in property_sales_df.groupby("propertyType")
    ],
    ignore_index=True,
)



04:35:58 - cmdstanpy - INFO - Chain [1] start processing
04:35:58 - cmdstanpy - INFO - Chain [1] done processing
04:36:00 - cmdstanpy - INFO - Chain [1] start processing
04:36:00 - cmdstanpy - INFO - Chain [1] done processing


In [44]:
# Display the stacked forecast frame produced by the per-property-type run.
result

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat,property_type
0,2007-02-07,-0.388205,-14.230118,22.345024,-0.388205,-0.388205,2.870060,2.870060,2.870060,3.787801,3.787801,3.787801,-0.917742,-0.917742,-0.917742,0.0,0.0,0.0,2.481855,house
1,2007-02-27,-0.112144,-13.618557,23.675228,-0.112144,-0.112144,4.209377,4.209377,4.209377,-0.304427,-0.304427,-0.304427,4.513803,4.513803,4.513803,0.0,0.0,0.0,4.097232,house
2,2007-03-07,-0.001720,-9.466974,26.766258,-0.001720,-0.001720,8.527854,8.527854,8.527854,3.787801,3.787801,3.787801,4.740052,4.740052,4.740052,0.0,0.0,0.0,8.526133,house
3,2007-03-09,0.025886,-9.560382,27.648884,0.025886,0.025886,9.232568,9.232568,9.232568,4.626239,4.626239,4.626239,4.606329,4.606329,4.606329,0.0,0.0,0.0,9.258454,house
4,2007-03-21,0.191522,-10.141333,27.603096,0.191522,0.191522,7.381782,7.381782,7.381782,3.787801,3.787801,3.787801,3.593981,3.593981,3.593981,0.0,0.0,0.0,7.573304,house
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5658,2019-08-01,5.466619,2.372569,9.301225,5.466619,5.466619,0.398347,0.398347,0.398347,0.577939,0.577939,0.577939,-0.179591,-0.179591,-0.179591,0.0,0.0,0.0,5.864967,unit
5659,2019-08-02,5.466781,3.381242,10.067382,5.466781,5.466781,1.100362,1.100362,1.100362,1.256416,1.256416,1.256416,-0.156054,-0.156054,-0.156054,0.0,0.0,0.0,6.567144,unit
5660,2019-08-03,5.466943,0.497564,7.318760,5.466943,5.466943,-1.535822,-1.535822,-1.535822,-1.401866,-1.401866,-1.401866,-0.133956,-0.133956,-0.133956,0.0,0.0,0.0,3.931122,unit
5661,2019-08-04,5.467106,-0.582194,6.244612,5.467106,5.467106,-2.913506,-2.913506,-2.913506,-2.799572,-2.799572,-2.799572,-0.113933,-0.113933,-0.113933,0.0,0.0,0.0,2.553600,unit
