<a href="https://colab.research.google.com/github/jeonghojo00/HousingPricePrediction/blob/main/HousingPricePrediction_byZip_forFuture_Prophet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import json
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from prophet.serialize import model_to_json, model_from_json

## Step 0. Connect to Google Drive

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Step 1. Define Functions

### Load Data

In [5]:
def load_data():
    colab_notebook = "/content/drive/My Drive/Colab Notebooks/"
    zillow_dataset = "Zillow/AllHomesZipPrices.csv"
    df = pd.read_csv(colab_notebook+zillow_dataset)
    return df

### Get City Dataframe

In [6]:
def get_city_df(df, state, city):
    state_df = df[df['state'] == state]
    city_df = state_df[state_df['city'] == city]

    return city_df

### Get House Price Prediction

In [7]:
def get_HousingPricePrediction(city_df, zip, period = 24):
    zip_df = city_df[city_df['zip']==zip].loc[:,('ds', 'y')]
    md = Prophet(interval_width=0.95, weekly_seasonality=True, daily_seasonality=True)
    model = md.fit(zip_df)
    future = model.make_future_dataframe(periods=period, freq='M') #freq changes frequency of dates. Default Daily
    pred_y = model.predict(future)

    return md, pred_y

### Save Model

In [8]:
def save_prophet_model(model, filename):
  colab_notebook = "/content/drive/My Drive/Colab Notebooks/models/"
  with open(colab_notebook + filename, 'w') as fout:
    json.dump(model_to_json(model), fout)

### Load Model

In [9]:
def load_prophet_model(filename):
  colab_notebook = "/content/drive/My Drive/Colab Notebooks/models/"
  with open(colab_notebook+filename, 'r') as fin:
    model = model_from_json(json.load(fin)) 
    return model

## Step 2. Run Codes for One Zip Code Area

### Load Dataset

In [10]:
df = load_data()

### Get City Dataframe

In [11]:
state = 'TX'
city = 'Houston'
city_df = get_city_df(df, state, city)
zipCodes = city_df['zip'].unique()

print(zipCodes)

[77084 77036 77077 77095 77082 77007 77057 77083 77070 77042 77063 77081
 77008 77072 77089 77015 77040 77339 77024 77088 77099 77090 77055 77044
 77004 77064 77080 77060 77006 77009 77065 77079 77035 77096 77054 77056
 77092 77074 77025 77073 77093 77034 77019 77075 77598 77018 77027 77041
 77014 77049 77021 77087 77047 77091 77017 77066 77005 77043 77022 77071
 77023 77016 77067 77045 77098 77058 77062 77076 77345 77033 77061 77053
 77020 77038 77069 77086 77026 77039 77048 77051 77002 77030 77011 77012
 77059 77028 77013 77031 77029 77003 77336 77085 77037 77078 77068 77094
 77032 77050 77046 77010]


### Select one of zip codes

In [12]:
zip = 77084

### Get House Price Prediction

In [13]:
# Train and predict using Prophet model
md, pred_y = get_HousingPricePrediction(city_df, zip, period = 60)

# Plot
plot_plotly(md, pred_y)

### Save Model

In [14]:
filename = city+state+str(zip)+'.json'
print(filename)

HoustonTX77084.json


In [15]:
save_prophet_model(md, filename)

### Re-load the model saved to check

In [16]:
loaded_model = load_prophet_model(filename)

In [17]:
period = 36
future = loaded_model.make_future_dataframe(periods=period, freq='MS') #freq changes frequency of dates. Default Daily
pred_y = loaded_model.predict(future)

In [18]:
plot_plotly(loaded_model, pred_y)

## Step 3. Run Codes for selected states, cities, and all the zip areas belong to those. 

### Make a list of periods to predict

In [22]:
import datetime as dt
import dateutil as du
base_date = '2020-12'
base_date = dt.datetime.strptime(base_date, "%Y-%m")
months = [6, 12, 24, 36, 48, 60]
target_month = list()

for month in months:
    a_month = du.relativedelta.relativedelta(months=month)
    date_plus_month = (base_date + a_month).strftime('%Y-%m')
    target_month.append(date_plus_month)
print(target_month)

['2021-06', '2021-12', '2022-12', '2023-12', '2024-12', '2025-12']


### Load Dataset

In [23]:
df = load_data()
zip_dict = dict()
df = df[df['ds']<'2021-01-01'] #Will train up to December 2020.
new_df = df
new_df.loc[:,'ds'] = pd.to_datetime(df.loc[:,'ds'], format='%Y-%m-%d')

allresults = pd.DataFrame(columns=['state', 'city', 'zip', 'Current', '6M', '12M', '24M', '36M', '48M', '60M'])

### Get Housing Prices for each area

#### Houston, TX

In [24]:
##### Get Dataset for Cities to run simulations
state = 'TX'
city = 'Houston'
city_df = get_city_df(df, state, city)

zips = city_df['zip'].unique()
zip_dict[city+state] = list(zips)

print('Number of Zip Code Areas in '+city+', '+state+': ', len(zips))

##### Make Prophet models and Save as json files
period = 60 ## in month
for zip in zips:
    current_val = new_df[(new_df['state']==state) & (new_df['city']==city) & (new_df['zip']==int(zip))]['y'].iloc[-1]

    # Train and predict using Prophet model
    md, pred_y = get_HousingPricePrediction(city_df, zip, period = period)
    pred_y['ds'] = pred_y['ds'].dt.strftime('%Y-%m')

    result = list()
    for month in target_month:
        yAt = pred_y[pred_y['ds']==target_month[0]]['yhat'].item()
        result.append(yAt)

    eachRow = {'state': state, 'city': city, 'zip': zip, 'Current': current_val, '6M': result[0], '12M': result[1], '24M': result[2], '36M': result[3], '48M': result[4], '60M': result[5]}
    allresults = allresults.append(eachRow, ignore_index=True)

    # Save the model as a json file
    filename = city+state+str(zip)+'.json'
    save_prophet_model(md, filename)


  
print("Completed to save "+str(len(zips))+" zip code areas in "+ city + ', '+ state)

Number of Zip Code Areas in Houston, TX:  100
Completed to save 100 zip code areas in Houston, TX


#### Dallas, TX

In [25]:
##### Get Dataset for Cities to run simulations
state = 'TX'
city = 'Dallas'
city_df = get_city_df(df, state, city)

zips = city_df['zip'].unique()
zip_dict[city+state] = list(zips)

print('Number of Zip Code Areas in '+city+', '+state+': ', len(zips))

##### Make Prophet models and Save as json files
period = 60 ## in month
for zip in zips:
    current_val = new_df[(new_df['state']==state) & (new_df['city']==city) & (new_df['zip']==int(zip))]['y'].iloc[-1]

    # Train and predict using Prophet model
    md, pred_y = get_HousingPricePrediction(city_df, zip, period = period)
    pred_y['ds'] = pred_y['ds'].dt.strftime('%Y-%m')

    result = list()
    for month in target_month:
        yAt = pred_y[pred_y['ds']==target_month[0]]['yhat'].item()
        result.append(yAt)

    eachRow = {'state': state, 'city': city, 'zip': zip, 'Current': current_val, '6M': result[0], '12M': result[1], '24M': result[2], '36M': result[3], '48M': result[4], '60M': result[5]}
    allresults = allresults.append(eachRow, ignore_index=True)

    # Save the model as a json file
    filename = city+state+str(zip)+'.json'
    save_prophet_model(md, filename)


  
print("Completed to save "+str(len(zips))+" zip code areas in "+ city + ', '+ state)

Number of Zip Code Areas in Dallas, TX:  45
Completed to save 45 zip code areas in Dallas, TX


#### Boston, MA

In [26]:
##### Get Dataset for Cities to run simulations

state = 'MA'
city = 'Boston'
city_df = get_city_df(df, state, city)

zips = city_df['zip'].unique()
zip_dict[city+state] = list(zips)

print('Number of Zip Code Areas in '+city+', '+state+': ', len(zips))

##### Make Prophet models and Save as json files
period = 60 ## in month
for zip in zips:
    current_val = new_df[(new_df['state']==state) & (new_df['city']==city) & (new_df['zip']==int(zip))]['y'].iloc[-1]

    # Train and predict using Prophet model
    md, pred_y = get_HousingPricePrediction(city_df, zip, period = period)
    pred_y['ds'] = pred_y['ds'].dt.strftime('%Y-%m')

    result = list()
    for month in target_month:
        yAt = pred_y[pred_y['ds']==target_month[0]]['yhat'].item()
        result.append(yAt)

    eachRow = {'state': state, 'city': city, 'zip': zip, 'Current': current_val, '6M': result[0], '12M': result[1], '24M': result[2], '36M': result[3], '48M': result[4], '60M': result[5]}
    allresults = allresults.append(eachRow, ignore_index=True)

    # Save the model as a json file
    filename = city+state+str(zip)+'.json'
    save_prophet_model(md, filename)


  
print("Completed to save "+str(len(zips))+" zip code areas in "+ city + ', '+ state)

Number of Zip Code Areas in Boston, MA:  28
Completed to save 28 zip code areas in Boston, MA


### Print all results of Future Prediction

In [27]:
allresults

Unnamed: 0,state,city,zip,Current,6M,12M,24M,36M,48M,60M
0,TX,Houston,77084,202660.0,1.991471e+05,1.991471e+05,1.991471e+05,1.991471e+05,1.991471e+05,1.991471e+05
1,TX,Houston,77036,147215.0,1.488807e+05,1.488807e+05,1.488807e+05,1.488807e+05,1.488807e+05,1.488807e+05
2,TX,Houston,77077,317217.0,3.082879e+05,3.082879e+05,3.082879e+05,3.082879e+05,3.082879e+05,3.082879e+05
3,TX,Houston,77095,247815.0,2.405546e+05,2.405546e+05,2.405546e+05,2.405546e+05,2.405546e+05,2.405546e+05
4,TX,Houston,77082,198764.0,1.990344e+05,1.990344e+05,1.990344e+05,1.990344e+05,1.990344e+05,1.990344e+05
...,...,...,...,...,...,...,...,...,...,...
168,MA,Boston,2210,1186806.0,1.244056e+06,1.244056e+06,1.244056e+06,1.244056e+06,1.244056e+06,1.244056e+06
169,MA,Boston,2109,892584.0,9.230823e+05,9.230823e+05,9.230823e+05,9.230823e+05,9.230823e+05,9.230823e+05
170,MA,Boston,2108,1341095.0,1.393905e+06,1.393905e+06,1.393905e+06,1.393905e+06,1.393905e+06,1.393905e+06
171,MA,Boston,2110,1290866.0,1.378934e+06,1.378934e+06,1.378934e+06,1.378934e+06,1.378934e+06,1.378934e+06
