In [2]:
import pandas as pd
import numpy as np

from prophet import Prophet
import matplotlib.pyplot as plt 
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [189]:
# Raw monthly accommodation counts, one row per (time label, region).
accomodation = pd.read_csv("accomodation_data.csv")

# Lookup table pairing each Japanese region name with an English name and
# an area; numbers in this file use "," as the thousands separator.
prefectures = pd.read_csv(
    "Prefecture_Japanese_Area.tsv",
    sep="\t",
    thousands=",",
)

In [190]:
# Preview the first rows of the raw table to check its column layout.
accomodation.head()

Unnamed: 0,時点,地域コード,地域,延べ宿泊者数（総数）【人泊】,注記
0,2007年1月,0,全国,22136270,
1,2007年2月,0,全国,23346870,
2,2007年3月,0,全国,26997200,
3,2007年4月,0,全国,23729790,
4,2007年5月,0,全国,25636730,


In [191]:
# Discard the region-code and notes columns, then switch the remaining
# Japanese headers to short English names used by the rest of the notebook.
accomodation = (
    accomodation
    .drop(columns=["地域コード", "注記"])
    .rename(
        columns={
            "時点": "time",
            "地域": "location",
            "延べ宿泊者数（総数）【人泊】": "accomodations",
        }
    )
)

In [192]:
# Confirm that only the renamed time / location / accomodations columns remain.
accomodation.head()

Unnamed: 0,time,location,accomodations
0,2007年1月,全国,22136270
1,2007年2月,全国,23346870
2,2007年3月,全国,26997200
3,2007年4月,全国,23729790
4,2007年5月,全国,25636730


In [193]:
# Turn labels such as "2007年1月" into "2007-1" (year marker -> "-", month
# marker removed). Plain-text replacement: no regex is needed here.
time_labels = (
    accomodation["time"]
    .str.replace("年", "-", regex=False)
    .str.replace("月", "", regex=False)
)

# A label that still ends with "-" had a year but no month number, so it
# cannot be parsed as a year-month date; those rows are dropped.
has_month = ~time_labels.str.endswith("-")

# .copy() makes the filtered frame independent, so the column assignment below
# writes to a real frame instead of a slice (avoids SettingWithCopyWarning).
accomodation = accomodation.loc[has_month].copy()
accomodation["time"] = pd.to_datetime(time_labels[has_month], format="%Y-%m")

In [194]:
# Display the frame (the notebook view elides the middle rows) to check the parsed dates.
accomodation

Unnamed: 0,time,location,accomodations
0,2007-01-01,全国,22136270
1,2007-02-01,全国,23346870
2,2007-03-01,全国,26997200
3,2007-04-01,全国,23729790
4,2007-05-01,全国,25636730
...,...,...,...
8059,2020-08-01,沖縄県,849500
8060,2020-09-01,沖縄県,884730
8061,2020-10-01,沖縄県,1285240
8062,2020-11-01,沖縄県,1415910


In [195]:
# Preview the lookup table: English name, Japanese name and area per region.
prefectures.head()

Unnamed: 0,Prefecture,Japanese,Area
0,Aichi,愛知県,5153.81
1,Akita,秋田県,11612.11
2,Aomori,青森県,9606.26
3,Chiba,千葉県,5156.15
4,Ehime,愛媛県,5676.44


In [196]:
# Sanity check before joining on the Japanese name: both tables must contain
# exactly the same set of region names, otherwise the join would lose rows.
assert set(accomodation["location"]) == set(prefectures["Japanese"])

In [197]:
# Attach the lookup columns (English name, area) to every accommodation row,
# matching on the Japanese region name.
# validate="many_to_one" makes pandas raise a MergeError if a Japanese name
# appears more than once in `prefectures`; without it such a duplicate would
# silently multiply the accommodation rows.
accomodation = accomodation.merge(
    prefectures,
    how="inner",
    left_on="location",
    right_on="Japanese",
    validate="many_to_one",
)

In [198]:
# Check the merged frame: the lookup columns should now sit next to the counts.
accomodation.head()

Unnamed: 0,time,location,accomodations,Prefecture,Japanese,Area
0,2007-01-01,全国,22136270,Japan,全国,377915.0
1,2007-02-01,全国,23346870,Japan,全国,377915.0
2,2007-03-01,全国,26997200,Japan,全国,377915.0
3,2007-04-01,全国,23729790,Japan,全国,377915.0
4,2007-05-01,全国,25636730,Japan,全国,377915.0


In [199]:
# Both Japanese-name columns are redundant after the join; keep the English
# name instead and expose it under the "location" header.
accomodation = (
    accomodation
    .drop(columns=["location", "Japanese"])
    .rename(columns={"Prefecture": "location"})
)

In [200]:
# Persist the cleaned table as a tab-separated file; index=False keeps the
# row index out of the file.
accomodation.to_csv("accomodation_processed.tsv", sep="\t", index=False)

In [3]:
# Reload the cleaned table. A text file does not carry dtypes, so "time" would
# come back as plain strings; parse_dates restores it to datetime on load.
accomodation = pd.read_csv(
    "accomodation_processed.tsv",
    sep="\t",
    parse_dates=["time"],
)

In [4]:
# Preview the reloaded table.
accomodation.head()

Unnamed: 0,time,accomodations,location,Area
0,2007-01-01,22136270,Japan,377915.0
1,2007-02-01,23346870,Japan,377915.0
2,2007-03-01,26997200,Japan,377915.0
3,2007-04-01,23729790,Japan,377915.0
4,2007-05-01,25636730,Japan,377915.0


In [48]:
# Build the two-column frame used for fitting: "ds" (date) and "y" (value).
# A single .loc selects rows and columns in one step, so the result is a
# standalone frame rather than a slice of a slice. Renaming by name instead
# of overwriting `.columns` positionally keeps the mapping explicit.
prophet_japan = (
    accomodation
    .loc[accomodation["location"] == "Japan", ["time", "accomodations"]]
    .rename(columns={"time": "ds", "accomodations": "y"})
    # Ensure "ds" is datetime even if "time" was loaded as strings.
    .assign(ds=lambda frame: pd.to_datetime(frame["ds"]))
)

In [49]:
# Display the nationwide series (the notebook view elides the middle rows).
prophet_japan

Unnamed: 0,ds,y
0,2007-01-01,22136270
1,2007-02-01,23346870
2,2007-03-01,26997200
3,2007-04-01,23729790
4,2007-05-01,25636730
...,...,...
163,2020-08-01,26149030
164,2020-09-01,26020820
165,2020-10-01,32412890
166,2020-11-01,34501310


In [50]:
# Split the series at 1 January 2020: everything earlier goes to one frame,
# everything from that date onward to the other.
prophet_japan_before_2020 = prophet_japan.loc[prophet_japan["ds"] < "2020-01-01"]
prophet_japan_2020 = prophet_japan.loc[prophet_japan["ds"] >= "2020-01-01"]

In [51]:
# Fit a Prophet model with default settings on the pre-2020 observations only.
m_japan = Prophet()
m_japan.fit(prophet_japan_before_2020)

# Predict for every date present in the full series, so the result covers
# both the fitted period and the held-out dates from 2020 onward.
future = prophet_japan["ds"].to_frame(name="ds")
prediction = m_japan.predict(future)

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [52]:
# Inspect the last predicted rows (point forecast `yhat` and its lower/upper bounds).
prediction.tail()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
163,2020-08-01,51350040.0,61272670.0,65992900.0,51197150.0,51497530.0,12341820.0,12341820.0,12341820.0,12341820.0,12341820.0,12341820.0,0.0,0.0,0.0,63691860.0
164,2020-09-01,51560660.0,48746300.0,53366500.0,51381200.0,51739200.0,-521587.9,-521587.9,-521587.9,-521587.9,-521587.9,-521587.9,0.0,0.0,0.0,51039070.0
165,2020-10-01,51764470.0,50790250.0,55294100.0,51538960.0,51974090.0,1202464.0,1202464.0,1202464.0,1202464.0,1202464.0,1202464.0,0.0,0.0,0.0,52966940.0
166,2020-11-01,51975090.0,49317040.0,53893980.0,51707670.0,52221110.0,-439563.9,-439563.9,-439563.9,-439563.9,-439563.9,-439563.9,0.0,0.0,0.0,51535520.0
167,2020-12-01,52178910.0,46884530.0,51694380.0,51862640.0,52472280.0,-2868611.0,-2868611.0,-2868611.0,-2868611.0,-2868611.0,-2868611.0,0.0,0.0,0.0,49310290.0


In [53]:
def plot_prophet(original, prediction, title):
    """Build a Plotly figure comparing observed values with a forecast.

    Parameters
    ----------
    original
        Frame with "ds" and "y" columns; drawn as markers.
    prediction
        Frame with "ds", "yhat", "yhat_lower" and "yhat_upper" columns; drawn
        as a line (yhat) with a shaded band between the lower and upper bound.
    title
        Text used as the figure title.

    Returns
    -------
    The assembled figure. It is not shown or saved here.
    """
    forecast_dates = prediction["ds"]

    def _band_edge(values, **extra):
        # One edge of the shaded band: a zero-width line hidden from the legend.
        return go.Scatter(
            x=forecast_dates,
            y=values,
            mode="lines",
            marker=dict(color="#444"),
            line=dict(width=0),
            showlegend=False,
            **extra,
        )

    observed_points = go.Scatter(
        x=original["ds"],
        y=original["y"],
        name="Actual",
        mode="markers",
        marker={"size": 15},
        marker_color="rgba(152, 0, 0, .8)",
    )
    forecast_line = go.Scatter(
        x=forecast_dates,
        y=prediction["yhat"],
        name="Prophet's fit and prediction",
        marker_color="rgb(129, 119, 185, 1)",
    )
    upper_edge = _band_edge(prediction["yhat_upper"])
    # "tonexty" fills towards the trace added immediately before this one,
    # so the lower edge must directly follow the upper edge in the figure.
    lower_edge = _band_edge(
        prediction["yhat_lower"],
        fill="tonexty",
        fillcolor="rgba(78, 119, 185, 0.4)",
    )

    fig = make_subplots()
    # Observed points are added last so they are drawn on top of the band.
    for trace in (forecast_line, upper_edge, lower_edge, observed_points):
        fig.add_trace(trace)

    fig.update_layout(
        title_text=title,
        xaxis_title="Time",
        yaxis_title="Accomodations",
        font=dict(size=22),
        width=1600,
        height=800,
    )
    return fig

In [54]:
# Plot the observed nationwide series against the model output and render it.
fig_1 = plot_prophet(prophet_japan, prediction, "Accomodation counts in Japan between 2007 and 2020")
fig_1.show()

In [72]:
def potential_loss(prediction, actual, time):
    """Sum the gap between forecast and observed values from `time` onward.

    Parameters
    ----------
    prediction
        Frame with "ds" and "yhat" columns (the forecast).
    actual
        Frame with "ds" and "y" columns (the observed values).
    time
        Lower bound, inclusive; only rows with ds >= time are considered.

    Returns
    -------
    The sum of (yhat - y) over the dates present in both frames on or after
    `time`. A positive result means the forecast exceeded the observed values.
    """
    forecast = prediction.loc[prediction["ds"] >= time, ["ds", "yhat"]]
    # Use the `actual` argument: the observed series must come from the caller,
    # not from whatever notebook-level frame happens to exist.
    observed = actual.loc[actual["ds"] >= time, ["ds", "y"]]

    # Inner join on the date, so only dates present on both sides contribute.
    paired = forecast.merge(observed, on="ds")
    return (paired["yhat"] - paired["y"]).sum()

In [73]:

# Total of (forecast - observed) for the nationwide series from 2020-02-01 onward.
potential_loss(prediction, prophet_japan, "2020-02-01")

303995427.9224303