In [1]:
from prophet import Prophet
import itertools
from prophet.diagnostics import cross_validation, performance_metrics
import plotly.offline as py
import matplotlib as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import holidays
from sklearn.metrics import r2_score, mean_absolute_error
import base64
import math

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
training_upto = 2020
forecast_upto = (2023-training_upto-1)*12 + 24

In [3]:
the_file = pd.read_csv('C:/Users/20212828/Downloads/Bigfile/filtered_df_utility_cols.csv')

In [4]:
# Read data, get dataframe with crimes/month
monthly_grouped_df = the_file.copy()
monthly_grouped_df= monthly_grouped_df.sort_values(['Year','Month num'],ascending=False).groupby(['Year', 'Month num'])['Crime type'].count()
monthly_grouped_df = monthly_grouped_df.to_frame()


# Rename columns, select rows with data and set datetime index
monthly_grouped_df = monthly_grouped_df.rename(columns = {'Month num':'y'})
monthly_grouped_df['day'] = 1
monthly_grouped_df = monthly_grouped_df.reset_index()
# monthly_grouped_df = monthly_grouped_df[(monthly_grouped_df['Year']>=2011) | ((monthly_grouped_df['Year'] == 2010) & (monthly_grouped_df['Month num']==12))]
monthly_grouped_df = monthly_grouped_df.rename(columns={'Month num': 'month', 'Year': 'year'})
monthly_grouped_df['ds'] = pd.to_datetime(monthly_grouped_df[['year', 'month', 'day']])
monthly_grouped_df['y'] = monthly_grouped_df['Crime type']
monthly_grouped_df['covid']  = 0
monthly_grouped_df['covid'][((monthly_grouped_df['year']==2020) & (monthly_grouped_df['month']>2)) | ((monthly_grouped_df['year']==2021)& (monthly_grouped_df['month']<6))]
# monthly_grouped_df = monthly_grouped_df.set_index(pd.DatetimeIndex(monthly_grouped_df['Date']))

# Getting Holidays
holiday = pd.DataFrame([])
for date, name in sorted(holidays.country_holidays('GB', subdiv='England', years=[2012, 2013, 2014, 2015, 2016, 2017, 2018,2019,2020,2021, 2022, 2023, 2024]).items()):
    #holiday = holiday.append(pd.DataFrame({'ds': date, 'holiday': "England-Holidays"}, index=[0]), ignore_index=True)
    holiday= pd.concat([holiday, pd.DataFrame({'ds': date, 'holiday': "England-Holidays"}, index=[0])], ignore_index=True)

holiday['ds'] = pd.to_datetime(holiday['ds'], format='%Y-%m-%d', errors='ignore')

# Divide test and train data
train_data = monthly_grouped_df[monthly_grouped_df['year']<(training_upto+1)]
test_data = monthly_grouped_df[(monthly_grouped_df['year']>training_upto)]

monthly_grouped_df

Unnamed: 0,year,month,Crime type,day,ds,y,covid
0,2010,12,372,1,2010-12-01,372,0
1,2011,1,486,1,2011-01-01,486,0
2,2011,2,403,1,2011-02-01,403,0
3,2011,3,408,1,2011-03-01,408,0
4,2011,4,414,1,2011-04-01,414,0
...,...,...,...,...,...,...,...
143,2022,11,246,1,2022-11-01,246,0
144,2022,12,228,1,2022-12-01,228,0
145,2023,1,242,1,2023-01-01,242,0
146,2023,2,199,1,2023-02-01,199,0


In [5]:
param_grid = { 
    'seasonality_mode':('multiplicative','additive'), 
    'changepoint_prior_scale': [0.5, 0.1, 1,0.01],
    'seasonality_prior_scale': [0.5, 0.1, 1, 0.01],
    'holidays_prior_scale':[1, 10, 25, 50],
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
maes = []  # Store the RMSEs for each params here

cutoffs = pd.date_range(start='2015-01-01', end='2021-12-01', freq='2MS')

# Use cross validation to evaluate all parameters
for params in all_params:
    m = Prophet(**params, holidays= holiday).fit(monthly_grouped_df)  # Fit model with given params
    df_cv = cross_validation(m, cutoffs=cutoffs, horizon='365 days', parallel="processes")
    df_p = performance_metrics(df_cv, rolling_window=1)
    maes.append(df_p['mae'].values[0])

# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['mae'] = maes
print("Mean absolute error: ", np.argmin(maes))
best_params = all_params[np.argmin(maes)]
print("Best parameters", best_params)

13:20:16 - cmdstanpy - INFO - Chain [1] start processing
13:20:16 - cmdstanpy - INFO - Chain [1] done processing
13:20:28 - cmdstanpy - INFO - Chain [1] start processing
13:20:28 - cmdstanpy - INFO - Chain [1] done processing
13:20:37 - cmdstanpy - INFO - Chain [1] start processing
13:20:37 - cmdstanpy - INFO - Chain [1] done processing
13:20:46 - cmdstanpy - INFO - Chain [1] start processing
13:20:46 - cmdstanpy - INFO - Chain [1] done processing
13:20:55 - cmdstanpy - INFO - Chain [1] start processing
13:20:55 - cmdstanpy - INFO - Chain [1] done processing
13:21:04 - cmdstanpy - INFO - Chain [1] start processing
13:21:04 - cmdstanpy - INFO - Chain [1] done processing
13:21:13 - cmdstanpy - INFO - Chain [1] start processing
13:21:13 - cmdstanpy - INFO - Chain [1] done processing
13:21:23 - cmdstanpy - INFO - Chain [1] start processing
13:21:23 - cmdstanpy - INFO - Chain [1] done processing
13:21:31 - cmdstanpy - INFO - Chain [1] start processing
13:21:31 - cmdstanpy - INFO - Chain [1]

Mean absolute error:  59
Best parameters {'seasonality_mode': 'multiplicative', 'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 1, 'holidays_prior_scale': 50}


In [5]:
import plotly.express as px

best_model = Prophet(seasonality_mode= 'multiplicative', changepoint_prior_scale= 0.01, seasonality_prior_scale= 1, holidays_prior_scale= 50, holidays= holiday)
best_model.add_regressor('covid')
best_model.fit(train_data)
future_dates = best_model.make_future_dataframe(periods=forecast_upto, freq='MS')
future_dates['covid'] = 0
forecast = best_model.predict(future_dates)

df_plot_pred = forecast[-forecast_upto:][['ds', 'yhat']]

df_plot_true = monthly_grouped_df[['ds', 'y']]

fig = px.line(df_plot_true, x='ds', y='y', title='Model predictions for Barnet')
fig.add_scatter(x=df_plot_pred['ds'], y=df_plot_pred['yhat'], name='Prediction')
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Number of Predicted Burglaries')

14:36:31 - cmdstanpy - INFO - Chain [1] start processing
14:36:31 - cmdstanpy - INFO - Chain [1] done processing


In [6]:
# Read data, get dataframe with crimes/month
monthly_grouped_df = the_file.copy()
monthly_grouped_df= monthly_grouped_df.sort_values(['Year','Month num'],ascending=False).groupby(['Year', 'Month num'])['Crime type'].count()
monthly_grouped_df = monthly_grouped_df.to_frame()


# Rename columns, select rows with data and set datetime index
monthly_grouped_df = monthly_grouped_df.rename(columns = {'Month num':'y'})
monthly_grouped_df['day'] = 1
monthly_grouped_df = monthly_grouped_df.reset_index()
# monthly_grouped_df = monthly_grouped_df[(monthly_grouped_df['Year']>=2011) | ((monthly_grouped_df['Year'] == 2010) & (monthly_grouped_df['Month num']==12))]
monthly_grouped_df = monthly_grouped_df.rename(columns={'Month num': 'month', 'Year': 'year'})
monthly_grouped_df['ds'] = pd.to_datetime(monthly_grouped_df[['year', 'month', 'day']])
monthly_grouped_df['y'] = monthly_grouped_df['Crime type']
# monthly_grouped_df = monthly_grouped_df.set_index(pd.DatetimeIndex(monthly_grouped_df['Date']))

# Getting Holidays
holiday = pd.DataFrame([])
for date, name in sorted(holidays.country_holidays('GB', subdiv='England', years=[2012, 2013, 2014, 2015, 2016, 2017, 2018,2019,2020,2021, 2022, 2023, 2024]).items()):
#holiday = holiday.append(pd.DataFrame({'ds': date, 'holiday': "England-Holidays"}, index=[0]), ignore_index=True)
        holiday= pd.concat([holiday, pd.DataFrame({'ds': date, 'holiday': "England-Holidays"}, index=[0])], ignore_index=True)

holiday['ds'] = pd.to_datetime(holiday['ds'], format='%Y-%m-%d', errors='ignore')

# Divide test and train data
train_data = monthly_grouped_df[monthly_grouped_df['year']<(training_upto+1)]
test_data = monthly_grouped_df[(monthly_grouped_df['year']>training_upto)]

best_model = Prophet(seasonality_mode= 'multiplicative', changepoint_prior_scale= 0.01, seasonality_prior_scale= 0.15, holidays_prior_scale= 0.015, holidays= holiday).fit(train_data)
future_dates = best_model.make_future_dataframe(periods=forecast_upto, freq='MS')
forecast = best_model.predict(future_dates)

df_plot_pred = forecast[-forecast_upto:][['ds', 'yhat']]

df_plot_pred = df_plot_pred[df_plot_pred['ds'] > '2022-12-31']
df_plot_pred = df_plot_pred.reset_index()
df_plot_pred = df_plot_pred.drop(['index'], axis=1)

# Create the dataframe
file =  the_file.copy()
df_per_ward= file.sort_values(['Year','Month num'],ascending=False)
df_per_ward['Population per square kilometre'].fillna(0, inplace=True)
df_per_ward = df_per_ward.groupby(['Ward', 'Year', 'Month num', 'Population per square kilometre'])['Crime type'].count()
# Create a dataframe with the wanted dates
dates = pd.date_range(start='12/1/2010', end='12/31/2024', freq='MS').to_frame()
df_per_ward = df_per_ward.reset_index()
# Specify columns: ds, y, Date
df_per_ward['day'] = 1
df_per_ward = df_per_ward.reset_index()
df_per_ward['y'] = df_per_ward['Crime type']
df_per_ward = df_per_ward.rename(columns={'Month num': 'month', 'Year': 'year'})
df_per_ward['ds'] = pd.to_datetime(df_per_ward[['year', 'month', 'day']])
df_per_ward['Date'] = df_per_ward['ds']
dates['Date'] = dates[0]
dates = dates[['Date']]


# Loop over wards
wards = ['Barnet Vale', 'Brunswick Park', 'Burnt Oak', 'Childs Hill', 'Colindale North', 
         'Colindale South', 'Cricklewood', 'East Barnet', 'East Finchley', 'Edgware', 'Edgwarebury',
           'Finchley Church End', 'Friern Barnet', 'Garden Suburb', 'Golders Green', 'Hendon',
             'High Barnet', 'Mill Hill', 'Totteridge and Woodside', 
         'Underhill', 'West Finchley', 'West Hendon', 'Whetstone', 'Woodhouse']

for ward in wards: 
       # Select data of ward
       df_ward = df_per_ward[df_per_ward['Ward']==ward]
       df_ward = df_ward[['ds', 'Date', 'year', 'y', 'Population per square kilometre']]
       # Merge with all dates to have a consistent dataframe
       df_ward = pd.merge(dates, df_ward, how="left", on='Date')
       df_ward['ds'] = df_ward['Date']
       df_ward['y'] = df_ward['y'].fillna(0)
       df_ward = df_ward.fillna(df_ward.mean())
       df_ward['year'] = pd.DatetimeIndex(df_ward['Date']).year
       train_ward = df_ward[df_ward['year']<(training_upto+1)]
       test_ward = df_ward[(df_ward['year']>training_upto)] 
       best_model = best_model = Prophet(seasonality_mode= 'additive', changepoint_prior_scale= 0.01, seasonality_prior_scale= 0.15, holidays_prior_scale= 0.015, holidays= holiday)
       best_model.add_regressor('Population per square kilometre')
       best_model.fit(train_ward)
       future_dates = best_model.make_future_dataframe(periods=forecast_upto, freq='MS')
       future_dates = pd.merge(future_dates, df_ward, how="left", on='ds')
       forecastward = best_model.predict(future_dates)
       #print(ward)
       
       df_ward_pred = forecastward[-forecast_upto:][['ds', 'yhat']]

       df_ward_pred = df_ward_pred[df_ward_pred['ds'] > '2022-12-31']
       df_ward_pred = df_ward_pred.reset_index()
       df_ward_pred = df_ward_pred.drop(['index'], axis=1)

       ward_pred_list = []
       for value in df_ward_pred['yhat']:
             ward_pred_list.append(value)

       df_plot_pred[ward] = ward_pred_list

14:36:31 - cmdstanpy - INFO - Chain [1] start processing
14:36:31 - cmdstanpy - INFO - Chain [1] done processing
14:36:32 - cmdstanpy - INFO - Chain [1] start processing
14:36:32 - cmdstanpy - INFO - Chain [1] done processing
14:36:32 - cmdstanpy - INFO - Chain [1] start processing
14:36:32 - cmdstanpy - INFO - Chain [1] done processing
14:36:32 - cmdstanpy - INFO - Chain [1] start processing
14:36:32 - cmdstanpy - INFO - Chain [1] done processing
14:36:32 - cmdstanpy - INFO - Chain [1] start processing
14:36:32 - cmdstanpy - INFO - Chain [1] done processing
14:36:33 - cmdstanpy - INFO - Chain [1] start processing
14:36:33 - cmdstanpy - INFO - Chain [1] done processing
14:36:33 - cmdstanpy - INFO - Chain [1] start processing
14:36:33 - cmdstanpy - INFO - Chain [1] done processing
14:36:33 - cmdstanpy - INFO - Chain [1] start processing
14:36:33 - cmdstanpy - INFO - Chain [1] done processing
14:36:33 - cmdstanpy - INFO - Chain [1] start processing
14:36:34 - cmdstanpy - INFO - Chain [1]

In [7]:
holiday = pd.DataFrame([])
for date, name in sorted(holidays.country_holidays('GB', subdiv='England', years=[2012, 2013, 2014, 2015, 2016, 2017, 2018,2019,2020,2021, 2022, 2023, 2024]).items()):
    holiday= pd.concat([holiday, pd.DataFrame({'ds': date, 'holiday': "England-Holidays"}, index=[0])], ignore_index=True)

holiday['ds'] = pd.to_datetime(holiday['ds'], format='%Y-%m-%d', errors='ignore')

ward = 'Brunswick Park'
df_ward = df_per_ward[df_per_ward['Ward']==ward]
df_ward = df_ward[['ds', 'Date', 'year', 'y', 'Population per square kilometre']]
df_ward = pd.merge(dates, df_ward, how="left", on='Date')
df_ward['ds'] = df_ward['Date']
df_ward['y'] = df_ward['y'].fillna(0)
df_ward = df_ward.fillna(df_ward.mean())
df_ward['year'] = pd.DatetimeIndex(df_ward['Date']).year
train_ward = df_ward[df_ward['year']<(training_upto+1)]
test_ward = df_ward[(df_ward['year']>training_upto)] 
#print((df_ward['Population'].isnull().sum()))
#print(df_ward.columns)
best_model = Prophet(seasonality_mode= 'multiplicative', changepoint_prior_scale= 0.01, seasonality_prior_scale= 0.15, holidays_prior_scale= 0.015, holidays= holiday)
best_model.add_regressor('Population per square kilometre')
best_model.fit(train_ward)
future_dates = best_model.make_future_dataframe(periods=forecast_upto, freq='MS')
#print(future_dates)
future_dates = pd.merge(future_dates, df_ward, how="left", on='ds')
forecastward = best_model.predict(future_dates)
#print(ward)
#print(r2_score(test_ward[['y']], forecast['yhat'][-forecast_upto:]))

df_ward_true = df_ward[['ds', 'y']]
df_ward_true = df_ward_true[(df_ward_true != 0).all(1)]


df_ward_pred = forecastward[-forecast_upto:][['ds', 'yhat']]

figward = px.line(df_ward_true, x='ds', y='y', title='Model predictions for specific ward in Barnet')
figward.add_scatter(x=df_ward_pred['ds'], y=df_ward_pred['yhat'], name='Prediction')
figward.update_xaxes(title_text='Date')
figward.update_yaxes(title_text='Number of Predicted Burglaries')

14:36:38 - cmdstanpy - INFO - Chain [1] start processing
14:36:38 - cmdstanpy - INFO - Chain [1] done processing


In [8]:
filtered_df = pd.read_csv ('C:/Users/20212828/Downloads/Bigfile/filtered_df.csv')
df_bar = filtered_df['Ward new'].value_counts()

figure1 = px.bar(df_bar, title="Number of burglaries by ward between Dec 2010 and March 2023")
figure1.update_layout(
    xaxis_title="Ward",
    yaxis_title="Number of burglaries")
figure1.update_xaxes(tickangle=90)
figure1.show()

In [9]:
filtered_df_new = the_file.copy()

fig_w = px.line(filtered_df_new.groupby(['Ward', 'Year']).size().reset_index(name='Burglary Rate'), x='Year', y='Burglary Rate', color='Ward',
                   title="Seasonal Plot: Ward Burglary Cases Growth")
fig_w.update_yaxes(title="Number of Cases")
fig_w.update_traces(mode="markers+lines", hovertemplate=None)
fig_w.update_layout(hovermode="x unified")

fig_w.show()

In [10]:
from dash import Dash, html, dcc
from jupyter_dash import JupyterDash
from dash import Input, Output

df_ward_future_pred = df_ward_pred[df_ward_pred['ds'] > '2022-12-31']
df_ward_future_pred = df_ward_future_pred.reset_index()
df_ward_future_pred = df_ward_future_pred.drop(['index'], axis=1)
df_ward_future_pred['yhat'] = df_ward_future_pred['yhat'].round()
df_ward_future_pred['yhat'] = df_ward_future_pred['yhat'].fillna(0).astype(int)
df_ward_future_pred['ds'] = df_ward_future_pred['ds'].values.astype(str)
df_ward_future_pred['ds'] = df_ward_future_pred['ds'].str.split('T')
df_ward_future_pred['ds'] = df_ward_future_pred['ds']
df_ward_future_pred['ds'] = df_ward_future_pred['ds'].apply(lambda x: str(x[0]))
df_ward_future_pred

wards = ['Barnet Vale', 'Brunswick Park', 'Burnt Oak', 'Childs Hill', 'Colindale North', 
         'Colindale South', 'Cricklewood', 'East Barnet', 'East Finchley', 'Edgware', 'Edgwarebury',
           'Finchley Church End', 'Friern Barnet', 'Garden Suburb', 'Golders Green', 'Hendon',
             'High Barnet', 'Mill Hill', 'Totteridge and Woodside', 
         'Underhill', 'West Finchley', 'West Hendon', 'Whetstone', 'Woodhouse']

image_filename = 'C:/Users/20212828/Downloads/BarnetWardsMap.jpg'
encoded_image = base64.b64encode(open(image_filename, 'rb').read())


# Create Dash app
app = JupyterDash(__name__)
# Define app layout
app.layout = html.Div([
    html.H1("Predicted Burglaries for all wards in Barnet", style={'text-align': 'center', 'margin-top': '30px', 'font-size': '45px', 'font-family': 'Arial, sans-serif'}),
    html.Div(
        style={'display': 'flex', 'justify-content': 'center', 'flex-direction': 'column', 'align-items': 'center'},
        children=[
            html.Div(
                style={'text-align': 'center'},
                children=[
                    html.Img(src='data:image/png;base64,{}'.format(encoded_image.decode()), alt='Image Description'),
                    dcc.Dropdown(
                        options=wards,
                        value='Barnet Vale',
                        id='filter_ward',
                        style={'font-size': '24px'}
                    ),
                    dcc.Dropdown(
                        options=[{'label': str(date), 'value': date} for date in df_ward_future_pred['ds']],
                        value=df_ward_future_pred['ds'][0],
                        id='select',
                        style={'font-size': '24px'}
                    ),
                    html.Div(id='output-container', style={'font-size': '20px'})
                ]
            ),
            dcc.Graph(id='figward', figure=figward, style={'width': '1500px', 'height': '600px'})
        ]
    ),
    html.Div(
        style={'display': 'flex', 'justify-content': 'center'},
        children=[
            dcc.Graph(id='fig1', figure=fig, style={'width': '1500px', 'height': '600px'})
        ]
    ),
    html.Div(
        style={'display': 'flex', 'justify-content': 'center'},
        children=[
            dcc.Graph(id='fig2', figure=figure1, style={'width': '1500px', 'height': '600px'})
        ]
    ),
    html.Div(
        style={'display': 'flex', 'justify-content': 'center'},
        children=[
            dcc.Graph(id='figw', figure=fig_w, style={'width': '1500px', 'height': '600px'})
        ]
    )
])


@app.callback(
    Output('figward', 'figure'),
    [Input('filter_ward', 'value')],
    [Input("select", "value")]
)
def update_output(selected_option, dateselect):
    holiday = pd.DataFrame([])
    for date, name in sorted(holidays.country_holidays('GB', subdiv='England', years=[2012, 2013, 2014, 2015, 2016, 2017, 2018,2019,2020,2021, 2022, 2023, 2024]).items()):
        holiday= pd.concat([holiday, pd.DataFrame({'ds': date, 'holiday': "England-Holidays"}, index=[0])], ignore_index=True)

    holiday['ds'] = pd.to_datetime(holiday['ds'], format='%Y-%m-%d', errors='ignore')

    ward = selected_option
    df_ward = df_per_ward[df_per_ward['Ward']==ward]
    df_ward = df_ward[['ds', 'Date', 'year', 'y', 'Population per square kilometre']]
    #print('df ward', df_ward)
    # Merge with all dates to have a consistent dataframe
    df_ward = pd.merge(dates, df_ward, how="left", on='Date')
    df_ward['ds'] = df_ward['Date']
    df_ward['y'] = df_ward['y'].fillna(0)
    df_ward = df_ward.fillna(df_ward.mean())
    df_ward['year'] = pd.DatetimeIndex(df_ward['Date']).year
    train_ward = df_ward[df_ward['year']<(training_upto+1)]
    test_ward = df_ward[(df_ward['year']>training_upto)] 
    #print(df_ward.columns)
    best_model = best_model = Prophet(seasonality_mode= 'additive', changepoint_prior_scale= 0.01, seasonality_prior_scale= 0.15, holidays_prior_scale= 0.015, holidays= holiday)
    best_model.add_regressor('Population per square kilometre')
    best_model.fit(train_ward)
    future_dates = best_model.make_future_dataframe(periods=forecast_upto, freq='MS')
    #print(future_dates)
    future_dates = pd.merge(future_dates, df_ward, how="left", on='ds')
    forecastward = best_model.predict(future_dates)
    #print(ward)
    #print(r2_score(test_ward[['y']], forecast['yhat'][-forecast_upto:]))

    df_ward_true = df_ward[['ds', 'y']]

    df_ward_pred = forecastward[-forecast_upto:][['ds', 'yhat']]
    df_ward_true = df_ward_true[(df_ward_true != 0).all(1)]

    figward = px.line(df_ward_true, x='ds', y='y', title='Model predictions for selected ward in Barnet')
    figward.add_scatter(x=df_ward_pred['ds'], y=df_ward_pred['yhat'], name='Prediction')
    figward.update_xaxes(title_text='Date')
    figward.update_yaxes(title_text='Number of Predicted Burglaries')

    return (figward)

@app.callback(
    Output('output-container', 'children'),
    [Input('filter_ward', 'value')],
    [Input("select", "value")]
)
def update_output_div(selected_option, dateselect):
    holiday = pd.DataFrame([])
    for date, name in sorted(holidays.country_holidays('GB', subdiv='England', years=[2012, 2013, 2014, 2015, 2016, 2017, 2018,2019,2020,2021, 2022, 2023, 2024]).items()):
        holiday= pd.concat([holiday, pd.DataFrame({'ds': date, 'holiday': "England-Holidays"}, index=[0])], ignore_index=True)

    holiday['ds'] = pd.to_datetime(holiday['ds'], format='%Y-%m-%d', errors='ignore')

    ward = selected_option
    df_ward = df_per_ward[df_per_ward['Ward']==ward]
    df_ward = df_ward[['ds', 'Date', 'year', 'y', 'Population per square kilometre']]

    df_ward = pd.merge(dates, df_ward, how="left", on='Date')
    df_ward['ds'] = df_ward['Date']
    df_ward['y'] = df_ward['y'].fillna(0)
    df_ward = df_ward.fillna(df_ward.mean())
    df_ward['year'] = pd.DatetimeIndex(df_ward['Date']).year
    train_ward = df_ward[df_ward['year']<(training_upto+1)]
    test_ward = df_ward[(df_ward['year']>training_upto)] 

    best_model = best_model = Prophet(seasonality_mode= 'additive', changepoint_prior_scale= 0.01, seasonality_prior_scale= 0.15, holidays_prior_scale= 0.015, holidays= holiday)
    best_model.add_regressor('Population per square kilometre')
    best_model.fit(train_ward)
    future_dates = best_model.make_future_dataframe(periods=forecast_upto, freq='MS')

    future_dates = pd.merge(future_dates, df_ward, how="left", on='ds')
    forecastward = best_model.predict(future_dates)

    df_ward_true = df_ward[['ds', 'y']]

    df_ward_pred = forecastward[-forecast_upto:][['ds', 'yhat']]
    df_ward_true = df_ward_true[(df_ward_true != 0).all(1)]

    figward = px.line(df_ward_true, x='ds', y='y')
    figward.add_scatter(x=df_ward_pred['ds'], y=df_ward_pred['yhat'])

    df_ward_future_pred = df_ward_pred[df_ward_pred['ds'] > '2022-12-31']
    df_ward_future_pred = df_ward_future_pred.reset_index()
    df_ward_future_pred = df_ward_future_pred.drop(['index'], axis=1)
    df_ward_future_pred['yhat'] = df_ward_future_pred['yhat'].round()
    df_ward_future_pred['yhat'] = df_ward_future_pred['yhat'].fillna(0).astype(int)
    df_ward_future_pred['ds'] = df_ward_future_pred['ds'].values.astype(str)
    df_ward_future_pred['ds'] = df_ward_future_pred['ds'].str.split('T')
    df_ward_future_pred['ds'] = df_ward_future_pred['ds']
    df_ward_future_pred['ds'] = df_ward_future_pred['ds'].apply(lambda x: str(x[0]))

    # Calculate the value of 'burglaries'

    burglariesdf = df_ward_future_pred[df_ward_future_pred['ds'] == dateselect]
    burglaries = burglariesdf['yhat'].iloc[0] 
    if burglaries < 0:
         burglaries = 0

    hours = 6000
    df_pred_hours = df_plot_pred.copy()
    for ward in wards:
        df_pred_hours[ward] = (df_pred_hours[ward]/df_pred_hours['yhat']) * hours
        df_pred_hours[ward] = df_pred_hours[ward].apply(lambda x: 0 if x < 0 else x)
        df_pred_hours[ward] = df_pred_hours[ward].round()
        df_pred_hours[ward] = df_pred_hours[ward].fillna(0).astype(int)

    df_pred_hours

    value = df_pred_hours.loc[df_pred_hours['ds'] == dateselect, selected_option].values[0]

    # Create the HTML content
    content = html.H3(f"There are {str(burglaries)} burglaries predicted in the month {dateselect} in ward {selected_option}. This will require {value} hours per month in this ward")

    return content

    
if __name__ == '__main__':
        app.run_server(debug=True, port=8052)

Dash is running on http://127.0.0.1:8052/

Dash app running on http://127.0.0.1:8052/
