# Notebook to analyse the results from the pilot

This notebook focuses mainly on the information stored in the database.

In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

## Connect to Database

In [None]:
# Load the settings from the .env file one directory above the working
# directory into the process environment.
load_dotenv("../.env")

# Database connection settings. Indexing os.environ (instead of .get) makes a
# missing variable fail immediately with a KeyError.
db_user, db_passwd, db_host, db_port, db_database = (
    os.environ[key]
    for key in ("DB_USER", "DB_PASSWD", "DB_HOST", "DB_PORT", "DB_DATABASE")
)

In [None]:
from urllib.parse import quote_plus

# SQLAlchemy database URL for the MSSQL server (pymssql driver).
# User name and password are percent-encoded: characters such as "@", ":",
# "/" or "%" in a credential would otherwise be parsed as URL delimiters and
# produce a broken or wrong connection target.
CONNECTSTRING = (
    f"mssql+pymssql://{quote_plus(db_user)}:{quote_plus(db_passwd)}"
    f"@{db_host}:{db_port}/{db_database}"
)
engine = create_engine(CONNECTSTRING)
# sessionmaker returns a session *factory* bound to the engine; call
# session_object() to obtain an actual Session.
session_object = sessionmaker(bind=engine)

In [None]:
# Load the three API tables from the "noshow" schema into DataFrames.
# The unpacking order matches the order of the table names below.
call_response, prediction, requests = (
    pd.read_sql_table(table_name, engine, schema="noshow")
    for table_name in ("apicallresponse", "apiprediction", "apirequest")
)

## Preprocess data

In [None]:
# Display the raw contents of the "apirequest" table for visual inspection.
requests

In [None]:
# Pair every prediction with the call response(s) that reference it
# (inner join: predictions without a call response are dropped).
prediction_call = prediction.merge(
    call_response, left_on="id", right_on="prediction_id"
)
# Keep only rows whose start_time is on or after 2 October 2023
# (presumably the start of the pilot -- confirm).
# .copy() makes the filtered frame independent of the merged one, so the
# column assignment below does not trigger pandas' SettingWithCopyWarning.
prediction_call = prediction_call.loc[
    prediction_call["start_time"] >= "2023-10-02"
].copy()
# Reduce the timestamps to calendar dates so rows can be counted per day.
prediction_call["start_time"] = prediction_call["start_time"].dt.date
# Number of rows per (day, clinic, call outcome) combination.
result_per_day = prediction_call.groupby(
    ["start_time", "clinic_name", "call_outcome"]
).size()

## Analyse data

In [None]:
# Bar chart of the total count per call outcome, summed over all days and
# clinics.
fig, ax = plt.subplots(figsize=(10, 5))
outcome_totals = result_per_day.groupby("call_outcome").sum()
outcome_totals.plot.bar(ax=ax)
ax.set_title("Uitkomst Pilot")
# Suppress the automatic x-axis label (the index name).
ax.set_xlabel("")
plt.show()

In [None]:
# Counts for the "Kind-KNO" clinic only, pivoted so that each call outcome
# becomes its own column, then drawn as grouped bars.
kind_kno_per_day = result_per_day.loc[:, "Kind-KNO"]
kind_kno_per_day.unstack().plot.bar()

In [None]:
# Number of rows (prediction/call-response pairs) per clinic.
prediction_call.groupby("clinic_name").size()

In [None]:
# Number of rows per clinic, broken down by call outcome.
prediction_call.groupby(["clinic_name", "call_outcome"]).size()

In [None]:
# Frequency of each distinct value in the call_status column.
prediction_call.value_counts("call_status")

In [None]:
# All distinct values of the remarks column, as an array, for manual reading.
prediction_call["remarks"].drop_duplicates().values

In [None]:
# Attach the matching API request to every prediction/call row
# (inner join on prediction_call.request_id == requests.id).
prediction_call_request = pd.merge(
    prediction_call,
    requests,
    left_on="request_id",
    right_on="id",
)
# Display the joined table.
prediction_call_request

In [None]:
# Derive the calendar day from the "timestamp" column (presumably the moment
# the request was made -- confirm) and count the rows per day.
timestamps = prediction_call_request["timestamp"]
prediction_call_request["day_called"] = timestamps.dt.date
prediction_call_request.groupby("day_called").size()

## Analyse text

In [None]:
# Distinct free-text remarks that are worth reading manually.
# Remarks containing one of these fragments are left out; the fragments omit
# the first letter, presumably so that both capitalised and lower-case
# spellings (voicemail / doorverbinden / beschikbaar) match -- confirm.
excluded_fragments = ("oicemail", "oorverbin", "eschikbaar")
text_contents = [
    remark
    for remark in prediction_call["remarks"].drop_duplicates().values
    # Missing remarks (None/NaN) are not strings; a substring test on them
    # would raise TypeError, and they carry no text to analyse anyway.
    if isinstance(remark, str)
    and not any(fragment in remark for fragment in excluded_fragments)
]
text_contents

Uitkomst open tekstvelden:
- Wegens omstandigheden niet zeker of afspraak door kan gaan: 9
- Niet/Laat op de hoogte van afspraak: 17
- Positief: 3
- Negatief: 2

Veel mensen hebben moeite met het digitale portaal en als ze dan ook geen brief krijgen, hebben ze het overzicht niet.
Daarnaast staan er ook veel verkeerde nummers in het systeem.