In [1]:
import pandas as pd
import numpy as np
import random
import statsmodels.api as sm
from datetime import datetime, timedelta
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import r2_score
import plotly.graph_objects as go
import pickle

In [2]:
import random
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Define the mode_of_arrival options
mode_of_arrival_options = ['Ambulance', 'Own Vehicle', 'Walk In', 'Other']

# Define the disposition options
disposition_options = ['HOME', 'ADMITTED', 'ELOPED', 'LEFT WITHOUT BEING SEEN', 'OTHER', 'LEFT AGAINST MEDICAL ADVICE', 'EXPIRED']

# Define the gender options
gender_options = ['M', 'F']

# Set a seed for reproducibility (optional)
random.seed(2020)

desired_total_count = 34730

# List of values specifying the number of IDs to generate for each month
original_num_ids_list = [15.42, 15.23, 15.30, 15.82, 15.77, 15.98, 15.72, 16.18, 16.27, 16.24, 16.07, 16.28]

# Calculate the total number of IDs generated in a year
total_ids_in_year = sum(original_num_ids_list)

# Define the scaling factor
scaling_factor = desired_total_count / total_ids_in_year

print('Scaling Factor:', scaling_factor)

# Calculate the scaled number of IDs for each month
num_ids_list = [int(original_num_ids * scaling_factor) for original_num_ids in original_num_ids_list]

# Define the start and end years, including a leap year
start_year = 2015
end_year = 2020

# Create an empty list to store data
data = []

# Initialize a linear trend factor
linear_trend_factor = 1

# Loop through each year
for year in range(start_year, end_year + 1):
    # Introduce a random percentage increase between 20% and 50%
    percentage_increase = random.uniform(0.30, 0.50)
    print('% increase:', percentage_increase)
    num_ids_list_with_increase = [int(num_ids * (1 + percentage_increase)) for num_ids in num_ids_list]

    # Apply the linear trend factor
    num_ids_list_with_trend = [int(num_ids * linear_trend_factor) for num_ids in num_ids_list_with_increase]

    # Adjust the linear trend factor for the next year (you can customize this)
    linear_trend_factor += 0.05  # Adjust the trend factor as needed

    # Determine if the current year is a leap year
    is_leap_year = (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0)

    # Calculate the number of patients with 'Transfer_date' for the current year
    total_admissions = sum(num_ids_list_with_trend)
    num_transfer_patients = int(total_admissions * 0.035)

    # Calculate the number of transfer patients for the current year
    num_transfer_patients_yearly = int(total_admissions * 0.035)

    # Spread out transfer patients throughout the year
    transfer_month_distribution = [int(num_transfer_patients_yearly / 12)] * 12
    remaining_transfers = num_transfer_patients_yearly % 12

    # Distribute the remaining transfer patients randomly across the months
    for _ in range(remaining_transfers):
        random_month = random.randint(0, 11)
        transfer_month_distribution[random_month] += 1

    # Loop through each month
    for month, num_ids in enumerate(num_ids_list_with_trend, start=1):
        # Introduce seasonality by varying the number of IDs based on the month
        seasonality_factor = 1 + np.sin(2 * np.pi * (month - 1) / 12)  # Adjust the seasonality factor as needed
        num_ids = int(num_ids * seasonality_factor)

        # Generate random IDs, start dates, end dates, mode_of_arrival, and disposition for the current month
        for _ in range(num_ids):
            day = random.randint(1, 28)  # Generate a random day within the month
            start_date = datetime(year, month, day)
            end_date = start_date + timedelta(days=random.randint(1, 30))  # Generate a random end date within 30 days
            id_value = random.randint(1000000, 99999999)  # Generate 8 digits Patient IDs
            arrival_mode = random.choice(mode_of_arrival_options)
            gender = random.choice(gender_options)

            # Determine if this patient should have a 'Transfer_date'
            if transfer_month_distribution[month - 1] > 0:
                transfer_date = start_date
                transfer_month_distribution[month - 1] -= 1
                disposition = 'TRANSFER'
            else:
                transfer_date = None
                disposition = random.choice(disposition_options)

            if transfer_date == start_date:
                transfer_type = 'Transfer Admissions/Incoming Transfers'
            else:
                transfer_type = ''

            data.append([start_date, end_date, transfer_date, id_value, arrival_mode, disposition, transfer_type, gender])

# Create a Pandas DataFrame from the data
master_data = pd.DataFrame(data, columns=["Admission Date", "Discharge Date", "Transfer Date", "Patient ID", "Mode of Arrival", "Disposition", "Transfer Type", "Gender"])

master_data.to_csv('ADT dataset - Streamlit V1.csv')

# Assuming 'master_data' is your DataFrame
# Convert the 'Admission Date' column to datetime if it's not already
master_data['Admission Date'] = pd.to_datetime(master_data['Admission Date'])

# Extract the year from the 'Admission Date' column
master_data['Year'] = master_data['Admission Date'].dt.year

# Group by year and count admissions
yearly_admissions = master_data.groupby('Year')['Patient ID'].count()

yearly_admissions_percentage_increase = yearly_admissions.pct_change() * 100

print(yearly_admissions)
# Print the results
print(yearly_admissions_percentage_increase)

Scaling Factor: 182.5204961109943
% increase: 0.4239338541321323
% increase: 0.3074988430156789
% increase: 0.3708261589685712
% increase: 0.4976813389239167
% increase: 0.3975210925249858
% increase: 0.4219594918201598
Year
2015    48878
2016    47117
2017    51754
2018    59118
2019    57559
2020    61011
Name: Patient ID, dtype: int64
Year
2015          NaN
2016    -3.602848
2017     9.841458
2018    14.228852
2019    -2.637099
2020     5.997324
Name: Patient ID, dtype: float64


In [3]:
master_data['Admission Date'] = pd.to_datetime(master_data['Admission Date'])

# Extract the year from the 'Admission Date' column
master_data['Year'] = master_data['Admission Date'].dt.year

# Group by year and count admissions
yearly_admissions = master_data.groupby('Year')['Patient ID'].count()

yearly_admissions

Year
2015    48878
2016    47117
2017    51754
2018    59118
2019    57559
2020    61011
Name: Patient ID, dtype: int64

# Admissions

In [4]:
# Training dataset:
training_data = master_data[(master_data['Admission Date'] >= '2015-01-01') & (master_data['Admission Date'] <= '2020-11-30')]
train_data = training_data.groupby(training_data["Admission Date"])["Patient ID"].count().reset_index()
train_data.columns = ["Admission Date", "patient_count"]
print('Training data:\n',train_data.tail(10))
train_df = train_data["patient_count"]
train_df.name = "patient_count"
print('\n Training data count:\n',train_df)


# Testng dataset:
testing_data = master_data[(master_data['Admission Date'] >= '2020-12-16') & (master_data['Admission Date'] <= '2020-12-31')]
test_data = testing_data.groupby(testing_data["Admission Date"])["Patient ID"].count().reset_index()
test_data.columns = ["Admission Date", "patient_count"]
print('Testing data:\n',test_data.tail(10))
test_df = test_data["patient_count"]
test_df.name = "patient_count"
print('\n Testing data count:\n',test_df)

Training data:
      Admission Date  patient_count
1810     2020-11-19             30
1811     2020-11-20             30
1812     2020-11-21             29
1813     2020-11-22             24
1814     2020-11-23             31
1815     2020-11-24             26
1816     2020-11-25             20
1817     2020-11-26             26
1818     2020-11-27             16
1819     2020-11-28             19

 Training data count:
 0       135
1       156
2       138
3       143
4       151
       ... 
1815     26
1816     20
1817     26
1818     16
1819     19
Name: patient_count, Length: 1820, dtype: int64
Testing data:
    Admission Date  patient_count
3      2020-12-19             84
4      2020-12-20             95
5      2020-12-21             67
6      2020-12-22             88
7      2020-12-23            106
8      2020-12-24             96
9      2020-12-25            104
10     2020-12-26             98
11     2020-12-27             87
12     2020-12-28            105

 Testing data co

In [5]:
train_data = train_df
test_data = test_df

data = train_data

# Define the order of the SARIMA model (p, d, q), (P, D, Q, S)
# p: AutoRegressive order
# d: Differencing order
# q: Moving Average order
# P: Seasonal AutoRegressive order
# D: Seasonal Differencing order
# Q: Seasonal Moving Average order
# S: Seasonal period (e.g., 12 for monthly data with yearly seasonality)
p, d, q = 2, 2, 1
P, D, Q, S = 3, 3, 3, 12

# Fit the SARIMAX model to your data
admissions_model = sm.tsa.SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S))
results = admissions_model.fit()

  warn('Non-invertible starting seasonal moving average'


In [6]:
with open('admission_model.pkl', 'wb') as model_file:
    pickle.dump(results, model_file)

# Discharges

In [7]:
master_data['Discharge Date'] = pd.to_datetime(master_data['Discharge Date'])

# Extract the year from the 'Admission Date' column
master_data['Year'] = master_data['Discharge Date'].dt.year

# Group by year and count admissions
yearly_admissions = master_data.groupby('Year')['Patient ID'].count()

yearly_admissions

Year
2015    47896
2016    47167
2017    51662
2018    58959
2019    57650
2020    60904
2021     1199
Name: Patient ID, dtype: int64

In [8]:
# Training dataset:
training_data_discharge = master_data[(master_data['Discharge Date'] >= '2015-01-01') & (master_data['Discharge Date'] <= '2020-11-30')]
train_data_discharge = training_data_discharge.groupby(training_data["Discharge Date"])["Patient ID"].count().reset_index()
train_data_discharge.columns = ["Discharge Date", "patient_count"]
print('Training data:\n',train_data_discharge.head(10))
train_df_discharge = train_data_discharge["patient_count"]
train_df_discharge.name = "patient_count"
print('\n Training data count:\n',train_df)


# Testng dataset:
testing_data_discharge = master_data[(master_data['Discharge Date'] >= '2020-12-01') & (master_data['Discharge Date'] <= '2020-12-31')]
test_data_discharge = testing_data_discharge.groupby(testing_data["Discharge Date"])["Patient ID"].count().reset_index()
test_data_discharge.columns = ["Discharge Date", "patient_count"]
print('Testing data:\n',test_data_discharge.tail(10))
test_df_discharge = test_data_discharge["patient_count"]
test_df_discharge.name = "patient_count"
print('\n Testing data count:\n',test_df)

Training data:
   Discharge Date  patient_count
0     2015-01-02             12
1     2015-01-03              7
2     2015-01-04             16
3     2015-01-05             16
4     2015-01-06             19
5     2015-01-07             18
6     2015-01-08             32
7     2015-01-09             32
8     2015-01-10             45
9     2015-01-11             46

 Training data count:
 0       135
1       156
2       138
3       143
4       151
       ... 
1815     26
1816     20
1817     26
1818     16
1819     19
Name: patient_count, Length: 1820, dtype: int64
Testing data:
    Discharge Date  patient_count
5      2020-12-22             17
6      2020-12-23             17
7      2020-12-24             22
8      2020-12-25             25
9      2020-12-26             25
10     2020-12-27             45
11     2020-12-28             39
12     2020-12-29             40
13     2020-12-30             33
14     2020-12-31             30

 Testing data count:
 0      91
1      77
2      

In [9]:
train_data = train_df_discharge
test_data = test_df_discharge

# Load your time series data into a pandas DataFrame
# Replace 'your_data.csv' with your data file
#data = pd.read_csv('ADT datset V1.csv')
#data['Date'] = pd.to_datetime(data['Date'])  # Make sure the 'Date' column is in datetime format
#data.set_index('Date', inplace=True)  # Set 'Date' as the index

data = train_data

# Define the order of the SARIMA model (p, d, q), (P, D, Q, S)
# p: AutoRegressive order
# d: Differencing order
# q: Moving Average order
# P: Seasonal AutoRegressive order
# D: Seasonal Differencing order
# Q: Seasonal Moving Average order
# S: Seasonal period (e.g., 12 for monthly data with yearly seasonality)
p, d, q = 2, 2, 3
P, D, Q, S = 3, 3, 3, 12

# Fit the SARIMAX model to your data
model = sm.tsa.SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S))
results = model.fit()

  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting seasonal moving average'


In [10]:
with open('discharge_model.pkl', 'wb') as model_file:
    pickle.dump(results, model_file)

# Transfers

In [11]:
master_data['Transfer Date'] = pd.to_datetime(master_data['Transfer Date'])

# Extract the year from the 'Admission Date' column
master_data['Year'] = master_data['Transfer Date'].dt.year

# Group by year and count admissions
yearly_transfers = master_data.groupby('Year')['Patient ID'].count()

yearly_transfers

Year
2015.0    1586
2016.0    1529
2017.0    1679
2018.0    1918
2019.0    1867
2020.0    1976
Name: Patient ID, dtype: int64

In [12]:
# Training dataset:
training_data_transfer = master_data[(master_data['Transfer Date'] >= '2015-01-01') & (master_data['Transfer Date'] <= '2020-11-30')]
train_data_transfer = training_data_transfer.groupby(training_data["Transfer Date"])["Patient ID"].count().reset_index()
train_data_transfer.columns = ["Transfer Date", "patient_count"]
print('Training data:\n',train_data_transfer.head(10))
train_df_transfer = train_data_transfer["patient_count"]
train_df_transfer.name = "patient_count"
print('\n Training data count:\n',train_df)


# Testng dataset:
testing_data_transfer = master_data[(master_data['Transfer Date'] >= '2020-12-01') & (master_data['Transfer Date'] <= '2020-12-31')]
test_data_transfer = testing_data_transfer.groupby(testing_data_transfer["Transfer Date"])["Patient ID"].count().reset_index()
test_data_transfer.columns = ["Transfer Date", "patient_count"]
print('Testing data:\n',test_data_transfer.tail(10))
test_df_transfer = test_data_transfer["patient_count"]
test_df_transfer.name = "patient_count"
print('\n Testing data count:\n',test_df_transfer)

Training data:
   Transfer Date  patient_count
0    2015-01-01              3
1    2015-01-02              9
2    2015-01-03              8
3    2015-01-04              4
4    2015-01-05              4
5    2015-01-06              3
6    2015-01-07              6
7    2015-01-08              8
8    2015-01-09              2
9    2015-01-10              3

 Training data count:
 0       135
1       156
2       138
3       143
4       151
       ... 
1815     26
1816     20
1817     26
1818     16
1819     19
Name: patient_count, Length: 1820, dtype: int64
Testing data:
    Transfer Date  patient_count
18    2020-12-19              7
19    2020-12-20              7
20    2020-12-21              3
21    2020-12-22              5
22    2020-12-23              4
23    2020-12-24              5
24    2020-12-25              3
25    2020-12-26              8
26    2020-12-27              6
27    2020-12-28              6

 Testing data count:
 0     11
1      5
2      8
3      7
4      6
5   

In [13]:
train_data = train_df_transfer
test_data = test_df_transfer

# Load your time series data into a pandas DataFrame
# Replace 'your_data.csv' with your data file
#data = pd.read_csv('ADT datset V1.csv')
#data['Date'] = pd.to_datetime(data['Date'])  # Make sure the 'Date' column is in datetime format
#data.set_index('Date', inplace=True)  # Set 'Date' as the index

data = train_data

# Define the order of the SARIMA model (p, d, q), (P, D, Q, S)
# p: AutoRegressive order
# d: Differencing order
# q: Moving Average order
# P: Seasonal AutoRegressive order
# D: Seasonal Differencing order
# Q: Seasonal Moving Average order
# S: Seasonal period (e.g., 12 for monthly data with yearly seasonality)
p, d, q = 2, 2, 2
P, D, Q, S = 2, 2, 2, 12

# Fit the SARIMAX model to your data
model = sm.tsa.SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S))
results = model.fit()

  warn('Non-invertible starting MA parameters found.'


In [14]:
with open('transfer_model.pkl', 'wb') as model_file:
    pickle.dump(results, model_file)