In [9]:
import pandas as pd
import numpy as np
import random
import statsmodels.api as sm
from datetime import datetime, timedelta
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import r2_score
import plotly.graph_objects as go
import pickle

In [10]:
import random
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Define the mode_of_arrival options
mode_of_arrival_options = ['Ambulance', 'Own Vehicle', 'Walk In', 'Other']

# Define the disposition options
disposition_options = ['HOME', 'ADMITTED', 'ELOPED', 'LEFT WITHOUT BEING SEEN', 'OTHER', 'LEFT AGAINST MEDICAL ADVICE', 'EXPIRED']

# Define the gender options
gender_options = ['M', 'F']

# Set a seed for reproducibility (optional)
random.seed(2020)

desired_total_count = 34730

# List of values specifying the number of IDs to generate for each month
original_num_ids_list = [15.42, 15.23, 15.30, 15.82, 15.77, 15.98, 15.72, 16.18, 16.27, 16.24, 16.07, 16.28]

# Calculate the total number of IDs generated in a year
total_ids_in_year = sum(original_num_ids_list)

# Define the scaling factor
scaling_factor = desired_total_count / total_ids_in_year

print('Scaling Factor:', scaling_factor)

# Calculate the scaled number of IDs for each month
num_ids_list = [int(original_num_ids * scaling_factor) for original_num_ids in original_num_ids_list]

# Define the start and end years, including a leap year
start_year = 2015
end_year = 2020

# Create an empty list to store data
data = []

# Initialize a linear trend factor
linear_trend_factor = 1

# Loop through each year
for year in range(start_year, end_year + 1):
    # Introduce a random percentage increase between 20% and 50%
    percentage_increase = random.uniform(0.30, 0.50)
    print('% increase:', percentage_increase)
    num_ids_list_with_increase = [int(num_ids * (1 + percentage_increase)) for num_ids in num_ids_list]

    # Apply the linear trend factor
    num_ids_list_with_trend = [int(num_ids * linear_trend_factor) for num_ids in num_ids_list_with_increase]

    # Adjust the linear trend factor for the next year (you can customize this)
    linear_trend_factor += 0.05  # Adjust the trend factor as needed

    # Determine if the current year is a leap year
    is_leap_year = (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0)

    # Calculate the number of patients with 'Transfer_date' for the current year
    total_admissions = sum(num_ids_list_with_trend)
    num_transfer_patients = int(total_admissions * 0.035)

    # Calculate the number of transfer patients for the current year
    num_transfer_patients_yearly = int(total_admissions * 0.035)

    # Spread out transfer patients throughout the year
    transfer_month_distribution = [int(num_transfer_patients_yearly / 12)] * 12
    remaining_transfers = num_transfer_patients_yearly % 12

    # Distribute the remaining transfer patients randomly across the months
    for _ in range(remaining_transfers):
        random_month = random.randint(0, 11)
        transfer_month_distribution[random_month] += 1

    # Loop through each month
    for month, num_ids in enumerate(num_ids_list_with_trend, start=1):
        # Introduce seasonality by varying the number of IDs based on the month
        seasonality_factor = 1 + np.sin(2 * np.pi * (month - 1) / 12)  # Adjust the seasonality factor as needed
        num_ids = int(num_ids * seasonality_factor)

        # Generate random IDs, start dates, end dates, mode_of_arrival, and disposition for the current month
        for _ in range(num_ids):
            day = random.randint(1, 28)  # Generate a random day within the month
            start_date = datetime(year, month, day)
            end_date = start_date + timedelta(days=random.randint(1, 30))  # Generate a random end date within 30 days
            id_value = random.randint(1000000, 99999999)  # Generate 8 digits Patient IDs
            arrival_mode = random.choice(mode_of_arrival_options)
            gender = random.choice(gender_options)

            # Determine if this patient should have a 'Transfer_date'
            if transfer_month_distribution[month - 1] > 0:
                transfer_date = start_date
                transfer_month_distribution[month - 1] -= 1
                disposition = 'TRANSFER'
            else:
                transfer_date = None
                disposition = random.choice(disposition_options)

            if transfer_date == start_date:
                transfer_type = 'Transfer Admissions/Incoming Transfers'
            else:
                transfer_type = ''

            data.append([start_date, end_date, transfer_date, id_value, arrival_mode, disposition, transfer_type, gender])

# Create a Pandas DataFrame from the data
master_data = pd.DataFrame(data, columns=["Admission Date", "Discharge Date", "Transfer Date", "Patient ID", "Mode of Arrival", "Disposition", "Transfer Type", "Gender"])

master_data.to_csv('ADT dataset - Streamlit V1.csv')

# Assuming 'master_data' is your DataFrame
# Convert the 'Admission Date' column to datetime if it's not already
master_data['Admission Date'] = pd.to_datetime(master_data['Admission Date'])

# Extract the year from the 'Admission Date' column
master_data['Year'] = master_data['Admission Date'].dt.year

# Group by year and count admissions
yearly_admissions = master_data.groupby('Year')['Patient ID'].count()

yearly_admissions_percentage_increase = yearly_admissions.pct_change() * 100

print(yearly_admissions)
# Print the results
print(yearly_admissions_percentage_increase)

Scaling Factor: 182.5204961109943
% increase: 0.4239338541321323
% increase: 0.3074988430156789
% increase: 0.3708261589685712
% increase: 0.4976813389239167
% increase: 0.3975210925249858
% increase: 0.4219594918201598
Year
2015    48878
2016    47117
2017    51754
2018    59118
2019    57559
2020    61011
Name: Patient ID, dtype: int64
Year
2015          NaN
2016    -3.602848
2017     9.841458
2018    14.228852
2019    -2.637099
2020     5.997324
Name: Patient ID, dtype: float64


In [15]:
master_data['Admission Date'] = pd.to_datetime(master_data['Admission Date'])

# Extract the year from the 'Admission Date' column
master_data['Year'] = master_data['Admission Date'].dt.year

# Group by year and count admissions
yearly_admissions = master_data.groupby('Year')['Patient ID'].count()

yearly_admissions

Year
2015    48878
2016    47117
2017    51754
2018    59118
2019    57559
2020    61011
Name: Patient ID, dtype: int64

# Admissions

In [16]:
# Training dataset:
training_data = master_data[(master_data['Admission Date'] >= '2015-01-01') & (master_data['Admission Date'] <= '2020-11-30')]
train_data = training_data.groupby(training_data["Admission Date"])["Patient ID"].count().reset_index()
train_data.columns = ["Admission Date", "patient_count"]
print('Training data:\n',train_data.tail(10))
train_df = train_data["patient_count"]
train_df.name = "patient_count"
print('\n Training data count:\n',train_df)


# Testng dataset:
testing_data = master_data[(master_data['Admission Date'] >= '2020-12-16') & (master_data['Admission Date'] <= '2020-12-31')]
test_data = testing_data.groupby(testing_data["Admission Date"])["Patient ID"].count().reset_index()
test_data.columns = ["Admission Date", "patient_count"]
print('Testing data:\n',test_data.tail(10))
test_df = test_data["patient_count"]
test_df.name = "patient_count"
print('\n Testing data count:\n',test_df)

Training data:
      Admission Date  patient_count
1810     2020-11-19             30
1811     2020-11-20             30
1812     2020-11-21             29
1813     2020-11-22             24
1814     2020-11-23             31
1815     2020-11-24             26
1816     2020-11-25             20
1817     2020-11-26             26
1818     2020-11-27             16
1819     2020-11-28             19

 Training data count:
 0       135
1       156
2       138
3       143
4       151
       ... 
1815     26
1816     20
1817     26
1818     16
1819     19
Name: patient_count, Length: 1820, dtype: int64
Testing data:
    Admission Date  patient_count
3      2020-12-19             84
4      2020-12-20             95
5      2020-12-21             67
6      2020-12-22             88
7      2020-12-23            106
8      2020-12-24             96
9      2020-12-25            104
10     2020-12-26             98
11     2020-12-27             87
12     2020-12-28            105

 Testing data co

In [17]:
train_data = train_df
test_data = test_df

data = train_data

# Define the order of the SARIMA model (p, d, q), (P, D, Q, S)
# p: AutoRegressive order
# d: Differencing order
# q: Moving Average order
# P: Seasonal AutoRegressive order
# D: Seasonal Differencing order
# Q: Seasonal Moving Average order
# S: Seasonal period (e.g., 12 for monthly data with yearly seasonality)
p, d, q = 2, 2, 1
P, D, Q, S = 3, 3, 3, 12

# Fit the SARIMAX model to your data
admissions_model = sm.tsa.SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S))
results = admissions_model.fit()

  warn('Non-invertible starting seasonal moving average'


In [9]:
with open('admission_model1.pkl', 'wb') as model_file:
    pickle.dump(results, model_file)

In [7]:
import joblib 

# save model with joblib 
filename = 'joblib_model_admissions.sav'
joblib.dump(results, filename)

['joblib_model_admissions.sav']

In [8]:
import zipfile

# Define the name of the zip file you want to create
zip_filename = 'joblib_model_admissions.zip'

# Create a new zip file and add the model file to it
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write(filename, arcname=filename)

# Discharges

In [32]:
master_data['Discharge Date'] = pd.to_datetime(master_data['Discharge Date'])

# Extract the year from the 'Admission Date' column
master_data['Year'] = master_data['Discharge Date'].dt.year

# Group by year and count admissions
yearly_discharges = master_data.groupby('Year')['Patient ID'].count()

yearly_discharges

Year
2015    47896
2016    47167
2017    51662
2018    58959
2019    57650
2020    60904
2021     1199
Name: Patient ID, dtype: int64

In [31]:
# Training dataset:
training_data_discharge = master_data[(master_data['Discharge Date'] >= '2015-01-01') & (master_data['Discharge Date'] <= '2020-11-30')]
train_data_discharge = training_data_discharge.groupby(training_data["Discharge Date"])["Patient ID"].count().reset_index()
train_data_discharge.columns = ["Discharge Date", "patient_count"]
print('Training data:\n',train_data_discharge.head(10))
train_df_discharge = train_data_discharge["patient_count"]
train_df_discharge.name = "patient_count"
print('\n Training data count:\n',train_df)


# Testng dataset:
testing_data_discharge = master_data[(master_data['Discharge Date'] >= '2020-12-01') & (master_data['Discharge Date'] <= '2020-12-31')]
test_data_discharge = testing_data_discharge.groupby(testing_data["Discharge Date"])["Patient ID"].count().reset_index()
test_data_discharge.columns = ["Discharge Date", "patient_count"]
print('Testing data:\n',test_data_discharge.tail(10))
test_df_discharge = test_data_discharge["patient_count"]
test_df_discharge.name = "patient_count"
print('\n Testing data count:\n',test_df)

Training data:
   Discharge Date  patient_count
0     2015-01-02             12
1     2015-01-03              7
2     2015-01-04             16
3     2015-01-05             16
4     2015-01-06             19
5     2015-01-07             18
6     2015-01-08             32
7     2015-01-09             32
8     2015-01-10             45
9     2015-01-11             46

 Training data count:
 0       135
1       156
2       138
3       143
4       151
       ... 
1815     26
1816     20
1817     26
1818     16
1819     19
Name: patient_count, Length: 1820, dtype: int64
Testing data:
    Discharge Date  patient_count
5      2020-12-22             17
6      2020-12-23             17
7      2020-12-24             22
8      2020-12-25             25
9      2020-12-26             25
10     2020-12-27             45
11     2020-12-28             39
12     2020-12-29             40
13     2020-12-30             33
14     2020-12-31             30

 Testing data count:
 0      91
1      77
2      

In [19]:
train_data = train_df_discharge
test_data = test_df_discharge

# Load your time series data into a pandas DataFrame
# Replace 'your_data.csv' with your data file
#data = pd.read_csv('ADT datset V1.csv')
#data['Date'] = pd.to_datetime(data['Date'])  # Make sure the 'Date' column is in datetime format
#data.set_index('Date', inplace=True)  # Set 'Date' as the index

data = train_data

# Define the order of the SARIMA model (p, d, q), (P, D, Q, S)
# p: AutoRegressive order
# d: Differencing order
# q: Moving Average order
# P: Seasonal AutoRegressive order
# D: Seasonal Differencing order
# Q: Seasonal Moving Average order
# S: Seasonal period (e.g., 12 for monthly data with yearly seasonality)
p, d, q = 2, 2, 3
P, D, Q, S = 3, 3, 3, 12

# Fit the SARIMAX model to your data
model = sm.tsa.SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S))
results = model.fit()

  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting seasonal moving average'


In [28]:
with open('discharge_model.pkl', 'wb') as model_file:
    pickle.dump(results, model_file)

In [None]:
import joblib 

# save model with joblib 
filename = 'joblib_model_discharge.sav'
joblib.dump(result, filename)

import zipfile

# Define the name of the zip file you want to create
zip_filename = 'joblib_model_discharge.zip'

# Create a new zip file and add the model file to it
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write(filename, arcname=filename)

# Transfers

In [None]:
master_data['Transfer Date'] = pd.to_datetime(master_data['Transfer Date'])

# Extract the year from the 'Admission Date' column
master_data['Year'] = master_data['Transfer Date'].dt.year

# Group by year and count admissions
yearly_transfers = master_data.groupby('Year')['Patient ID'].count()

yearly_transfers

In [None]:
# Training dataset:
training_data_transfer = master_data[(master_data['Transfer Date'] >= '2019-01-01') & (master_data['Transfer Date'] <= '2020-11-30')]
train_data_transfer = training_data_transfer.groupby(training_data["Transfer Date"])["Patient ID"].count().reset_index()
train_data_transfer.columns = ["Transfer Date", "patient_count"]
print('Training data:\n',train_data_transfer.head(10))
train_df_transfer = train_data_transfer["patient_count"]
train_df_transfer.name = "patient_count"
print('\n Training data count:\n',train_df)


# Testng dataset:
testing_data_transfer = master_data[(master_data['Transfer Date'] >= '2020-12-01') & (master_data['Transfer Date'] <= '2020-12-31')]
test_data_transfer = testing_data_transfer.groupby(testing_data_transfer["Transfer Date"])["Patient ID"].count().reset_index()
test_data_transfer.columns = ["Transfer Date", "patient_count"]
print('Testing data:\n',test_data_transfer.tail(10))
test_df_transfer = test_data_transfer["patient_count"]
test_df_transfer.name = "patient_count"
print('\n Testing data count:\n',test_df_transfer)

In [None]:
train_data = train_df_transfer
test_data = test_df_transfer

# Load your time series data into a pandas DataFrame
# Replace 'your_data.csv' with your data file
#data = pd.read_csv('ADT datset V1.csv')
#data['Date'] = pd.to_datetime(data['Date'])  # Make sure the 'Date' column is in datetime format
#data.set_index('Date', inplace=True)  # Set 'Date' as the index

data = train_data

# Define the order of the SARIMA model (p, d, q), (P, D, Q, S)
# p: AutoRegressive order
# d: Differencing order
# q: Moving Average order
# P: Seasonal AutoRegressive order
# D: Seasonal Differencing order
# Q: Seasonal Moving Average order
# S: Seasonal period (e.g., 12 for monthly data with yearly seasonality)
p, d, q = 2, 2, 2
P, D, Q, S = 2, 2, 2, 12

# Fit the SARIMAX model to your data
model = sm.tsa.SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S))
results = model.fit()

In [None]:
with open('transfer_model1.pkl', 'wb') as model_file:
    pickle.dump(results, model_file)

In [None]:
import joblib 

# save model with joblib 
filename = 'joblib_model_transfer.sav'
joblib.dump(result, filename)

import zipfile

# Define the name of the zip file you want to create
zip_filename = 'joblib_model_transfer.zip'

# Create a new zip file and add the model file to it
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write(filename, arcname=filename)

## Creating forecast data set for next 3 months 

In [34]:
import pickle

# Replace 'your_file.pkl' with the path to your actual pickle file
file_path = 'admissions_model.pkl'

# Open the file in binary mode
with open(file_path, 'rb') as file:
    # Load the content of the file into a Python object
    data = pickle.load(file)

In [35]:
import pandas as pd
import numpy as np
date_range = pd.date_range(start='2024-01-01', end='2024-03-31', freq='D')
forecast_results = data.get_forecast(steps=len(date_range))
forecast_mean = forecast_results.predicted_mean
admissions_forecast_data = pd.DataFrame({
    'Date': pd.to_datetime(date_range).date,
    'Forecasted Admissions Count': np.ceil(forecast_mean).astype(int)            
                })
total_admissions = admissions_forecast_data['Forecasted Admissions Count'].sum()
total_admissions

37896

In [41]:
admissions_forecast_data.to_csv('admissions_forecast_data.csv')

In [37]:
# Replace 'your_file.pkl' with the path to your actual pickle file
file_path = 'discharge_model.pkl'

# Open the file in binary mode
with open(file_path, 'rb') as file:
    # Load the content of the file into a Python object
    data1 = pickle.load(file)
    
date_range = pd.date_range(start='2024-01-01', end='2024-03-31', freq='D')
forecast_results = data1.get_forecast(steps=len(date_range))
forecast_mean = forecast_results.predicted_mean
discharge_forecast_data = pd.DataFrame({
    'Date': pd.to_datetime(date_range).date,
    'Forecasted Discharge Count': np.ceil(forecast_mean).astype(int)            
                })
total_discharges = discharge_forecast_data['Forecasted Discharge Count'].sum()
print(total_discharges)
discharge_forecast_data

8851


Unnamed: 0,Date,Forecasted Discharge Count
2128,2024-01-01,15
2129,2024-01-02,7
2130,2024-01-03,17
2131,2024-01-04,9
2132,2024-01-05,24
...,...,...
2214,2024-03-27,189
2215,2024-03-28,177
2216,2024-03-29,197
2217,2024-03-30,229


In [40]:
discharge_forecast_data.to_csv('discharge_forecast_data.csv')

In [38]:
# Replace 'your_file.pkl' with the path to your actual pickle file
file_path = 'transfer_model.pkl'

# Open the file in binary mode
with open(file_path, 'rb') as file:
    # Load the content of the file into a Python object
    data = pickle.load(file)
    
date_range = pd.date_range(start='2024-01-01', end='2024-03-31', freq='D')
forecast_results = data.get_forecast(steps=len(date_range))
forecast_mean = forecast_results.predicted_mean
transfer_forecast_data = pd.DataFrame({
    'Date': pd.to_datetime(date_range).date,
    'Forecasted Transfer Count': np.ceil(forecast_mean).astype(int)            
                })
total_transfer = transfer_forecast_data['Forecasted Transfer Count'].sum()
print(total_transfer)
transfer_forecast_data.to_csv('transfer_forecast_data.csv')

598


In [39]:
transfer_forecast_data

Unnamed: 0,Date,Forecasted Transfer Count
1814,2024-01-01,8
1815,2024-01-02,7
1816,2024-01-03,8
1817,2024-01-04,7
1818,2024-01-05,8
...,...,...
1900,2024-03-27,6
1901,2024-03-28,6
1902,2024-03-29,6
1903,2024-03-30,5


## For Streamlit

In [32]:
import pandas as pd
import numpy as np
import psycopg2
from pandas import DataFrame
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

%matplotlib inline
import pandas as pd
import numpy as np
import random
import statsmodels.api as sm
from datetime import datetime, timedelta
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import r2_score
import plotly.graph_objects as go
import pickle

In [33]:
# Database connection parameters
db_name = 'factihealth'   # Database name
db_user = 'fh_user'  # Username
db_password = 'Facti@874'  # Password
db_host = 'redshift-cluster-factihealth.cuzgotkwtow6.ap-south-1.redshift.amazonaws.com'  # Cluster endpoint
db_port = 5439  # Port
# Connect to the database
try:
    conn = psycopg2.connect(
        dbname=db_name,
        user=db_user,
        password=db_password,
        host=db_host,
        port=db_port
    )
    print("Connected to the database successfully")
    # Create a cursor object
    cur = conn.cursor()
    # Execute a query
    cur.execute('''select admittime::DATE as admittime_date, count(distinct subject_id) from factihealth.mimic.admissions 
where date_part(year,admittime) in (2125,2126,2127,2128,2129)
group by admittime_date
order by admittime_date''')
    # Fetch the result
    rows = cur.fetchall()

    # Get the column names
    column_names = [desc[0] for desc in cur.description]

    # Create a DataFrame
    master_data = pd.DataFrame(rows, columns=column_names)

    print('Dataframe shape:', master_data.shape)
    # Fetch and print the result
except Exception as e:
    print(f"Database connection failed due to {e}")

Connected to the database successfully
Dataframe shape: (1826, 2)


In [34]:
# Training dataset:
master_data['admittime_date'] = pd.to_datetime(master_data['admittime_date'])

# Filter rows where 'admittime_date' falls within the specified range
train_data = master_data[
    (master_data['admittime_date'] >= '2125-01-01') & 
    (master_data['admittime_date'] <= '2129-11-30')
]

#training_data = master_data[(master_data['admittime_date'] >= '2125-01-01') & (master_data['admittime_date'] <= '2129-11-30')]
train_data.columns = ["admittime_date", "count"]
print('Training data:\n',train_data.tail(10))
train_df = train_data["count"]
train_df.name = "count"
print('\n Training data count:\n',train_df)


# Testng dataset:

# Filter rows where 'admittime_date' falls within the specified range
test_data = master_data[
    (master_data['admittime_date'] >= '2129-12-01') & 
    (master_data['admittime_date'] <= '2129-12-31')
]
test_data.columns = ["admittime_date", "count"]
print('Testing data:\n',test_data.tail(10))
test_df = test_data["count"]
test_df.name = "count"
print('\n Testing data count:\n',test_df)

Training data:
      admittime_date  count
1785     2129-11-21     17
1786     2129-11-22     20
1787     2129-11-23      9
1788     2129-11-24     16
1789     2129-11-25     10
1790     2129-11-26     21
1791     2129-11-27     16
1792     2129-11-28     13
1793     2129-11-29     15
1794     2129-11-30     15

 Training data count:
 0       19
1        8
2       17
3       19
4       15
        ..
1790    21
1791    16
1792    13
1793    15
1794    15
Name: count, Length: 1795, dtype: int64
Testing data:
      admittime_date  count
1816     2129-12-22     16
1817     2129-12-23     13
1818     2129-12-24     17
1819     2129-12-25     15
1820     2129-12-26     14
1821     2129-12-27     14
1822     2129-12-28     12
1823     2129-12-29     13
1824     2129-12-30     14
1825     2129-12-31     16

 Testing data count:
 1795    15
1796    16
1797    14
1798    14
1799    14
1800    11
1801    13
1802    19
1803    10
1804    15
1805    13
1806    13
1807    13
1808    11
1809    20
18

In [35]:
train_data = train_df
test_data = test_df

data = train_data

p, d, q = 2, 2, 1
P, D, Q, S = 3, 3, 3, 12

# Fit the SARIMAX model to your data
admissions_model = sm.tsa.SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S))
results = admissions_model.fit()

  warn('Non-invertible starting seasonal moving average'


In [36]:
# Forecast the next 10 time points
forecast = results.get_forecast(steps=10)

# Get the forecasted values and confidence intervals
forecasted_values = forecast.predicted_mean
confidence_intervals = forecast.conf_int()

print("Forecasted Values:", np.ceil(forecasted_values))
print("Confidence Intervals:", confidence_intervals)

Forecasted Values: 1795    11.0
1796    23.0
1797    17.0
1798    24.0
1799     9.0
1800    14.0
1801     8.0
1802    22.0
1803    10.0
1804    12.0
Name: predicted_mean, dtype: float64
Confidence Intervals:       lower count  upper count
1795    -2.507416    23.593652
1796     8.805640    36.225998
1797     1.628171    31.826149
1798     6.262881    40.041522
1799    -9.572277    26.350042
1800    -5.756170    32.623030
1801   -12.590305    28.115848
1802    -0.228898    42.563011
1803   -12.899490    31.967315
1804   -11.892571    34.948295


In [37]:
with open('admission_model_demo.pkl', 'wb') as model_file:
    pickle.dump(results, model_file)

In [39]:
# Replace 'your_file.pkl' with the path to your actual pickle file
file_path = 'admission_model_demo.pkl'

# Open the file in binary mode
with open(file_path, 'rb') as file:
    # Load the content of the file into a Python object
    data1 = pickle.load(file)
    
date_range = pd.date_range(start='2024-01-01', end='2024-03-31', freq='D')

forecast_results = data1.get_forecast(steps=len(date_range))
forecast_mean = forecast_results.predicted_mean
forecast_mean_non_negative = np.abs(forecast_mean)
admissions_forecast_data = pd.DataFrame({
    'Date': pd.to_datetime(date_range).date,
    'Forecasted Admissions Count': np.ceil(forecast_mean).astype(int)            
                })
total_admissions = admissions_forecast_data['Forecasted Admissions Count'].sum()
print(total_admissions)
admissions_forecast_data.to_csv('admissions_forecast_data.csv')
admissions_forecast_data

1428


Unnamed: 0,Date,Forecasted Admissions Count
1795,2024-01-01,11
1796,2024-01-02,23
1797,2024-01-03,17
1798,2024-01-04,24
1799,2024-01-05,9
...,...,...
1881,2024-03-27,-1
1882,2024-03-28,44
1883,2024-03-29,-3
1884,2024-03-30,20


In [40]:
# Database connection parameters
db_name = 'factihealth'   # Database name
db_user = 'fh_user'  # Username
db_password = 'Facti@874'  # Password
db_host = 'redshift-cluster-factihealth.cuzgotkwtow6.ap-south-1.redshift.amazonaws.com'  # Cluster endpoint
db_port = 5439  # Port
# Connect to the database
try:
    conn = psycopg2.connect(
        dbname=db_name,
        user=db_user,
        password=db_password,
        host=db_host,
        port=db_port
    )
    print("Connected to the database successfully")
    # Create a cursor object
    cur = conn.cursor()
    # Execute a query
    cur.execute('''select dischtime::DATE as dischtime_date, count(distinct subject_id) from factihealth.mimic.admissions 
where date_part(year,dischtime) in (2125,2126,2127,2128,2129)
group by dischtime_date
order by dischtime_date''')
    # Fetch the result
    rows = cur.fetchall()

    # Get the column names
    column_names = [desc[0] for desc in cur.description]

    # Create a DataFrame
    master_data = pd.DataFrame(rows, columns=column_names)

    print('Dataframe shape:', master_data.shape)
    # Fetch and print the result
except Exception as e:
    print(f"Database connection failed due to {e}")

Connected to the database successfully
Dataframe shape: (1826, 2)


In [41]:
# Training dataset:
master_data['dischtime_date'] = pd.to_datetime(master_data['dischtime_date'])

# Filter rows where 'admittime_date' falls within the specified range
train_data = master_data[
    (master_data['dischtime_date'] >= '2125-01-01') & 
    (master_data['dischtime_date'] <= '2129-11-30')
]

#training_data = master_data[(master_data['admittime_date'] >= '2125-01-01') & (master_data['admittime_date'] <= '2129-11-30')]
train_data.columns = ["dischtime_date", "count"]
print('Training data:\n',train_data.tail(10))
train_df = train_data["count"]
train_df.name = "count"
print('\n Training data count:\n',train_df)


# Testng dataset:

# Filter rows where 'admittime_date' falls within the specified range
test_data = master_data[
    (master_data['dischtime_date'] >= '2129-12-01') & 
    (master_data['dischtime_date'] <= '2129-12-31')
]
test_data.columns = ["dischtime_date", "count"]
print('Testing data:\n',test_data.tail(10))
test_df = test_data["count"]
test_df.name = "count"
print('\n Testing data count:\n',test_df)

Training data:
      dischtime_date  count
1785     2129-11-21     11
1786     2129-11-22     13
1787     2129-11-23     19
1788     2129-11-24     22
1789     2129-11-25     24
1790     2129-11-26      7
1791     2129-11-27     18
1792     2129-11-28     15
1793     2129-11-29     15
1794     2129-11-30     12

 Training data count:
 0       10
1       17
2       18
3       11
4       20
        ..
1790     7
1791    18
1792    15
1793    15
1794    12
Name: count, Length: 1795, dtype: int64
Testing data:
      dischtime_date  count
1816     2129-12-22     16
1817     2129-12-23     14
1818     2129-12-24     13
1819     2129-12-25     20
1820     2129-12-26     11
1821     2129-12-27     12
1822     2129-12-28     15
1823     2129-12-29     17
1824     2129-12-30     12
1825     2129-12-31     16

 Testing data count:
 1795     7
1796    16
1797    17
1798    13
1799    17
1800    14
1801    11
1802    16
1803    13
1804    14
1805    14
1806     8
1807    11
1808    17
1809    15
18

In [42]:
train_data = train_df
test_data = test_df

data = train_data

p, d, q = 2, 2, 1
P, D, Q, S = 3, 3, 3, 12

# Fit the SARIMAX model to your data
discharge_model = sm.tsa.SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S))
results = discharge_model.fit()

  warn('Non-invertible starting seasonal moving average'


In [43]:
with open('discharge_model_demo.pkl', 'wb') as model_file:
    pickle.dump(results, model_file)

In [44]:
# Replace 'your_file.pkl' with the path to your actual pickle file
file_path = 'discharge_model_demo.pkl'

# Open the file in binary mode
with open(file_path, 'rb') as file:
    # Load the content of the file into a Python object
    data1 = pickle.load(file)
    
date_range = pd.date_range(start='2024-01-01', end='2024-03-31', freq='D')
forecast_results = data1.get_forecast(steps=len(date_range))
forecast_mean = forecast_results.predicted_mean
forecast_mean_non_negative = np.abs(forecast_mean)
discharge_forecast_data = pd.DataFrame({
    'Date': pd.to_datetime(date_range).date,
    'Forecasted Discharge Count': np.ceil(forecast_mean).astype(int)            
                })
total_discharges = discharge_forecast_data['Forecasted Discharge Count'].sum()
print(total_discharges)
discharge_forecast_data.to_csv('discharge_forecast_data.csv')
discharge_forecast_data

3164


Unnamed: 0,Date,Forecasted Discharge Count
1795,2024-01-01,14
1796,2024-01-02,22
1797,2024-01-03,19
1798,2024-01-04,17
1799,2024-01-05,20
...,...,...
1881,2024-03-27,48
1882,2024-03-28,62
1883,2024-03-29,60
1884,2024-03-30,92


In [45]:
# Forecast the next 10 time points
forecast = results.get_forecast(steps=10)

# Get the forecasted values and confidence intervals
forecasted_values = forecast.predicted_mean
confidence_intervals = forecast.conf_int()

print("Forecasted Values:", np.ceil(forecasted_values))
print("Confidence Intervals:", confidence_intervals)

Forecasted Values: 1795    14.0
1796    22.0
1797    19.0
1798    17.0
1799    20.0
1800    27.0
1801    28.0
1802     8.0
1803    24.0
1804    21.0
Name: predicted_mean, dtype: float64
Confidence Intervals:       lower count  upper count
1795     0.690229    27.107860
1796     7.500657    34.959116
1797     3.092128    33.140262
1798    -0.233478    33.472914
1799     1.875710    37.497695
1800     7.741986    45.741206
1801     7.404748    47.672463
1802   -13.489176    28.743457
1803     0.932666    45.173648
1804    -2.536540    43.606120


In [46]:
# Database connection parameters
db_name = 'factihealth'   # Database name
db_user = 'fh_user'  # Username
db_password = 'Facti@874'  # Password
db_host = 'redshift-cluster-factihealth.cuzgotkwtow6.ap-south-1.redshift.amazonaws.com'  # Cluster endpoint
db_port = 5439  # Port
# Connect to the database
try:
    conn = psycopg2.connect(
        dbname=db_name,
        user=db_user,
        password=db_password,
        host=db_host,
        port=db_port
    )
    print("Connected to the database successfully")
    # Create a cursor object
    cur = conn.cursor()
    # Execute a query
    cur.execute('''select transtime:: Date as transtime_date, count(distinct subject_id)
                    from
                    (
                    select distinct subject_id,
                    case 
                        when (lower(admission_location) like '%transfer%') then admittime
                        when (lower(admission_location) not like '%transfer%') then null 
                    end as transtime
                    from factihealth.mimic.admissions 
                    where date_part(year,admittime) in (2125,2126,2127,2128,2129)
                    )
                    group by transtime_date
                    order by transtime_date
                    ''')
    # Fetch the result
    rows = cur.fetchall()

    # Get the column names
    column_names = [desc[0] for desc in cur.description]

    # Create a DataFrame
    master_data = pd.DataFrame(rows, columns=column_names)

    print('Dataframe shape:', master_data.shape)
    # Fetch and print the result
except Exception as e:
    print(f"Database connection failed due to {e}")

Connected to the database successfully
Dataframe shape: (1374, 2)


In [47]:
# Training dataset:
master_data['transtime_date'] = pd.to_datetime(master_data['transtime_date'])

# Filter rows where 'admittime_date' falls within the specified range
train_data = master_data[
    (master_data['transtime_date'] >= '2125-01-01') & 
    (master_data['transtime_date'] <= '2129-11-30')
]

#training_data = master_data[(master_data['admittime_date'] >= '2125-01-01') & (master_data['admittime_date'] <= '2129-11-30')]
train_data.columns = ["transtime_date", "count"]
print('Training data:\n',train_data.tail(10))
train_df = train_data["count"]
train_df.name = "count"
print('\n Training data count:\n',train_df)


# Testng dataset:

# Filter rows where 'admittime_date' falls within the specified range
test_data = master_data[
    (master_data['transtime_date'] >= '2129-12-01') & 
    (master_data['transtime_date'] <= '2129-12-31')
]
test_data.columns = ["transtime_date", "count"]
print('Testing data:\n',test_data.tail(10))
test_df = test_data["count"]
test_df.name = "count"
print('\n Testing data count:\n',test_df)

Training data:
      transtime_date  count
1337     2129-11-20      1
1338     2129-11-21      1
1339     2129-11-22      1
1340     2129-11-23      1
1341     2129-11-24      2
1342     2129-11-25      1
1343     2129-11-26      1
1344     2129-11-28      1
1345     2129-11-29      2
1346     2129-11-30      1

 Training data count:
 0       1
1       1
2       2
3       6
4       1
       ..
1342    1
1343    1
1344    1
1345    2
1346    1
Name: count, Length: 1347, dtype: int64
Testing data:
      transtime_date  count
1363     2129-12-20      1
1364     2129-12-21      1
1365     2129-12-22      1
1366     2129-12-23      2
1367     2129-12-24      2
1368     2129-12-26      3
1369     2129-12-27      1
1370     2129-12-29      2
1371     2129-12-30      1
1372     2129-12-31      4

 Testing data count:
 1347    2
1348    1
1349    2
1350    2
1351    1
1352    1
1353    1
1354    1
1355    4
1356    2
1357    2
1358    1
1359    3
1360    1
1361    1
1362    1
1363    1
1364    

In [48]:
train_data = train_df
test_data = test_df

data = train_data

p, d, q = 2, 2, 1
P, D, Q, S = 3, 3, 3, 12

# Fit the SARIMAX model to your data
transfer_model = sm.tsa.SARIMAX(data, order=(p, d, q), seasonal_order=(P, D, Q, S))
results = transfer_model.fit()

  warn('Non-invertible starting seasonal moving average'


In [49]:
with open('transfer_model_demo.pkl', 'wb') as model_file:
    pickle.dump(results, model_file)

In [50]:
# Replace 'your_file.pkl' with the path to your actual pickle file
file_path = 'transfer_model_demo.pkl'

# Open the file in binary mode
with open(file_path, 'rb') as file:
    # Load the content of the file into a Python object
    data1 = pickle.load(file)
    
date_range = pd.date_range(start='2024-01-01', end='2024-03-31', freq='D')
forecast_results = data1.get_forecast(steps=len(date_range))
forecast_mean = forecast_results.predicted_mean
forecast_mean_non_negative = np.abs(forecast_mean)
transfer_forecast_data = pd.DataFrame({
    'Date': pd.to_datetime(date_range).date,
    'Forecasted Transfer Count': np.ceil(forecast_mean).astype(int)            
                })
total_transfers = transfer_forecast_data['Forecasted Transfer Count'].sum()
print(total_transfers)
transfer_forecast_data.to_csv('transfer_forecast_data.csv')
transfer_forecast_data

-640


Unnamed: 0,Date,Forecasted Transfer Count
1347,2024-01-01,3
1348,2024-01-02,3
1349,2024-01-03,0
1350,2024-01-04,1
1351,2024-01-05,0
...,...,...
1433,2024-03-27,-20
1434,2024-03-28,-26
1435,2024-03-29,-24
1436,2024-03-30,-18


In [51]:
# Forecast the next 10 time points
forecast = results.get_forecast(steps=10)

# Get the forecasted values and confidence intervals
forecasted_values = forecast.predicted_mean
confidence_intervals = forecast.conf_int()

print("Forecasted Values:", np.ceil(forecasted_values))
print("Confidence Intervals:", confidence_intervals)

Forecasted Values: 1347    3.0
1348    3.0
1349   -0.0
1350    1.0
1351   -0.0
1352    1.0
1353    2.0
1354   -0.0
1355   -0.0
1356   -1.0
Name: predicted_mean, dtype: float64
Confidence Intervals:       lower count  upper count
1347    -1.494822     5.565584
1348    -1.611601     5.999187
1349    -4.713862     3.541842
1350    -4.650242     4.666838
1351    -5.597146     4.375544
1352    -4.489725     6.142619
1353    -4.166896     7.146756
1354    -6.763642     5.151921
1355    -6.595819     5.905250
1356    -8.453088     4.617436
