# Multivariate regression on hospital admission rate in Singapore

## Data preprocessing

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Dataset sourced from https://data.gov.sg/dataset/hospital-admission-rate-by-age-and-sex
dataset = pd.read_csv('hospital-admission-rate-by-age-and-sex.csv')

# Every column except the last is treated as a feature; the last column is the
# regression target (presumably the admission rate - confirm against the CSV header).
feature_frame = dataset.iloc[:, :-1]
target_series = dataset.iloc[:, -1]

# Downstream cells index X positionally, so hand over plain NumPy arrays.
X = feature_frame.to_numpy()
y = target_series.to_numpy()

In [3]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the three categorical feature columns of X in place.
# Column roles (1 = facility type, 2 = sex, 3 = age group) are taken from the
# encoding legend that follows this cell - confirm against the CSV header.
#
# A separate encoder is fitted for each column and kept in `label_encoders`,
# so every column's category <-> code mapping remains available afterwards via
# `label_encoders[col].classes_`, `.transform(...)` and `.inverse_transform(...)`.
# Refitting a single shared encoder would retain only the last column's mapping.
label_encoders = {}
for col in (1, 2, 3):
    le = LabelEncoder()
    X[:, col] = le.fit_transform(X[:, col])
    label_encoders[col] = le
# After the loop `le` is bound to the encoder fitted on column 3.


Facility type:

Acute = 0;
Community Hospitals = 1;
Psychiatric Hospitals = 2

Sex:

Male = 1;
Female = 0

Age:

0-14 Years = 0;
15-64 years = 1;
65 years & over = 2


In [4]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore every score reported below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

## Training the multivariate linear regression model on the Training set

In [5]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares fit on the training split. `fit` returns the estimator
# itself, so construction and fitting can be chained.
# NOTE(review): the categorical columns reach this model as integer codes, which
# a linear model reads as magnitudes - one-hot encoding may be worth trying.
lr_regressor = LinearRegression().fit(X_train, y_train)
lr_regressor  # bare expression so the cell still displays the fitted estimator

LinearRegression()

## Training the Random Forest Regression model on the Training set

In [6]:
from sklearn.ensemble import RandomForestRegressor

# Forest of 10 trees fitted on the training split; random_state is pinned so the
# bootstrap sampling, and hence the fitted model, is reproducible.
rf_regressor = RandomForestRegressor(
    n_estimators=10,
    random_state=0,
).fit(X_train, y_train)
rf_regressor  # bare expression so the cell still displays the fitted estimator

RandomForestRegressor(n_estimators=10, random_state=0)

## Training the Decision Tree Regression model on the Training set

In [7]:
from sklearn.tree import DecisionTreeRegressor

# Single regression tree fitted on the training split, with a pinned
# random_state for a reproducible fit.
dt_regressor = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
dt_regressor  # bare expression so the cell still displays the fitted estimator

DecisionTreeRegressor(random_state=0)

## Predicting the Test set results

In [8]:
# Score the held-out test rows with each fitted model, in the order
# linear regression, random forest, decision tree.
lr_y_pred, rf_y_pred, dt_y_pred = (
    fitted_model.predict(X_test)
    for fitted_model in (lr_regressor, rf_regressor, dt_regressor)
)

In [9]:
from sklearn.metrics import r2_score

# Limits how NumPy arrays are printed from here on (2 decimals); the R2 scores
# below are plain floats and are printed at full precision.
np.set_printoptions(precision=2)

# Report label -> test-set predictions, in the order the models were trained.
predictions_by_label = {
    "Linear Regression Model:": lr_y_pred,
    "Random Forest Regression Model:": rf_y_pred,
    "Decision Tree Regression Model:": dt_y_pred,
}
for report_label, model_pred in predictions_by_label.items():
    print(report_label, r2_score(y_test, model_pred))

Linear Regression Model: 0.52079178894673
Random Forest Regression Model: 0.9963226639436341
Decision Tree Regression Model: 0.9944678728974797


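The Random Forest Regression model has the highest R² score on the test set (≈0.996), narrowly ahead of the Decision Tree Regression model (≈0.994); the Linear Regression model trails at ≈0.52.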

## Predicting the admission rate at Psychiatric Hospitals for males aged 15-64 years in 2021

In [10]:
# Using the random forest regression model.
# One query row in the feature order of X, using the integer codes from the
# encoding legend: year 2021, Psychiatric Hospitals (2), Male (1), 15-64 years (1).
# NOTE(review): tree-based models predict piecewise-constant values, so if 2021
# lies outside the years present in the training data this forecast will not
# follow any time trend - confirm the year range of the dataset.
query_row = [[2021, 2, 1, 1]]
print(rf_regressor.predict(query_row))

[2.94]
