# Taiwan Hospital Efficiency Analysis (2016‑2018)

Generated: 2025-04-26 04:21:21

本 Notebook 整合了資料載入、清理、描述統計、相關性分析、迴歸模型、叢集分析與視覺化結果，以支援醫院降本增效及政府資源配置決策。

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from scipy.stats import ttest_ind

# Notebook-wide presentation defaults:
#  - max_columns=None lifts pandas' cap on how many columns are printed
#  - seaborn's "whitegrid" theme is applied to subsequent plots
pd.options.display.max_columns = None
sns.set_theme(style="whitegrid")


In [None]:

# Location of the yearly Excel workbooks; change `data_dir` if the notebook
# is run from a different environment.
data_dir = '/mnt/data'
files = {
    '2016': 'df_2016_cleaningdata.xlsx',
    '2017': 'df_2017_cleaningdata.xlsx',
    '2018': 'df_2018_cleaningdata.xlsx',
}

# Worksheet to read from each workbook.
# NOTE(review): the 2018 workbook is read from a differently named sheet than
# 2016/2017 -- confirm this is the intended (cleaned) sheet.
sheet_names = {
    '2016': '處理後(英文)',
    '2017': '處理後(英文)',
    '2018': '清完版本',
}

# Read the three years in chronological order into one DataFrame each.
df16, df17, df18 = (
    pd.read_excel(f'{data_dir}/{files[year]}', sheet_name=sheet_names[year])
    for year in ('2016', '2017', '2018')
)

def standardize(df):
    """Return a copy of *df* with normalized identifier column names.

    String column labels are first stripped of leading/trailing whitespace
    (the source workbooks contain headers such as ``'Year '``), then the
    identifier columns are renamed to snake_case:

    * ``Year`` -> ``year``
    * ``Institution Code`` -> ``institution_code``
    * ``Institution Name`` -> ``institution_name``

    All other columns keep their (stripped) names. Non-string labels are
    left untouched. The input DataFrame is not modified.
    """
    rename_map = {
        'Year': 'year',
        'Institution Code': 'institution_code',
        'Institution Name': 'institution_name',
    }
    # Strip whitespace on every string header rather than special-casing
    # 'Year ': a stray space on any other header would otherwise produce a
    # separate, mostly-NaN column once the yearly frames are concatenated.
    cleaned = df.rename(
        columns=lambda label: label.strip() if isinstance(label, str) else label
    )
    return cleaned.rename(columns=rename_map)

# Stack the three yearly tables (after column-name standardization) into a
# single frame with a fresh 0..n-1 index.
yearly_frames = [standardize(frame) for frame in (df16, df17, df18)]
full = pd.concat(yearly_frames, ignore_index=True)

# Metrics used by the correlation analysis below.
numeric_cols = [
    'medical_operating_margin',
    'avg_nurse_to_patient_ratio',
    'case_cnt_per_physician',
    'claim_points_per_outpatient_visit',
    'claim_points_per_inpatient_admission',
    'outpatient_visit_ratio',
    'inpatient_admissions_per_bed',
]

# Force each metric to a numeric dtype; values that cannot be parsed become
# NaN (errors='coerce') instead of raising.
for col in numeric_cols:
    coerced = pd.to_numeric(full[col], errors='coerce')
    full[col] = coerced

print('Data shape:', full.shape)
full.head()


## Descriptive Statistics

In [None]:

# Summary statistics of the operating margin, one row per year.
margin_by_year = full.groupby('year')['medical_operating_margin']
margin_by_year.describe()


## Correlation Analysis

In [None]:

# Pairwise correlation between all numeric metrics.
corr = full[numeric_cols].corr()

# Annotated heatmap of the correlation matrix, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlation Matrix')
plt.show()

# Correlation of every metric with the operating margin, strongest positive first.
corr['medical_operating_margin'].sort_values(ascending=False)


## Multiple Linear Regression
Dependent variable: **medical_operating_margin**

In [None]:

# Explanatory variables for the operating-margin regression.
predictors = [
    'case_cnt_per_physician',
    'avg_nurse_to_patient_ratio',
    'claim_points_per_outpatient_visit',
    'claim_points_per_inpatient_admission',
    'outpatient_visit_ratio',
    'inpatient_admissions_per_bed',
]

y = full['medical_operating_margin']
# add_constant prepends the intercept column to the design matrix.
X = sm.add_constant(full[predictors])

# missing='drop' makes OLS discard rows containing NaN in y or X.
model = sm.OLS(y, X, missing='drop').fit()
print(model.summary())


## K‑Means Clustering

In [None]:

# Number of clusters; used both for the model and for labelling the centers,
# so the two can never get out of sync.
n_clusters = 3

features = ['medical_operating_margin','case_cnt_per_physician',
            'avg_nurse_to_patient_ratio','inpatient_admissions_per_bed']

# Keep only complete rows. The explicit .copy() makes df_cluster an
# independent frame, so adding the 'cluster' column below does not trigger
# pandas' SettingWithCopyWarning.
df_cluster = full[features].dropna().copy()

# Standardize the features before clustering so each has zero mean and
# unit variance in the data passed to K-Means.
scaler = StandardScaler()
scaled = scaler.fit_transform(df_cluster)

# random_state fixes the initialization so the labels are reproducible.
kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init=10).fit(scaled)
df_cluster['cluster'] = kmeans.labels_
display(df_cluster['cluster'].value_counts())

# Map the cluster centers back from standardized units to the original
# feature units for interpretation.
centers = scaler.inverse_transform(kmeans.cluster_centers_)
centers_df = pd.DataFrame(centers, columns=features)
centers_df.index = [f'Cluster {i}' for i in range(n_clusters)]
centers_df


## Regional & Hospital Type Differences

In [None]:

# Indicator columns, one per region; rows with value 1 are counted in that region.
region_cols = [
    'is_northern_region',
    'is_central_region',
    'is_southern_region',
    'is_kaoping_region',
    'is_eastern_region',
]

# Mean operating margin of the rows flagged for each region.
region_margin = {}
for region_flag in region_cols:
    in_region = full[region_flag] == 1
    region_margin[region_flag] = full.loc[in_region, 'medical_operating_margin'].mean()

# Rank regions from highest to lowest mean margin.
pd.Series(region_margin).sort_values(ascending=False)


### Public vs Private Hospitals

In [None]:

# Split the operating margin by the is_private flag (0 vs 1), dropping
# missing margins so they do not propagate into the test.
is_private = full['is_private']
margin_pub = full.loc[is_private == 0, 'medical_operating_margin'].dropna()
margin_priv = full.loc[is_private == 1, 'medical_operating_margin'].dropna()

# Two-sample t-test on the group means.
# NOTE(review): ttest_ind is called with its defaults, i.e. the pooled
# (equal-variance) test -- consider equal_var=False if the two groups'
# variances differ noticeably.
stat, p = ttest_ind(margin_pub, margin_priv)
print(f'T‑stat={stat:.3f}, p={p:.3f}')
