In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load all files
# Every table lives in the same Kaggle input directory; keep that directory in
# one place so the notebook can be pointed at another copy of the dataset by
# editing a single line.
DATA_DIR = '/kaggle/input/hospital-management-dataset'

patients = pd.read_csv(DATA_DIR + '/patients.csv')
doctors = pd.read_csv(DATA_DIR + '/doctors.csv')
appointments = pd.read_csv(DATA_DIR + '/appointments.csv')
treatments = pd.read_csv(DATA_DIR + '/treatments.csv')
billing = pd.read_csv(DATA_DIR + '/billing.csv')


In [None]:
# Inspect every loaded table in a fixed order:
# patients, doctors, appointments, treatments, billing.
tables = [patients, doctors, appointments, treatments, billing]

# Peek at first rows
for table in tables:
    print(table.head())

# Data info
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line
# after each report.
for table in tables:
    table.info()

# Stats for numeric columns
for table in tables:
    print(table.describe())


In [None]:
# Convert the date column of each event table to pandas datetimes, in place.
# Later cells rely on this (e.g. the `.dt` accessor on `bill_date`).
for frame, date_col in (
    (appointments, 'appointment_date'),
    (treatments, 'treatment_date'),
    (billing, 'bill_date'),
):
    frame[date_col] = pd.to_datetime(frame[date_col])

In [None]:
# Bar chart: number of appointments recorded for each status value.
status_counts = appointments['status'].value_counts()
status_counts.plot.bar(title='Appointment Status')
plt.show()

# Pie chart: share of appointments per status, as whole-number percentages.
status_sizes = appointments.groupby('status').size()
pie_ax = status_sizes.plot.pie(autopct='%1.0f%%', title='Appointment Status Distribution')
pie_ax.set_ylabel('')  # blank out the y-axis label on the pie chart
plt.show()


In [None]:
# Horizontal bar chart of how often each reason for visit occurs.
reason_counts = appointments['reason_for_visit'].value_counts()
reason_counts.plot.barh(title='Reasons for Visit')
plt.show()


In [None]:
# Histogram of treatment costs in 20 bins, with a kernel density estimate
# curve drawn over the bars.
cost_ax = sns.histplot(data=treatments, x='cost', bins=20, kde=True)
cost_ax.set_title('Distribution of Treatment Costs')
plt.show()


In [None]:
# Bar chart of the number of bills recorded for each payment method.
payment_counts = billing.groupby('payment_method').size()
payment_counts.plot.bar(title='Payment Methods')
plt.show()


In [None]:
# Join appointments to their treatments, then treatments to their bills.
# Both joins use DataFrame.merge's default inner join, so only rows with a
# match on both keys survive into `merged`.
appointment_treatments = appointments.merge(treatments, on='appointment_id')
merged = appointment_treatments.merge(billing, on='treatment_id')

# Sum the billed amount per calendar day of the bill date.
# `.dt.date` requires `bill_date` to already be a datetime column.
bill_day = merged['bill_date'].dt.date
daily_income = merged.groupby(bill_day)['amount'].sum()

income_ax = daily_income.plot(figsize=(12,6), title='Daily Hospital Income')
income_ax.set_xlabel('Date')
income_ax.set_ylabel('Income')
plt.show()


In [None]:
# Total billed amount per doctor, labelled with the doctor's full name and
# ordered from highest to lowest total.
doctor_names = doctors[['doctor_id','first_name','last_name']]
doctor_treatment = (
    merged.groupby('doctor_id')['amount'].sum().reset_index()
    .merge(doctor_names, on='doctor_id')
    .assign(doctor_name=lambda d: d['first_name'] + ' ' + d['last_name'])
    .sort_values(by='amount', ascending=False)
)

revenue_ax = sns.barplot(data=doctor_treatment, x='amount', y='doctor_name')
revenue_ax.set_title('Total Revenue by Doctor')
plt.show()


In [None]:
# Per-doctor count of appointments whose status is 'No-show'.
# NOTE: despite its name, `no_show_rate` holds counts; the actual rate is the
# `no_show_rate` column of `no_show_data` computed below.
is_no_show = appointments['status'] == 'No-show'
no_show_rate = appointments[is_no_show].groupby('doctor_id').size().reset_index(name='no_shows')

# Per-doctor count of all appointments, whatever their status.
total_appointments = appointments.groupby('doctor_id').size().reset_index(name='total')

# Keep every doctor that has at least one appointment. A doctor with no
# 'No-show' rows is absent from `no_show_rate`; an inner merge would silently
# drop them from the chart, so merge against the totals (how='right') and
# treat the missing count as 0.
no_show_data = no_show_rate.merge(total_appointments, on='doctor_id', how='right')
no_show_data['no_shows'] = no_show_data['no_shows'].fillna(0).astype(int)
no_show_data['no_show_rate'] = no_show_data['no_shows'] / no_show_data['total']

# Attach doctor names for the axis labels and rank by rate, highest first.
no_show_data = no_show_data.merge(doctors[['doctor_id','first_name','last_name']], on='doctor_id')
no_show_data['doctor_name'] = no_show_data['first_name'] + ' ' + no_show_data['last_name']
no_show_data = no_show_data.sort_values(by='no_show_rate', ascending=False)

sns.barplot(data=no_show_data, x='no_show_rate', y='doctor_name')
plt.title('No-show Rate by Doctor')
plt.show()
