# Hospital Data Analysis — Final Project\n\nFull project notebook covering data loading, cleaning, EDA, visualizations, KPIs, and export.\n\nPlace the six CSV files (patients.csv, admissions.csv, doctors.csv, treatments.csv, billing.csv, beds.csv) in the same folder as this notebook, then run all cells in order from top to bottom.

In [None]:
import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n%matplotlib inline\npd.options.display.max_columns = 200\n

In [None]:
# Load datasets
# Each CSV is read from the current working directory into its own DataFrame.
patients = pd.read_csv('patients.csv')
admissions = pd.read_csv('admissions.csv')
doctors = pd.read_csv('doctors.csv')
treatments = pd.read_csv('treatments.csv')
billing = pd.read_csv('billing.csv')
beds = pd.read_csv('beds.csv')

# Report the shape of every table (not only patients) so an empty or
# truncated input file is visible immediately after loading.
print('Loaded files:')
for table_name, table in [
    ('patients', patients),
    ('admissions', admissions),
    ('doctors', doctors),
    ('treatments', treatments),
    ('billing', billing),
    ('beds', beds),
]:
    print(table_name, table.shape)

# Last expression: preview of the patients table.
patients.head()

In [None]:
# Basic cleaning and merges
#
# Builds `df`, a denormalised master table that starts from `admissions` and
# left-joins patient attributes, billed amount, treatments and doctor names.
# Treatments are joined on PatientID only, so a patient with several
# admissions and several treatments yields one row per (admission, treatment)
# pair; per-admission and per-patient values repeat on those rows.

# Unparseable dates become NaT instead of raising.
admissions['AdmissionDate'] = pd.to_datetime(admissions['AdmissionDate'], errors='coerce')
admissions['DischargeDate'] = pd.to_datetime(admissions['DischargeDate'], errors='coerce')

# Left-closed bins: [0,20), [20,40), [40,60), [60,inf). The top bin is
# open-ended so the '60+' label really has no upper limit (a cap of 120 would
# turn ages >= 120 into NaN).
patients['AgeGroup'] = pd.cut(
    patients['Age'],
    bins=[0, 20, 40, 60, np.inf],
    labels=['0-20', '20-40', '40-60', '60+'],
    right=False,
)

# Collapse billing to one row per patient BEFORE merging. Joining raw billing
# and raw treatments on PatientID pairs every bill with every treatment
# (a Cartesian product per patient), multiplying rows and amounts.
# min_count=1 keeps NaN for a patient whose amounts are all missing.
# NOTE(review): if billing actually carries an admission-level key, joining on
# that key would be more precise - confirm against the CSV schema.
billing_per_patient = billing.groupby('PatientID', as_index=False)['Amount'].sum(min_count=1)

df = admissions.merge(patients, on='PatientID', how='left')
# `Amount` is now the patient's total billed amount, repeated on each of that
# patient's rows; do not sum it across rows.
df = df.merge(billing_per_patient, on='PatientID', how='left')
df = df.merge(
    treatments[['PatientID', 'TreatmentID', 'DoctorID', 'Outcome']],
    on='PatientID',
    how='left',
)
df = df.merge(doctors[['DoctorID', 'DoctorName']], on='DoctorID', how='left')

# Derived columns; each is NaN/NaT-propagating when a date failed to parse.
df['StayDays'] = (df['DischargeDate'] - df['AdmissionDate']).dt.days
df['Month'] = df['AdmissionDate'].dt.month
df['Year'] = df['AdmissionDate'].dt.year

df.head()

In [None]:
# KPIs
#
# `df` is a denormalised join keyed on PatientID, so admissions, bills and
# treatments repeat across its rows. Averaging or summing over `df` therefore
# weights patients by how many rows the join produced. Each KPI below is
# computed from the table at its natural grain, restricted to patients that
# appear in `admissions` (the population `df` is built from).
admitted_ids = admissions['PatientID'].dropna().unique()

# ALOS: exactly one observation per admission. to_datetime is re-applied so
# the cell gives the same result whether or not the columns were already
# converted earlier in the notebook.
admit_ts = pd.to_datetime(admissions['AdmissionDate'], errors='coerce')
discharge_ts = pd.to_datetime(admissions['DischargeDate'], errors='coerce')
ALOS = (discharge_ts - admit_ts).dt.days.mean()

# Recovery rate: share of treatments with a recorded outcome of 'Recovered'.
# Rows without an outcome are excluded from the denominator rather than being
# counted as "not recovered".
known_outcomes = treatments.loc[treatments['PatientID'].isin(admitted_ids), 'Outcome'].dropna()
recovery_rate = known_outcomes.eq('Recovered').mean() * 100

# Cost per patient: each bill counted once, summed per patient. Admitted
# patients with no billing rows contribute 0, matching how an all-NaN group
# sums to 0.
billed_per_patient = (
    billing.groupby('PatientID')['Amount']
    .sum()
    .reindex(admitted_ids, fill_value=0)
)
cost_per_patient = billed_per_patient.mean()

print(f'ALOS (days): {ALOS:.2f}')
print(f'Recovery Rate (%): {recovery_rate:.2f}')
print(f'Avg Cost per Patient: {cost_per_patient:.2f}')

In [None]:
# Export the merged master table as a CSV for Power BI.
# The DataFrame index is not written to the file.
df.to_csv(
    path_or_buf='master_hospital_data.csv',
    index=False,
)
print('Saved master_hospital_data.csv')

Notebook generated on: 2025-11-24 10:13:54 UTC