# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the five source tables from their CSV exports. The tuple on the left
# is unpacked in the same order as the paths listed on the right.
encounters, organizations, patients, payers, procedures = (
    pd.read_csv(csv_path)
    for csv_path in (
        "D:\\data visualisationsem2.py\\patientDetails\\encounters.csv",
        "D:\\data visualisationsem2.py\\patientDetails\\organizations.csv",
        "D:\\data visualisationsem2.py\\patientDetails\\patients.csv",
        "D:\\data visualisationsem2.py\\patientDetails\\payers.csv",
        "D:\\data visualisationsem2.py\\patientDetails\\procedures.csv",
    )
)

# Parse the date column of each table into datetime64 in place.
# errors='coerce' turns unparseable values into NaT instead of raising.
for _frame, _date_column in (
    (patients, 'date_of_birth'),
    (encounters, 'visit_date'),
    (procedures, 'procedure_date'),
):
    _frame[_date_column] = pd.to_datetime(_frame[_date_column], errors='coerce')

# Build one wide table: start from encounters and left-join each lookup
# table on its key, in order, so every encounter row is kept even when a
# match is missing.
# NOTE(review): the final join on encounter_id will repeat an encounter row
# once per matching procedures row, if procedures holds several rows per
# encounter -- confirm against the data.
merged_df = encounters
for _table, _join_key in (
    (patients, 'patient_id'),
    (organizations, 'organization_id'),
    (payers, 'payer_id'),
    (procedures, 'encounter_id'),
):
    merged_df = merged_df.merge(_table, on=_join_key, how='left')

# Preview the first rows of the merged result
print("Merged DataFrame:")
print(merged_df.head())

# ----------------- Data Visualization ----------------- #

# 1. Count of Visits by Reason
# merged_df is left-joined to procedures on encounter_id, so an encounter
# with several procedures would occupy several rows. Keep one row per
# encounter so each visit is counted exactly once.
# (assumes 'reason' describes the encounter, as the axis label says -- TODO confirm)
visits_by_reason = merged_df.drop_duplicates(subset='encounter_id')

plt.figure(figsize=(8, 4))
# NOTE(review): seaborn >= 0.13 warns when `palette` is given without `hue`;
# left as-is to stay compatible with older seaborn versions.
sns.countplot(x='reason', data=visits_by_reason, palette='viridis')
plt.title("Number of Patient Visits by Reason")
plt.xlabel("Reason for Visit")
plt.ylabel("Count")
# Right-align rotated labels so each one ends under its own bar.
plt.xticks(rotation=45, ha='right')
# Leave room for the rotated tick labels so they are not clipped.
plt.tight_layout()
plt.show()

# 2. Patient Visits per Hospital
# merged_df is left-joined to procedures on encounter_id, so an encounter
# with several procedures would occupy several rows. Keep one row per
# encounter so hospitals are compared on visits, not on procedure rows.
visits_per_hospital = merged_df.drop_duplicates(subset='encounter_id')

plt.figure(figsize=(8, 4))
# NOTE(review): seaborn >= 0.13 warns when `palette` is given without `hue`;
# left as-is to stay compatible with older seaborn versions.
sns.countplot(x='organization_name', data=visits_per_hospital, palette='coolwarm')
plt.title("Number of Visits Per Hospital")
plt.xlabel("Hospital/Clinic")
plt.ylabel("Count of Visits")
# Right-align rotated labels so each one ends under its own bar.
plt.xticks(rotation=45, ha='right')
# Leave room for the rotated tick labels so they are not clipped.
plt.tight_layout()
plt.show()

# 3. Age Distribution of Patients
# Age in completed calendar years: year difference, minus one when this
# year's birthday has not happened yet. Dividing elapsed days by 365 ignores
# leap days and rolls the age over before the actual birthday.
# Rows whose date_of_birth is NaT end up with a NaN age.
today = pd.Timestamp('today')
birth_dates = merged_df['date_of_birth']
birthday_pending = (birth_dates.dt.month > today.month) | (
    (birth_dates.dt.month == today.month) & (birth_dates.dt.day > today.day)
)
merged_df['age'] = today.year - birth_dates.dt.year - birthday_pending.astype(int)

# merged_df has one row per encounter (and possibly per procedure), so a
# patient with many visits would be counted many times. Keep one row per
# patient so the y-axis really is "Number of Patients".
patient_ages = merged_df.drop_duplicates(subset='patient_id')['age']

plt.figure(figsize=(8, 4))
sns.histplot(patient_ages, bins=10, kde=True, color='blue')
plt.title("Age Distribution of Patients")
plt.xlabel("Age")
plt.ylabel("Number of Patients")
plt.show()

# 4. Procedures Performed in Each Hospital
# Horizontal grouped bars: one group per procedure name, one bar per
# organization within each group.
plt.figure(figsize=(8, 4))
procedures_ax = sns.countplot(
    data=merged_df,
    y='procedure_name',
    hue='organization_name',
    palette='magma',
)
procedures_ax.set_title("Procedures Performed in Each Hospital")
procedures_ax.set_xlabel("Count")
procedures_ax.set_ylabel("Procedure Name")
procedures_ax.legend(title="Hospital")
plt.show()
