In [8]:
#Importing Libraries and Loading Dataset
# Reads the hospital communication/energy CSV into `df` and prints a first
# overview: leading rows, column/dtype summary, and descriptive statistics.

import pandas as pd

# NOTE(review): absolute, machine-specific path -- this cell only runs where
# this exact file location exists; consider a path relative to a data directory.
df = pd.read_csv("C:/Users/Dhanuja/Downloads/archive (7)/hospital_communication_energy_system.csv")
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the report.
df.info()
print(df.describe())

             Timestamp Patient ID  Temperature (°C)  Humidity (%)  \
0  2025-01-01 00:00:00      P0001         35.959197     53.116400   
1  2025-01-01 00:05:00      P0002         36.591216     55.613547   
2  2025-01-01 00:10:00      P0003         36.462011     58.424429   
3  2025-01-01 00:15:00      P0004         36.410249     49.649295   
4  2025-01-01 00:20:00      P0005         36.258128     42.872100   

   Oxygen Level (%)  Heart Rate (bpm) Blood Pressure (mmHg)  \
0         98.863328                76             (111, 84)   
1         97.408234                98             (119, 79)   
2         97.106903                93             (132, 79)   
3         96.425162                69             (120, 75)   
4         97.484515                78             (122, 77)   

   Energy Consumption (kWh) Energy Source  Renewable Energy Usage (%)  ...  \
0                 16.747023          Grid                   52.755948  ...   
1                 18.816142          Wind         

In [2]:
#Checking for Missing Values and Cleaning
# Report the number of null entries per column, then keep only the rows
# that have no null in any column.

null_counts_per_column = df.isnull().sum()
print(null_counts_per_column)
df = df.dropna(axis=0, how="any")

Timestamp                              0
Patient ID                             0
Temperature (°C)                       0
Humidity (%)                           0
Oxygen Level (%)                       0
Heart Rate (bpm)                       0
Blood Pressure (mmHg)                  0
Energy Consumption (kWh)               0
Energy Source                          0
Renewable Energy Usage (%)             0
HVAC Power Usage (kWh)                 0
Lighting Power Usage (kWh)             0
Medical Equipment Power Usage (kWh)    0
Total Power Usage (kWh)                0
Room Temperature (°C)                  0
Room Humidity (%)                      0
HVAC Mode                              0
HVAC Efficiency (%)                    0
Outdoor Temperature (°C)               0
Outdoor Humidity (%)                   0
Day of the Week                        0
Season                                 0
Energy Saving Mode                     0
System Health Check                    0
AI Predicted Hea

In [3]:
#Handling Duplicate Entries
# Rows are compared across all columns; the first occurrence of each
# duplicated row is kept and the later repeats are removed.

df = df.drop_duplicates(subset=None, keep="first")

In [6]:
#Preparing Timestamp Data and Extracting Time Features
# Parses the 'Timestamp' column to datetimes and derives calendar features
# (hour, day of week, month, date) as new columns on `df`.

import pandas as pd
import numpy as np

# Continue from the `df` built by the earlier loading/cleaning cells.
# Re-reading the CSV here would silently throw away the dropna() and
# drop_duplicates() results on a top-to-bottom run of the notebook.
# The explicit copy makes the column assignments below write to an
# independent frame rather than to a possible slice of an earlier one.
df = df.copy()

print("Columns names in your dataset:")
print(df.columns.tolist())

df['Timestamp'] = pd.to_datetime(df['Timestamp'])

# All derived features come from the parsed timestamp via the .dt accessor.
timestamp = df['Timestamp'].dt
df['Hour'] = timestamp.hour
df['Day_of_Week'] = timestamp.dayofweek  # pandas convention: Monday=0 ... Sunday=6
df['Month'] = timestamp.month
df['Date'] = timestamp.date

print("\nTime features created successfully!")
print("Sample of time features:")
print(df[['Timestamp', 'Hour', 'Day_of_Week', 'Month']].head())

Columns names in your dataset:
['Timestamp', 'Patient ID', 'Temperature (°C)', 'Humidity (%)', 'Oxygen Level (%)', 'Heart Rate (bpm)', 'Blood Pressure (mmHg)', 'Energy Consumption (kWh)', 'Energy Source', 'Renewable Energy Usage (%)', 'HVAC Power Usage (kWh)', 'Lighting Power Usage (kWh)', 'Medical Equipment Power Usage (kWh)', 'Total Power Usage (kWh)', 'Room Temperature (°C)', 'Room Humidity (%)', 'HVAC Mode', 'HVAC Efficiency (%)', 'Outdoor Temperature (°C)', 'Outdoor Humidity (%)', 'Day of the Week', 'Season', 'Energy Saving Mode', 'System Health Check', 'AI Predicted Health Status']

Time features created successfully!
Sample of time features:
            Timestamp  Hour  Day_of_Week  Month
0 2025-01-01 00:00:00     0            2      1
1 2025-01-01 00:05:00     0            2      1
2 2025-01-01 00:10:00     0            2      1
3 2025-01-01 00:15:00     0            2      1
4 2025-01-01 00:20:00     0            2      1


In [7]:
#Creating Healthcare-Specific Features and Target Variable
# Adds a 'Critical_Hours' flag, the 'Energy_Waste' target, and the share of
# total power used by HVAC, medical equipment and lighting.

# Critical hours: hour within 6..22 (both ends inclusive) on a day whose
# Day_of_Week value is below 5.
is_weekday = df['Day_of_Week'] < 5
in_daytime_window = df['Hour'].between(6, 22)
df['Critical_Hours'] = np.where(in_daytime_window & is_weekday, 1, 0)

# Target: consumption above the 70th-percentile value while outside the
# critical hours.
consumption = df['Energy Consumption (kWh)']
energy_threshold = consumption.quantile(0.7)
outside_critical_hours = df['Critical_Hours'] == 0
df['Energy_Waste'] = np.where(
    outside_critical_hours & (consumption > energy_threshold), 1, 0
)

# Each subsystem's usage expressed as a fraction of the total power usage.
total_power = df['Total Power Usage (kWh)']
for ratio_column, usage_column in (
    ('HVAC_to_Total_Ratio', 'HVAC Power Usage (kWh)'),
    ('Medical_to_Total_Ratio', 'Medical Equipment Power Usage (kWh)'),
    ('Lighting_to_Total_Ratio', 'Lighting Power Usage (kWh)'),
):
    df[ratio_column] = df[usage_column] / total_power

waste_record_count = df['Energy_Waste'].sum()
print("Healthcare-specific features created!")
print(f"Energy waste detected in {waste_record_count} out of {len(df)} records")

Healthcare-specific features created!
Energy waste detected in 1532 out of 10000 records
