In [1]:
# pip install -r requirements.txt --quiet

In [2]:
import pandas as pd
import building_ingestion_utils as biu
import modeling_utils as mu
import data_utils as du
import optimization_utils as ou
import weather_ingestion_utils as wiu
import energy_price_utils as epu
import json

# Directory the raw site data is read from.
directory = 'industrial_sites'

# Read the files found under `directory` (';' is handed to the loader,
# presumably as the field separator — confirm in building_ingestion_utils),
# then restructure the result into the dict of DataFrames, keyed by site name,
# that the rest of the notebook works on.
dataframes = du.restructure_dataframes(biu.get_dataframes(directory, ';'))

In [3]:
# Summarise the load: how many tables exist, then one line per table with its
# (rows, columns) shape.
print(f"Total dataframes created: {len(dataframes)}")

for name in dataframes:
    df = dataframes[name]
    print(f'DataFrame: {name} loaded with shape: {df.shape}')

Total dataframes created: 3
DataFrame: industrial_site2 loaded with shape: (42760, 29)
DataFrame: industrial_site3 loaded with shape: (42760, 44)
DataFrame: industrial_site1 loaded with shape: (42760, 54)


In [4]:
# Print the column names of every site table (names do not carry units yet).
du.check_column_names(dataframes)

DataFrame: industrial_site2
Columns: ['DataFrame Name', 'Time', '01 General_Electric_Active Energy', '02 Production_Electric_Active Energy', '03 Chiller Group_Electric_Active Energy', '04 UTA_Electric_Active Energy', '05 Compressors_Electric_Active Energy', '06 Offices_Electric_Active Energy', '07 Data Center_Electric_Active Energy', '08 Technological Centers_Electric_Active Energy', 'General_Electric_Active Energy', '01 General_Electric_Power Factor', '02 Production_Electric_Power Factor', '03 Chiller Group_Electric_Power Factor', '04 UTA_Electric_Power Factor', '05 Compressors_Electric_Power Factor', '06 Offices_Electric_Power Factor', '07 Data Center_Electric_Power Factor', '08 Technological Centers_Electric_Power Factor', 'General_Electric_Power Factor', '01 General_Electric_Active Power', '02 Production_Electric_Active Power', '03 Chiller Group_Electric_Active Power', '04 UTA_Electric_Active Power', '05 Compressors_Electric_Active Power', '06 Offices_Electric_Active Power', '07 Da

The data is stored in a dictionary of DataFrames called `dataframes`, keyed by site name.

In [5]:
# Disabled template for adding a new quantity -> unit entry to the units JSON:
#   new_entry_key = 'energy'
#   new_entry_value = 'kWh'
# add_entry_to_units_dict(file_path, new_entry_key, new_entry_value, verbose=False)

# Load the quantity -> unit mapping (e.g. 'Active Energy' -> 'kWh') from JSON.
units_dict = biu.read_units_dict_from_json('unit_dict.json')

# Append the unit to each measurement column name, e.g.
# '01 General_Electric_Active Energy' -> '01 General_Electric_Active Energy (kWh)'.
dataframes = biu.add_units_to_column_names(dataframes, units_dict)


# Display, for each site, the dtype of every column found in any site table,
# or 'Column not present' where that site lacks the column.
du.column_presence_checker(dataframes)

Unnamed: 0,DataFrame Name,01 General Transformer 1234_Electric_Active Energy (kWh),01 General Transformer 1234_Electric_Active Power (kW),01 General Transformer 1234_Electric_Power Factor (real),01 General_Electric_Active Energy (kWh),01 General_Electric_Active Power (kW),01 General_Electric_Power Factor (real),01 Normal General_Electric_Active Energy (kWh),01 Normal General_Electric_Active Power (kW),01 Normal General_Electric_Power Factor (real),...,General_Technical_Efficiency (%),General_Technical_Flow Rate (m3/h),General_Technical_Pressure (bar),General_Technical_Temperature (C),General_Vapour_Flow Rate (m3/h),General_Vapour_Fumes Temperature (C),General_Vapour_Pressure (bar),General_Vapour_Quantity (m3),General_Water_Flow Rate (m3/h),Time
0,industrial_site2,Column not present,Column not present,Column not present,float64,float64,float64,Column not present,Column not present,Column not present,...,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,object
1,industrial_site3,float64,float64,float64,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,...,float64,float64,float64,float64,Column not present,Column not present,Column not present,Column not present,Column not present,object
2,industrial_site1,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,float64,float64,float64,...,Column not present,Column not present,Column not present,Column not present,float64,float64,float64,float64,float64,object


In [6]:
# Display, per site and column, whether the column holds NaNs ('Some NaNs' /
# 'No NaNs') or does not exist for that site ('Column not present').
du.nan_checker(dataframes)

Unnamed: 0,DataFrame Name,01 General Transformer 1234_Electric_Active Energy (kWh),01 General Transformer 1234_Electric_Active Power (kW),01 General Transformer 1234_Electric_Power Factor (real),01 General_Electric_Active Energy (kWh),01 General_Electric_Active Power (kW),01 General_Electric_Power Factor (real),01 Normal General_Electric_Active Energy (kWh),01 Normal General_Electric_Active Power (kW),01 Normal General_Electric_Power Factor (real),...,General_Technical_Efficiency (%),General_Technical_Flow Rate (m3/h),General_Technical_Pressure (bar),General_Technical_Temperature (C),General_Vapour_Flow Rate (m3/h),General_Vapour_Fumes Temperature (C),General_Vapour_Pressure (bar),General_Vapour_Quantity (m3),General_Water_Flow Rate (m3/h),Time
0,No NaNs,Column not present,Column not present,Column not present,Some NaNs,Some NaNs,Some NaNs,Column not present,Column not present,Column not present,...,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,No NaNs
1,No NaNs,Some NaNs,Some NaNs,Some NaNs,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,...,Some NaNs,Some NaNs,Some NaNs,Some NaNs,Column not present,Column not present,Column not present,Column not present,Column not present,No NaNs
2,No NaNs,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,Some NaNs,Some NaNs,Some NaNs,...,Column not present,Column not present,Column not present,Column not present,Some NaNs,Some NaNs,Some NaNs,Some NaNs,Some NaNs,No NaNs


In [7]:
# Format used to parse the 'Time' strings: day/month/two-digit year, 24h HH:MM.
datetime_format = '%d/%m/%y %H:%M'

# Convert 'Time' to datetime in every site table. After this call the tables
# also contain is_weekend, month_sin/month_cos and day_of_week_sin/day_of_week_cos.
dataframes = biu.convert_and_transform_date_columns(
    dict_of_dfs=dataframes,
    date_column='Time',
    datetime_format=datetime_format,
    verbose=True,
)




DataFrame: industrial_site2 - Converted Time to datetime
DataFrame: industrial_site3 - Converted Time to datetime
DataFrame: industrial_site1 - Converted Time to datetime


In [8]:
# Repeat the NaN report after the date conversion; it now also covers the
# calendar feature columns added by that step.
du.nan_checker(dataframes)

Unnamed: 0,DataFrame Name,01 General Transformer 1234_Electric_Active Energy (kWh),01 General Transformer 1234_Electric_Active Power (kW),01 General Transformer 1234_Electric_Power Factor (real),01 General_Electric_Active Energy (kWh),01 General_Electric_Active Power (kW),01 General_Electric_Power Factor (real),01 Normal General_Electric_Active Energy (kWh),01 Normal General_Electric_Active Power (kW),01 Normal General_Electric_Power Factor (real),...,General_Vapour_Fumes Temperature (C),General_Vapour_Pressure (bar),General_Vapour_Quantity (m3),General_Water_Flow Rate (m3/h),Time,day_of_week_cos,day_of_week_sin,is_weekend,month_cos,month_sin
0,No NaNs,Column not present,Column not present,Column not present,Some NaNs,Some NaNs,Some NaNs,Column not present,Column not present,Column not present,...,Column not present,Column not present,Column not present,Column not present,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs
1,No NaNs,Some NaNs,Some NaNs,Some NaNs,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,...,Column not present,Column not present,Column not present,Column not present,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs
2,No NaNs,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,Some NaNs,Some NaNs,Some NaNs,...,Some NaNs,Some NaNs,Some NaNs,Some NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs


In [9]:
# Show the first rows of each site table to eyeball the parsed timestamps,
# the unit-suffixed column names and the calendar features.
du.preview_dict_of_dfs(dataframes)

Preview of DataFrame for industrial_site2:


Unnamed: 0,DataFrame Name,Time,01 General_Electric_Active Energy (kWh),02 Production_Electric_Active Energy (kWh),03 Chiller Group_Electric_Active Energy (kWh),04 UTA_Electric_Active Energy (kWh),05 Compressors_Electric_Active Energy (kWh),06 Offices_Electric_Active Energy (kWh),07 Data Center_Electric_Active Energy (kWh),08 Technological Centers_Electric_Active Energy (kWh),...,05 Compressors_Electric_Active Power (kW),06 Offices_Electric_Active Power (kW),07 Data Center_Electric_Active Power (kW),08 Technological Centers_Electric_Active Power (kW),General_Electric_Active Power (kW),is_weekend,month_sin,month_cos,day_of_week_sin,day_of_week_cos
0,industrial_site2,2022-07-01 00:00:00,133.0,16.0,52.0,10.0,4.0,1.0,0.0,7.0,...,16.7,2.94,0.26,34.7,75.6,0,-0.5,-0.866025,-0.433884,-0.900969
1,industrial_site2,2022-07-01 00:15:00,131.0,16.0,51.0,10.0,4.0,0.0,0.0,8.0,...,15.5,3.02,0.26,30.5,76.4,0,-0.5,-0.866025,-0.433884,-0.900969
2,industrial_site2,2022-07-01 00:30:00,129.0,15.0,51.0,10.0,5.0,1.0,0.0,7.0,...,15.7,2.7,0.25,29.5,76.9,0,-0.5,-0.866025,-0.433884,-0.900969
3,industrial_site2,2022-07-01 00:45:00,123.0,13.0,46.0,10.0,4.0,1.0,0.0,7.0,...,16.9,2.82,0.26,28.8,74.6,0,-0.5,-0.866025,-0.433884,-0.900969
4,industrial_site2,2022-07-01 01:00:00,128.0,17.0,47.0,10.0,4.0,1.0,0.0,8.0,...,18.1,3.24,0.26,30.5,74.7,0,-0.5,-0.866025,-0.433884,-0.900969




Preview of DataFrame for industrial_site3:


Unnamed: 0,DataFrame Name,Time,01 General Transformer 1234_Electric_Active Energy (kWh),02 Chiller Group_Electric_Active Energy (kWh),03 Aspirator_Electric_Active Energy (kWh),04 Compressed Air_Electric_Active Energy (kWh),05 Weaving_Electric_Active Energy (kWh),06 Ironing_Electric_Active Energy (kWh),07 UPS_Electric_Active Energy (kWh),08 General Services_Electric_Active Energy (kWh),...,10 Warehouses_Electric_Active Power (kW),11 Winding_Electric_Active Power (kW),12 Others_Electric_Active Power (kW),General_Technical_Active Power (kW),General_Technical_Flow Rate (m3/h),is_weekend,month_sin,month_cos,day_of_week_sin,day_of_week_cos
0,industrial_site3,2022-07-01 00:00:00,579.0,82.6,103.0,144.0,98.5,73.7,0.536,8.37,...,10.8,3.63,491.0,0.08,750.0,0,-0.5,-0.866025,-0.433884,-0.900969
1,industrial_site3,2022-07-01 00:15:00,513.0,80.5,103.0,132.0,97.8,49.9,0.552,7.71,...,10.6,3.02,476.0,0.144,928.0,0,-0.5,-0.866025,-0.433884,-0.900969
2,industrial_site3,2022-07-01 00:30:00,505.0,90.1,118.0,138.0,107.0,47.6,0.484,8.6,...,10.6,2.96,482.0,-0.036,588.0,0,-0.5,-0.866025,-0.433884,-0.900969
3,industrial_site3,2022-07-01 00:45:00,512.0,85.0,103.0,131.0,98.2,45.5,0.484,7.84,...,10.6,7.0,477.0,-0.14,638.0,0,-0.5,-0.866025,-0.433884,-0.900969
4,industrial_site3,2022-07-01 01:00:00,510.0,84.6,103.0,132.0,97.8,45.5,0.488,7.78,...,10.6,6.77,470.0,-1.2,844.0,0,-0.5,-0.866025,-0.433884,-0.900969




Preview of DataFrame for industrial_site1:


Unnamed: 0,DataFrame Name,Time,01 Normal General_Electric_Active Energy (kWh),02 Chiller_Electric_Active Energy (kWh),03 Dyeing_Electric_Active Energy (kWh),04 Ironing_Electric_Active Energy (kWh),05 Purifier_Electric_Active Energy (kWh),07 Technological Centers_Electric_Active Energy (kWh),08 Offices Changing Rooms_Electric_Active Energy (kWh),09 Compressed Air_Electric_Active Energy (kWh),...,17 Print_Electric_Active Power (kW),General_Electric_Active Power (kW),General_Natural Gas_Flow Rate (m3/h),General_Vapour_Flow Rate (m3/h),General_Water_Flow Rate (m3/h),is_weekend,month_sin,month_cos,day_of_week_sin,day_of_week_cos
0,industrial_site1,2022-07-01 00:00:00,441.0,60.5,60.4,47.4,27.9,32.7,14.6,37.6,...,69.3,471.0,253.0,8813.0,766.0,0,-0.5,-0.866025,-0.433884,-0.900969
1,industrial_site1,2022-07-01 00:15:00,412.0,60.7,57.2,46.2,22.5,27.7,17.8,40.7,...,106.0,480.0,0.09,8636.0,404.0,0,-0.5,-0.866025,-0.433884,-0.900969
2,industrial_site1,2022-07-01 00:30:00,440.0,56.4,62.4,40.3,24.0,30.6,17.7,41.6,...,104.0,519.0,864.0,16272.0,784.0,0,-0.5,-0.866025,-0.433884,-0.900969
3,industrial_site1,2022-07-01 00:45:00,448.0,54.4,60.0,48.6,24.7,30.7,17.8,40.1,...,109.0,510.0,275.0,11009.0,803.0,0,-0.5,-0.866025,-0.433884,-0.900969
4,industrial_site1,2022-07-01 01:00:00,410.0,53.8,59.5,47.7,24.7,31.5,17.5,41.4,...,111.0,520.0,417.0,10018.0,665.0,0,-0.5,-0.866025,-0.433884,-0.900969






# Create Target Column

## Create Total Consumption (kWh) Column

In [10]:
# Quantities for which a per-site total column is added. Requires the
# `dataframes` dictionary populated (and unit-suffixed) by the cells above.
target_columns = ['Active Energy']

for col_name in target_columns:
    dataframes = biu.create_total_column_by_units(
        dataframes,
        col_name,
        verbose=True,
    )

Attempting to read the units dictionary from 'unit_dict.json'
Successfully read the units dictionary from 'unit_dict.json'
Dictionary content: {'Active Energy': 'kWh', 'Active Power': 'kW', 'Power Factor': 'real', 'Efficiency': '%', 'Steam': 'kg', 'Flow Rate': 'm3/h', 'Pressure': 'bar', 'Temperature': 'C', 'Quantity': 'm3'}
Processing DataFrame: industrial_site2
Columns to sum for 'kWh': ['01 General_Electric_Active Energy (kWh)', '02 Production_Electric_Active Energy (kWh)', '03 Chiller Group_Electric_Active Energy (kWh)', '04 UTA_Electric_Active Energy (kWh)', '05 Compressors_Electric_Active Energy (kWh)', '06 Offices_Electric_Active Energy (kWh)', '07 Data Center_Electric_Active Energy (kWh)', '08 Technological Centers_Electric_Active Energy (kWh)', 'General_Electric_Active Energy (kWh)']
DataFrame: industrial_site2 - Created column: Total_Active Energy_kWh summing columns: ['01 General_Electric_Active Energy (kWh)', '02 Production_Electric_Active Energy (kWh)', '03 Chiller Group_El

In [11]:
# NOTE(review): this repeats the 'Active Energy' totalization already performed
# by the loop in the previous cell; the logged list of summed columns is the
# same in both runs, so this cell looks redundant — confirm and consider removing.
dataframes = biu.create_total_column_by_units(dataframes, 'Active Energy', verbose=True)

Attempting to read the units dictionary from 'unit_dict.json'
Successfully read the units dictionary from 'unit_dict.json'
Dictionary content: {'Active Energy': 'kWh', 'Active Power': 'kW', 'Power Factor': 'real', 'Efficiency': '%', 'Steam': 'kg', 'Flow Rate': 'm3/h', 'Pressure': 'bar', 'Temperature': 'C', 'Quantity': 'm3'}
Processing DataFrame: industrial_site2
Columns to sum for 'kWh': ['01 General_Electric_Active Energy (kWh)', '02 Production_Electric_Active Energy (kWh)', '03 Chiller Group_Electric_Active Energy (kWh)', '04 UTA_Electric_Active Energy (kWh)', '05 Compressors_Electric_Active Energy (kWh)', '06 Offices_Electric_Active Energy (kWh)', '07 Data Center_Electric_Active Energy (kWh)', '08 Technological Centers_Electric_Active Energy (kWh)', 'General_Electric_Active Energy (kWh)']
DataFrame: industrial_site2 - Created column: Total_Active Energy_kWh summing columns: ['01 General_Electric_Active Energy (kWh)', '02 Production_Electric_Active Energy (kWh)', '03 Chiller Group_El

In [12]:
# Print the column names of every site table again, e.g. to check the
# unit-suffixed names and the newly created total column.
du.check_column_names(dataframes)

DataFrame: industrial_site2
Columns: ['DataFrame Name', 'Time', '01 General_Electric_Active Energy (kWh)', '02 Production_Electric_Active Energy (kWh)', '03 Chiller Group_Electric_Active Energy (kWh)', '04 UTA_Electric_Active Energy (kWh)', '05 Compressors_Electric_Active Energy (kWh)', '06 Offices_Electric_Active Energy (kWh)', '07 Data Center_Electric_Active Energy (kWh)', '08 Technological Centers_Electric_Active Energy (kWh)', 'General_Electric_Active Energy (kWh)', '01 General_Electric_Power Factor (real)', '02 Production_Electric_Power Factor (real)', '03 Chiller Group_Electric_Power Factor (real)', '04 UTA_Electric_Power Factor (real)', '05 Compressors_Electric_Power Factor (real)', '06 Offices_Electric_Power Factor (real)', '07 Data Center_Electric_Power Factor (real)', '08 Technological Centers_Electric_Power Factor (real)', 'General_Electric_Power Factor (real)', '01 General_Electric_Active Power (kW)', '02 Production_Electric_Active Power (kW)', '03 Chiller Group_Electric_Ac

# Data Cleaning

## Impute missing values with the column means of each DataFrame

In [13]:
# Fill NaNs in every site table with the mean of the respective column.
# NOTE(review): Total_Active Energy_kWh was created before this step, so it is
# presumably imputed on its own rather than recomputed from the imputed
# sub-meter columns — confirm that this ordering is intended.
dataframes = du.impute_missing_values(dataframes)

DataFrame: industrial_site2 - NaNs filled with column means
DataFrame: industrial_site3 - NaNs filled with column means
DataFrame: industrial_site1 - NaNs filled with column means


In [14]:
# Repeat the NaN report after imputation to confirm the gaps were filled.
du.nan_checker(dataframes)

Unnamed: 0,DataFrame Name,01 General Transformer 1234_Electric_Active Energy (kWh),01 General Transformer 1234_Electric_Active Power (kW),01 General Transformer 1234_Electric_Power Factor (real),01 General_Electric_Active Energy (kWh),01 General_Electric_Active Power (kW),01 General_Electric_Power Factor (real),01 Normal General_Electric_Active Energy (kWh),01 Normal General_Electric_Active Power (kW),01 Normal General_Electric_Power Factor (real),...,General_Vapour_Pressure (bar),General_Vapour_Quantity (m3),General_Water_Flow Rate (m3/h),Time,Total_Active Energy_kWh,day_of_week_cos,day_of_week_sin,is_weekend,month_cos,month_sin
0,No NaNs,Column not present,Column not present,Column not present,No NaNs,No NaNs,No NaNs,Column not present,Column not present,Column not present,...,Column not present,Column not present,Column not present,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs
1,No NaNs,No NaNs,No NaNs,No NaNs,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,...,Column not present,Column not present,Column not present,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs
2,No NaNs,Column not present,Column not present,Column not present,Column not present,Column not present,Column not present,No NaNs,No NaNs,No NaNs,...,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs,No NaNs


# Ingest Price Data

In [15]:
# Tabulate the earliest and latest timestamp of each site table
# (one column per site, rows 'earliest_date' / 'latest_date').
pd.DataFrame(du.get_min_max_dates(dataframes))

Unnamed: 0,industrial_site2,industrial_site3,industrial_site1
earliest_date,2022-07-01 00:00:00,2022-07-01 00:00:00,2022-07-01 00:00:00
latest_date,2023-09-19 09:45:00,2023-09-19 09:45:00,2023-09-19 09:45:00


In [16]:
# Derive the price-query window from the data instead of hard-coding it, so the
# window follows the site tables if they are reloaded with a different range.
# The frame below has one column per site and the rows 'earliest_date' /
# 'latest_date' (the same table the previous cell displays).
date_bounds = pd.DataFrame(du.get_min_max_dates(dataframes))

# Take the widest span across all sites so every site's timestamps are covered.
init_date = pd.to_datetime(date_bounds.loc['earliest_date'].min())
end_date = pd.to_datetime(date_bounds.loc['latest_date'].max())

# Fetch the OMIE marginal prices for the window [init_date, end_date].
# NOTE(review): in the saved run every request failed with
# "unsupported locale setting" and the cell was stopped with KeyboardInterrupt,
# so price_df was presumably never populated — the locale handling inside
# energy_price_utils needs fixing before this cell can succeed.
price_df = epu.import_omie_marginal_price_data(init_date, end_date, verbose=True)

Attempting to import OMIE marginal price data from 2022-07-01 00:00:00 to 2023-09-19 09:45:00...
Requesting https://www.omie.es/sites/default/files/dados/AGNO_2022/MES_07/TXT/INT_PBC_EV_H_1_01_07_2022_01_07_2022.TXT ...
There was error processing file: https://www.omie.es/sites/default/files/dados/AGNO_2022/MES_07/TXT/INT_PBC_EV_H_1_01_07_2022_01_07_2022.TXT
unsupported locale settinghttps://www.omie.es/sites/default/files/dados/AGNO_2022/MES_07/TXT/INT_PBC_EV_H_1_01_07_2022_01_07_2022.TXT
Requesting https://www.omie.es/sites/default/files/dados/AGNO_2022/MES_07/TXT/INT_PBC_EV_H_1_02_07_2022_02_07_2022.TXT ...
There was error processing file: https://www.omie.es/sites/default/files/dados/AGNO_2022/MES_07/TXT/INT_PBC_EV_H_1_02_07_2022_02_07_2022.TXT
unsupported locale settinghttps://www.omie.es/sites/default/files/dados/AGNO_2022/MES_07/TXT/INT_PBC_EV_H_1_02_07_2022_02_07_2022.TXT
Requesting https://www.omie.es/sites/default/files/dados/AGNO_2022/MES_07/TXT/INT_PBC_EV_H_1_03_07_2022_03_

KeyboardInterrupt: 

# Getting Constraint Equations

In [None]:
# constraints = mu.store_constraints(dataframes)

# for name, constraint in constraints.items():
#     print(f"DataFrame: {name}")
#     print(f"Total Floor Consumption: {constraint['total_floor_consumption']}")
#     print(f"Total Zone Consumption: {constraint['total_zone_consumption']}")
#     print(f"Total AC Consumption by Zone: {constraint['total_ac_consumption_by_zone']}")