In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer
import numpy as np

def transform_data_with_columntransformer(df: pd.DataFrame):
    """
    This function takes a pandas DataFrame, applies a series of transformations
    using ColumnTransformer, and returns features (X) and target variables (y).
    """

    df['data_received_on'] = pd.to_datetime(df['data_received_on'])
    df['data_received_on_naive'] = df['data_received_on'].dt.tz_localize(None)

    df.sort_values('data_received_on_naive', inplace=True)

    converted_df = df.pivot_table(
        index=['data_received_on_naive', 'site', 'system_type'],
        columns='datapoint',
        values='monitoring_data',
        aggfunc='first'
    )
    converted_df.reset_index(inplace=True)

    numeric_cols = [
        "RA Damper feedback", "SA Pressure setpoint", "OA Humid", "RA Temp",
        "RA CO2", "RA CO2 setpoint", "SA Fan Speed feedback", "SA Fan Speed control",
        "RA Temp control( Valve Feedback)", "SA pressure", "Fan Power meter (KW)",
        "RA damper control", "OA Temp", "OA Flow", "SA temp", "RA  temperature setpoint"
    ]
    present_numeric_cols = [col for col in numeric_cols if col in converted_df.columns]
    converted_df[present_numeric_cols] = converted_df[present_numeric_cols].apply(pd.to_numeric, errors='coerce')

    cols_to_drop = [
        'site', 'system_type', 'Bag filter dirty status', 'Plant enable',
        'Trip status', 'airflow Status', 'auto Status', 'pre Filter dirty staus'
    ]
    converted_df.drop(columns=cols_to_drop, inplace=True, errors='ignore')

    if "Sup fan cmd" in converted_df.columns:
        mappings = {'active': 1, 'inactive': 0}
        converted_df["Sup fan cmd"] = converted_df["Sup fan cmd"].replace(mappings)

    target_columns = ["RA damper control", "RA Temp control( Valve Feedback)", "SA Fan Speed control", "Fan Power meter (KW)"]
    present_target_cols = [col for col in target_columns if col in converted_df.columns]
    
    converted_df.dropna(subset=present_target_cols, inplace=True)

    y = converted_df[present_target_cols]
    X = converted_df.drop(columns=present_target_cols, errors='ignore')

    numeric_features = [col for col in X.columns if pd.api.types.is_numeric_dtype(X[col]) and col != 'Sup fan cmd']
    
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', MinMaxScaler(), numeric_features)
        ],
        remainder='passthrough'  
    )

    X_transformed = preprocessor.fit_transform(X)


    passthrough_cols = [col for col in X.columns if col not in numeric_features]
    transformed_cols = numeric_features + passthrough_cols
    X = pd.DataFrame(X_transformed, columns=transformed_cols, index=X.index)


    combined = pd.concat([X, y], axis=1)
    combined.dropna(inplace=True)
    X = combined[X.columns]
    y = combined[y.columns]


    return X, y,preprocessor




In [4]:
df = pd.read_csv('C:/Users/debas/OneDrive/Desktop/output.csv')
df

  df = pd.read_csv('C:/Users/debas/OneDrive/Desktop/output.csv')


Unnamed: 0,service_id,asset_code,site,system_type,device_id,device_ip,object_name,equipment_name,equipment_id,data_received_on,datapoint,monitoring_data,service_status,subsystem,system_id
0,FIR-FCU-01On/Off Cmd,FIR-FCU-01,First Floor,FCU,170101,192.168.170.101,First Floor\\FCU-1F-02 On/Off Cmd,FCU-1F-02,FCU-1F-02,2025-08-21T08:11:48.133 UTC,On/Off Cmd,active,normal,-,-
1,FIR-FCU-01On/Off Cmd,FIR-FCU-01,First Floor,FCU,170101,192.168.170.101,First Floor\\FCU-1F-02 On/Off Cmd,FCU-1F-02,FCU-1F-02,2025-08-21T08:08:47.990 UTC,On/Off Cmd,active,normal,-,-
2,FIR-FCU-01On/Off Cmd,FIR-FCU-01,First Floor,FCU,170101,192.168.170.101,First Floor\\FCU-1F-02 On/Off Cmd,FCU-1F-02,FCU-1F-02,2025-08-21T08:05:48.796 UTC,On/Off Cmd,active,normal,-,-
3,FIR-FCU-01On/Off Cmd,FIR-FCU-01,First Floor,FCU,170101,192.168.170.101,First Floor\\FCU-1F-02 On/Off Cmd,FCU-1F-02,FCU-1F-02,2025-08-21T08:02:48.708 UTC,On/Off Cmd,active,normal,-,-
4,FIR-FCU-01On/Off Cmd,FIR-FCU-01,First Floor,FCU,170101,192.168.170.101,First Floor\\FCU-1F-02 On/Off Cmd,FCU-1F-02,FCU-1F-02,2025-08-21T07:59:48.964 UTC,On/Off Cmd,active,normal,-,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14428292,GRO-FCU-05Ret temp,GRO-FCU-05,Ground Floor,FCU,170101,192.168.170.101,Ground Floor\\FCU\\FCU-GF-05 Ret temp,FCU-GF-05,FCU-GF-05,2025-05-23T12:57:34.673 UTC,Ret temp,41.72909164428711,{},-,-
14428293,GRO-FCU-05Ret temp,GRO-FCU-05,Ground Floor,FCU,170101,192.168.170.101,Ground Floor\\FCU\\FCU-GF-05 Ret temp,FCU-GF-05,FCU-GF-05,2025-05-23T12:54:28.494 UTC,Ret temp,40.109561920166016,{},-,-
14428294,ROO-CHI-63Secondary pump,ROO-CHI-63,Rooftop,Chiller system,170101,192.168.170.101,Rooftop\\Chiller system\\Pumps\\Secondary pump...,Pumps,Pumps,2025-05-23T13:03:40.368 UTC,Secondary pump,inactive,{},-,-
14428295,ROO-CHI-63Secondary pump,ROO-CHI-63,Rooftop,Chiller system,170101,192.168.170.101,Rooftop\\Chiller system\\Pumps\\Secondary pump...,Pumps,Pumps,2025-05-23T13:00:38.727 UTC,Secondary pump,inactive,{},-,-


In [6]:
up_df = df[(df['site'] == "Ground Floor") & (df['system_type'] == "AHU")]

In [7]:
X, y = transform_data_with_columntransformer(df=up_df)

# Display the results
print("--- Features (X) ---")
print(X.head())
print("\n--- Target (y) ---")
print(y.head())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['data_received_on'] = pd.to_datetime(df['data_received_on'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['data_received_on_naive'] = df['data_received_on'].dt.tz_localize(None)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values('data_received_on_naive', inplace=True)


--- Features (X) ---
  OA Flow  OA Humid   OA Temp RA  temperature setpoint    RA CO2  \
0     0.0  0.345492  0.296632                 0.555556  0.956773   
1     0.0  0.552264  0.552264                 0.555556       1.0   
2     0.0      0.75  0.703368                 0.555556  0.956773   
3     0.0  0.871572  0.834565                 0.555556  0.871572   
4     0.0  0.997261       1.0                 0.555556       0.5   

  RA CO2 setpoint RA Damper feedback   RA Temp SA Fan Speed feedback  \
0        0.944009                0.0  0.396044                   0.0   
1        0.944009                0.0  0.654509                   0.0   
2        0.944009                0.0  0.793893                   0.0   
3        0.944009                0.0  0.904509                   0.0   
4        0.944009                0.0  0.989074                   0.0   

  SA Pressure setpoint SA pressure   SA temp  data_received_on_naive  \
0             0.090909    0.956773  0.447736 2025-05-23 12:54:28.

  converted_df["Sup fan cmd"] = converted_df["Sup fan cmd"].replace(mappings)


In [9]:
X

Unnamed: 0,OA Flow,OA Humid,OA Temp,RA temperature setpoint,RA CO2,RA CO2 setpoint,RA Damper feedback,RA Temp,SA Fan Speed feedback,SA Pressure setpoint,SA pressure,SA temp,data_received_on_naive,Sup fan cmd
0,0.0,0.345492,0.296632,0.555556,0.956773,0.944009,0.0,0.396044,0.0,0.090909,0.956773,0.447736,2025-05-23 12:54:28.494,0.0
1,0.0,0.552264,0.552264,0.555556,1.0,0.944009,0.0,0.654509,0.0,0.090909,1.0,0.654509,2025-05-23 12:57:34.673,0.0
2,0.0,0.75,0.703368,0.555556,0.956773,0.944009,0.0,0.793893,0.0,0.090909,0.975528,0.793892,2025-05-23 13:00:38.727,0.0
3,0.0,0.871572,0.834565,0.555556,0.871572,0.944009,0.0,0.904509,0.0,0.090909,0.904508,0.904509,2025-05-23 13:03:40.368,0.0
4,0.0,0.997261,1.0,0.555556,0.5,0.944009,0.0,0.989074,0.0,0.090909,0.552264,0.975528,2025-05-23 13:06:50.450,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36072,0.999866,0.296632,0.25,0.5,0.933013,0.832027,0.15,0.345491,1.0,0.545455,0.933013,0.396044,2025-08-21 07:59:48.964,1.0
36073,0.999866,0.296632,0.25,0.5,0.933013,0.832027,0.15,0.345491,1.0,0.545455,0.904508,0.345491,2025-08-21 08:02:48.708,1.0
36074,0.999467,0.296632,0.25,0.5,0.904508,0.832027,0.15,0.345491,1.0,0.545455,0.904508,0.345491,2025-08-21 08:05:48.796,1.0
36075,0.998806,0.25,0.206107,0.5,0.871572,0.832027,0.15,0.296632,1.0,0.545455,0.871573,0.296632,2025-08-21 08:08:47.990,1.0


In [3]:
x = {
    "Bag filter dirty status": 0,
    "OA Flow": 819.8904418945312,
    "OA Humid": 55.93263626098633,
    "OA Temp": 35.93263626098633,
    "Plant enable": 1,
    "RA  temperature setpoint": 24.5,
    "RA CO2": 500,
}

print(f"{x}")

{'Bag filter dirty status': 0, 'OA Flow': 819.8904418945312, 'OA Humid': 55.93263626098633, 'OA Temp': 35.93263626098633, 'Plant enable': 1, 'RA  temperature setpoint': 24.5, 'RA CO2': 500}
