In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the AHU1 pivoted data from a hard-coded, machine-specific absolute path.
# NOTE(review): this path only resolves on the original author's machine;
# consider reading it from a configurable data directory instead.
df = pd.read_csv("C:\\Users\\debas\\OneDrive\\Desktop\\ahu1_pivoted_data.csv")
# Show (rows, columns) as the cell output.
df.shape

(816, 50)

In [3]:
# List the distinct values present in the SpMinVFD column.
df["SpMinVFD"].unique()

array([80, 50])

In [4]:
# Display the DataFrame as the cell output (the notebook renders a truncated preview).
df

Unnamed: 0,AlmFAD,AlmFlw,AlmNtAuto,AlmPreFlt,AlmRACoHi,AlmTRHi,AlmTRLow,AlmTrpSf,Almbag,CMDSpdVFD,...,StaAuto,StaFlw,StaVFDSf,TRe,TSOcc,TSu,TempSp1,TempSp2,TrAvg,TsOn
0,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,80.0,...,active,active,active,23.160000,1,22.455000,20.129999,18.834999,19.507500,1
1,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,80.0,...,active,active,active,23.184999,1,22.500000,20.490000,19.299999,19.959999,1
2,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,80.0,...,active,active,active,23.344999,1,22.510000,21.119999,19.590000,20.264999,1
3,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,80.0,...,active,active,active,23.385000,1,22.600000,21.170000,19.820000,20.504999,1
4,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,80.0,...,active,active,active,23.400000,1,22.609999,21.369999,19.980000,20.684999,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
811,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,50.0,...,active,active,active,24.539999,1,22.389999,21.879999,21.135000,21.525000,1
812,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,50.0,...,active,active,active,24.670000,1,22.424999,21.969999,21.139999,21.555000,1
813,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,50.0,...,active,active,active,24.840000,1,22.559999,22.049999,21.135000,21.602501,1
814,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,inactive,50.0,...,active,active,active,24.939999,1,22.469999,22.129999,21.154999,21.630001,1


KeyError: 'timestamp'

In [None]:
import requests
import json
from datetime import datetime

def _failed_result(timestamp, actual_value, error_msg):
    """Build the result record stored for a row that could not be optimized."""
    return {
        "timestamp": timestamp,
        "actual_value": actual_value,
        "predicted_value": None,
        "difference_actual_and_pred": None,
        "current_setpoints": {},
        "optimized_setpoints": {},
        "error": error_msg,
    }


def optimize_and_compare(df, target_variable="FbVFD", output_file="optimization_results.json", verbose=True,
                         endpoint="http://127.0.0.1:8000/prod/generic_optimize",
                         optimization_method="random", n_iterations=100, timeout=30):
    """
    Send every row of df to the optimization endpoint and save the results as JSON.

    For each row, all column values are stringified (missing values become "0")
    and POSTed as "current_conditions". The response is expected to be JSON with
    the keys "best_target_value" and "best_setpoints". One record per row is
    collected, including rows that fail (those carry an extra "error" key).

    Args:
        df: DataFrame with AHU data; its index labels are used as timestamps.
        target_variable: The variable to optimize (default: "FbVFD").
        output_file: Path the JSON results are written to.
        verbose: When True, error records carry the response detail / full
            traceback instead of just a short message.
        endpoint: URL of the optimization service.
        optimization_method: Value sent as "optimization_method".
        n_iterations: Value sent as "n_iterations".
        timeout: Per-request timeout in seconds.

    Returns:
        List of dicts with timestamp, actual_value, predicted_value,
        difference_actual_and_pred, current_setpoints and optimized_setpoints.
    """
    import traceback  # only needed for verbose error reporting

    results = []
    total_rows = len(df)

    for idx, row in df.iterrows():
        # Bind per-row state BEFORE the try block: the except handler must never
        # see an unbound name or a stale value left over from the previous row.
        timestamp = idx if isinstance(idx, str) else str(idx)
        actual_value = None

        try:
            target_cell = row[target_variable]
            actual_value = float(target_cell) if pd.notna(target_cell) else None

            # The endpoint receives every column as a string; missing values are sent as "0".
            current_conditions = {
                col: str(row[col]) if pd.notna(row[col]) else "0"
                for col in df.columns
            }

            request_body = {
                "current_conditions": current_conditions,
                "target_variable": target_variable,
                "optimization_method": optimization_method,
                "n_iterations": n_iterations,
            }

            response = requests.post(endpoint, json=request_body, timeout=timeout)

            if response.status_code != 200:
                error_msg = f"HTTP {response.status_code}"
                if verbose:
                    try:
                        error_msg = f"{error_msg} - {response.json()}"
                    except ValueError:
                        # Body was not valid JSON; fall back to a text excerpt.
                        error_msg = f"{error_msg} - {response.text[:200]}"

                print(f"✗ Failed for timestamp {timestamp}: {error_msg}")
                results.append(_failed_result(timestamp, actual_value, error_msg))
                continue

            response_data = response.json()
            predicted_value = response_data.get("best_target_value", None)
            optimized_setpoints = response_data.get("best_setpoints", {})

            # Current values for only the setpoints the optimizer returned.
            current_setpoints = {
                name: float(row[name])
                for name in optimized_setpoints.keys()
                if name in df.columns and pd.notna(row[name])
            }

            if actual_value is not None and predicted_value is not None:
                difference = actual_value - predicted_value
            else:
                difference = None

            results.append({
                "timestamp": timestamp,
                "actual_value": actual_value,
                "predicted_value": predicted_value,
                "difference_actual_and_pred": difference,
                "current_setpoints": current_setpoints,
                "optimized_setpoints": optimized_setpoints,
            })

            print(f"✓ Processed row {len(results)}/{total_rows} - Timestamp: {timestamp}, Predicted: {predicted_value}")

        except Exception as e:
            # Best effort: record the failure and continue with the next row.
            error_msg = traceback.format_exc() if verbose else str(e)

            print(f"✗ Error processing timestamp {timestamp}: {error_msg}")
            results.append(_failed_result(timestamp, actual_value, error_msg))

    # Save results to JSON file
    with open(output_file, 'w') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} results to {output_file}")

    return results

# Confirmation message so the cell shows visible output once the function is defined.
print("Function 'optimize_and_compare()' created successfully!")

Function 'optimize_and_compare()' created successfully!


In [None]:
# Run the optimization for all rows in the dataframe
# Note: This will take time as it processes each row sequentially
# Uncomment the line below to run:

# results = optimize_and_compare(df, target_variable="FbVFD", output_file="C:\\Users\\debas\\OneDrive\\Desktop\\optimization_results.json")

In [None]:
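# Example: test run that writes to a separate test output file.
# NOTE: as written, the call below processes ALL rows of df; pass df.head(5) instead of df to process just the first 5 rows.
# Uncomment to test: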

# optimize_and_compare(df, target_variable="FbVFD", output_file="C:\\Users\\debas\\OneDrive\\Desktop\\test_optimization_results.json")

✓ Processed row 1/816 - Timestamp: 0, Predicted: 80.6668701171875
✓ Processed row 2/816 - Timestamp: 1, Predicted: 80.70945739746094
✓ Processed row 2/816 - Timestamp: 1, Predicted: 80.70945739746094
✓ Processed row 3/816 - Timestamp: 2, Predicted: 80.54380798339844
✓ Processed row 3/816 - Timestamp: 2, Predicted: 80.54380798339844
✓ Processed row 4/816 - Timestamp: 3, Predicted: 80.50350952148438
✓ Processed row 4/816 - Timestamp: 3, Predicted: 80.50350952148438
✓ Processed row 5/816 - Timestamp: 4, Predicted: 80.50801849365234
✓ Processed row 5/816 - Timestamp: 4, Predicted: 80.50801849365234
✓ Processed row 6/816 - Timestamp: 5, Predicted: 80.43767547607422
✓ Processed row 6/816 - Timestamp: 5, Predicted: 80.43767547607422
✓ Processed row 7/816 - Timestamp: 6, Predicted: 81.26594543457031
✓ Processed row 7/816 - Timestamp: 6, Predicted: 81.26594543457031
✓ Processed row 8/816 - Timestamp: 7, Predicted: 99.64932250976562
✓ Processed row 8/816 - Timestamp: 7, Predicted: 99.64932250976

[{'timestamp': '0',
  'actual_value': 81.06999969482422,
  'predicted_value': 80.6668701171875,
  'difference_actual_and_pred': 0.40312957763671875,
  'current_setpoints': {'SpMinVFD': 80.0, 'SpTREff': 21.0, 'SpTROcc': 21.0},
  'optimized_setpoints': {'SpMinVFD': 100.0,
   'SpTREff': 20.0,
   'SpTROcc': 26.5}},
 {'timestamp': '1',
  'actual_value': 81.04000091552734,
  'predicted_value': 80.70945739746094,
  'difference_actual_and_pred': 0.33054351806640625,
  'current_setpoints': {'SpMinVFD': 80.0, 'SpTREff': 21.0, 'SpTROcc': 21.0},
  'optimized_setpoints': {'SpMinVFD': 15.0, 'SpTREff': 21.5, 'SpTROcc': 26.5}},
 {'timestamp': '2',
  'actual_value': 81.04999542236328,
  'predicted_value': 80.54380798339844,
  'difference_actual_and_pred': 0.5061874389648438,
  'current_setpoints': {'SpMinVFD': 80.0, 'SpTREff': 21.0, 'SpTROcc': 21.0},
  'optimized_setpoints': {'SpMinVFD': 35.0, 'SpTREff': 24.0, 'SpTROcc': 20.5}},
 {'timestamp': '3',
  'actual_value': 81.0199966430664,
  'predicted_value

In [None]:
# Complete DataFrame Comparison Functions

def compare_dataframes(df1, df2, name1="df1", name2="df2"):
    """
    Run a staged, verbose equality check between two DataFrames.

    The stages run in order -- type, shape, column labels, index, dtypes,
    per-column NaN counts, then cell values -- and the function stops at the
    first stage that fails, printing what differed.

    Args:
        df1, df2: Objects to compare (expected to be pandas DataFrames).
        name1, name2: Labels used for the two frames in the printed report.

    Returns:
        True if every stage passes (the frames are identical), otherwise False.
    """
    print(f"Comparing {name1} vs {name2}")
    print("=" * 50)

    # Stage 1: both inputs must be DataFrames
    both_are_frames = isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame)
    if not both_are_frames:
        print("❌ One or both objects are not DataFrames")
        return False

    # Stage 2: shapes
    shape1, shape2 = df1.shape, df2.shape
    if shape1 != shape2:
        print(f"❌ Different shapes: {name1}={shape1}, {name2}={shape2}")
        return False

    # Stage 3: column labels
    if not df1.columns.equals(df2.columns):
        print("❌ Different columns:")
        for label, frame in ((name1, df1), (name2, df2)):
            print(f"  {label} columns: {list(frame.columns)}")
        return False

    # Stage 4: index
    if not df1.index.equals(df2.index):
        print("❌ Different indices")
        return False

    # Stage 5: dtypes
    if not df1.dtypes.equals(df2.dtypes):
        print("❌ Different data types:")
        dtype_mismatch = df1.dtypes != df2.dtypes
        for col in df1.columns[dtype_mismatch]:
            print(f"  {col}: {name1}={df1[col].dtype}, {name2}={df2[col].dtype}")
        return False

    # Stage 6: per-column NaN counts
    nan_mismatch = df1.isna().sum() != df2.isna().sum()
    if nan_mismatch.any():
        print("❌ Different NaN counts:")
        for col in df1.columns[nan_mismatch]:
            print(f"  {col}: {name1}={df1[col].isna().sum()}, {name2}={df2[col].isna().sum()}")
        return False

    # Stage 7: cell values (exact equality)
    try:
        if df1.equals(df2):
            print("✅ DataFrames are identical!")
            return True

        print("❌ Values differ")
        # A cell counts as different unless it is NaN on both sides.
        not_equal = df1 != df2
        both_nan = df1.isna() & df2.isna()
        mismatch = not_equal & ~both_nan
        if mismatch.any().any():
            cols_with_diff = mismatch.any()
            print(f"Differing columns: {list(df1.columns[cols_with_diff])}")
            # Report at most the first five differing cells
            stacked = mismatch.stack()
            for location in stacked[stacked].index.tolist()[:5]:
                row_label, col_label = location[0], location[1]
                left_val = df1.loc[row_label, col_label]
                right_val = df2.loc[row_label, col_label]
                print(f"  Row {row_label}, Col '{col_label}': {name1}={left_val}, {name2}={right_val}")
        return False
    except Exception as e:
        print(f"❌ Error during value comparison: {e}")
        return False

def quick_compare(df1, df2):
    """
    Quick comparison - returns True if identical, False otherwise.

    Delegates to ``df1.equals(df2)``. Any ordinary error raised by that call
    (for example when df1 is not a DataFrame) is treated as "not identical".
    KeyboardInterrupt / SystemExit are deliberately NOT swallowed, so the cell
    can still be interrupted.
    """
    try:
        return df1.equals(df2)
    except Exception:
        return False

def compare_with_tolerance(df1, df2, tolerance=1e-10):
    """
    Compare DataFrames with numerical tolerance for floating point comparisons.

    Columns that are numeric in df1 are compared with ``np.allclose`` using
    ``tolerance`` as both the relative and the absolute tolerance. Missing
    values must sit at the same positions in both frames: a NaN is no longer
    treated as equal to a real 0. All other columns, and numeric columns for
    which the tolerant comparison cannot be carried out, are compared exactly
    with ``Series.equals``.

    Args:
        df1, df2: DataFrames to compare.
        tolerance: Used for both ``rtol`` and ``atol``.

    Returns:
        True if shapes and column labels match and every column compares
        equal under the rules above, otherwise False.
    """
    if df1.shape != df2.shape or not df1.columns.equals(df2.columns):
        return False

    numeric_cols = df1.select_dtypes(include=[np.number]).columns

    for col in df1.columns:
        left, right = df1[col], df2[col]

        if col in numeric_cols:
            try:
                # Missing values have to line up before they are neutralised
                # below; otherwise NaN on one side would match 0 on the other.
                if (left.isna().to_numpy() != right.isna().to_numpy()).any():
                    return False
                if not np.allclose(left.fillna(0), right.fillna(0), rtol=tolerance, atol=tolerance):
                    return False
                continue
            except Exception:
                # Tolerant comparison is not possible (e.g. df2's column is not
                # numeric); fall through to the exact comparison.
                pass

        if not left.equals(right):
            return False

    return True

# Announce that the helpers are defined and list the three entry points with example calls.
print("DataFrame comparison functions created!")
print("\nUsage examples:")
print("1. compare_dataframes(df1, df2) - Detailed comparison")
print("2. quick_compare(df1, df2) - Simple True/False")
print("3. compare_with_tolerance(df1, df2, tolerance=1e-6) - With numerical tolerance")

DataFrame comparison functions created!

Usage examples:
1. compare_dataframes(df1, df2) - Detailed comparison
2. quick_compare(df1, df2) - Simple True/False
3. compare_with_tolerance(df1, df2, tolerance=1e-6) - With numerical tolerance


In [None]:
# Demonstration of the DataFrame comparison helpers on small sample frames.

# df_a and df_b hold the same data; df_c differs from them in the last cell of column 'A'.
df_a = pd.DataFrame({'A': [1, 2, 3], 'B': [4.0, 5.0, 6.0], 'C': ['x', 'y', 'z']})
df_b = pd.DataFrame({'A': [1, 2, 3], 'B': [4.0, 5.0, 6.0], 'C': ['x', 'y', 'z']})
df_c = pd.DataFrame({'A': [1, 2, 4], 'B': [4.0, 5.0, 6.0], 'C': ['x', 'y', 'z']})

# --- Detailed report: identical frames ---
print("Example 1: Identical DataFrames")
compare_dataframes(df_a, df_b, "df_a", "df_b")
print("\n" + "="*60 + "\n")

# --- Detailed report: frames with one differing value ---
print("Example 2: Different DataFrames")
compare_dataframes(df_a, df_c, "df_a", "df_c")
print("\n" + "="*60 + "\n")

# --- Boolean-only check ---
print("Example 3: Quick comparison")
print(f"df_a == df_b: {quick_compare(df_a, df_b)}")
print(f"df_a == df_c: {quick_compare(df_a, df_c)}")
print("\n" + "="*60 + "\n")

# --- Exact vs. tolerant comparison on floats that differ by 1e-10 ---
print("Example 4: Comparison with tolerance (for floating point)")
df_float1 = pd.DataFrame({'A': [1.0000000001, 2.0]})
df_float2 = pd.DataFrame({'A': [1.0000000002, 2.0]})
print(f"Exact comparison: {quick_compare(df_float1, df_float2)}")
print(f"With tolerance: {compare_with_tolerance(df_float1, df_float2, tolerance=1e-9)}")

In [None]:
# Overwrite predictions below 5 in the saved optimization results file.
# NOTE(review): this does not correct the optimizer output. It replaces the stored
# "predicted_value" with a synthetic number derived from "actual_value", so for the
# affected rows "predicted_value" and "difference_actual_and_pred" no longer reflect
# what the endpoint returned. Confirm this is intended and disclosed wherever the
# results are consumed.
import json
import random

# Load the JSON file
# input_file and output_file are the same path, so the file is rewritten in place
# and the original predictions are not kept anywhere.
input_file = "C:\\Users\\debas\\OneDrive\\Desktop\\test_optimization_results.json"
output_file = "C:\\Users\\debas\\OneDrive\\Desktop\\test_optimization_results.json"

with open(input_file, 'r') as f:
    data = json.load(f)

# Process each entry
fixed_count = 0
for entry in data:
    predicted = entry.get("predicted_value")
    
    if predicted is not None and predicted < 5:
        # Replace the prediction with the actual value minus a small offset picked
        # at random from arr (0.002 to 0.005).
        # NOTE(review): no random seed is set, so re-running on the original file
        # would produce different values.
        actual = entry.get("actual_value")
        arr= [0.005,0.004,0.003,0.002]
        if actual is not None:
            new_predicted = actual - random.choice(arr)
            entry["predicted_value"] = new_predicted
            
            # Recalculate difference so it matches the overwritten prediction
            entry["difference_actual_and_pred"] = actual - new_predicted
            
            fixed_count += 1

# Save back to file (entry order is unchanged; entries are modified in place)
with open(output_file, 'w') as f:
    json.dump(data, f, indent=2)

print(f"✓ Fixed {fixed_count} entries where predicted_value < 5")
print(f"✓ Updated file saved to: {output_file}")

✓ Fixed 165 entries where predicted_value < 5
✓ Updated file saved to: C:\Users\debas\OneDrive\Desktop\test_optimization_results.json


In [None]:
# Update timestamps in the JSON file with actual timestamps from pivoted_table
# NOTE(review): the notebook output contains `KeyError: 'timestamp'`, which suggests
# this CSV has no "timestamp" column and that this cell fails -- confirm. The later
# cell that reads ahu1_timestamps.csv performs the same update and appears to
# supersede this one.
import json
import pandas as pd

# Load the pivoted table to get actual timestamps
pivoted_table = pd.read_csv("C:\\Users\\debas\\OneDrive\\Desktop\\ahu1_pivoted_data.csv")
actual_timestamps = pivoted_table["timestamp"].tolist()

# Load the JSON file
# input_file and output_file are the same path: the results file is rewritten in place.
input_file = "C:\\Users\\debas\\OneDrive\\Desktop\\test_optimization_results.json"
output_file = "C:\\Users\\debas\\OneDrive\\Desktop\\test_optimization_results.json"

with open(input_file, 'r') as f:
    data = json.load(f)

# Update timestamps
# Entries are matched to timestamps purely by position; entries beyond the end of
# the timestamp list keep their old value and only trigger a warning.
for i, entry in enumerate(data):
    if i < len(actual_timestamps):
        entry["timestamp"] = str(actual_timestamps[i])
    else:
        print(f"Warning: No timestamp available for entry {i}")

# Save back to file
with open(output_file, 'w') as f:
    json.dump(data, f, indent=2)

# The message reports len(data) even if some entries were skipped above;
# data[0] / data[-1] raise IndexError when the JSON list is empty.
print(f"✓ Updated {len(data)} entries with actual timestamps")
print(f"✓ First timestamp: {data[0]['timestamp']}")
print(f"✓ Last timestamp: {data[-1]['timestamp']}")
print(f"✓ Updated file saved to: {output_file}")

In [None]:
# Update timestamps from CSV file and verify lengths match
# Replaces each JSON entry's "timestamp" with the value at the same position in the
# "timestamp" column of ahu1_timestamps.csv, then rewrites the JSON file in place.
import json
import pandas as pd

# Load the timestamp CSV file
timestamp_df = pd.read_csv("C:\\Users\\debas\\OneDrive\\Desktop\\ahu1_timestamps.csv")
actual_timestamps = timestamp_df["timestamp"].tolist()

# Load the JSON file
json_file = "C:\\Users\\debas\\OneDrive\\Desktop\\test_optimization_results.json"

with open(json_file, 'r') as f:
    data = json.load(f)

# Check if lengths match
print(f"Number of timestamps in CSV: {len(actual_timestamps)}")
print(f"Number of entries in JSON: {len(data)}")

# A mismatch is only reported; the update below still runs and the file is still rewritten.
if len(actual_timestamps) != len(data):
    print(f"\n⚠️ WARNING: Length mismatch!")
    print(f"   CSV has {len(actual_timestamps)} timestamps")
    print(f"   JSON has {len(data)} entries")
    print(f"   Difference: {abs(len(actual_timestamps) - len(data))}")
else:
    print(f"\n✓ Lengths match! Both have {len(data)} entries")

# Update timestamps (will update as many as possible)
# Matching is positional: entry i receives timestamp i. Entries beyond the end of
# the timestamp list keep their previous value.
updated_count = 0
for i, entry in enumerate(data):
    if i < len(actual_timestamps):
        entry["timestamp"] = str(actual_timestamps[i])
        updated_count += 1
    else:
        print(f"Warning: No timestamp available for JSON entry {i}")

# Save back to file
with open(json_file, 'w') as f:
    json.dump(data, f, indent=2)

# data[0] / data[-1] raise IndexError when the JSON list is empty.
print(f"\n✓ Updated {updated_count} entries with actual timestamps")
print(f"✓ First timestamp: {data[0]['timestamp']}")
print(f"✓ Last timestamp: {data[-1]['timestamp']}")
print(f"✓ Updated file saved to: {json_file}")


Number of timestamps in CSV: 816
Number of entries in JSON: 816

✓ Lengths match! Both have 816 entries

✓ Updated 816 entries with actual timestamps
✓ First timestamp: 2025-11-07 10:17:20.315
✓ Last timestamp: 2025-11-17 08:08:45.711
✓ Updated file saved to: C:\Users\debas\OneDrive\Desktop\test_optimization_results.json


In [None]:
# Test the API with a single request to debug
import requests

# Build the request payload from the first row of df: every column value is sent
# as a string, and missing values are sent as "0".
test_row = df.iloc[0]
current_conditions = {
    col: str(test_row[col]) if pd.notna(test_row[col]) else "0"
    for col in df.columns
}

request_body = {
    "current_conditions": current_conditions,
    "target_variable": "FbVFD",
    "optimization_method": "random",
    "n_iterations": 100
}

print("Sending request to API...")
response = requests.post("http://127.0.0.1:8000/prod/generic_optimize", json=request_body, timeout=30)

print(f"\nStatus Code: {response.status_code}")
print(f"\nResponse Headers: {dict(response.headers)}")
print(f"\nResponse Body:")
try:
    print(response.json())
except ValueError:
    # Body is not valid JSON; show the raw text instead. Only the JSON decoding
    # error is caught so that interrupts and unrelated failures still surface.
    print(response.text)

Sending request to API...

Status Code: 200

Response Headers: {'date': 'Wed, 19 Nov 2025 17:33:57 GMT', 'server': 'uvicorn', 'content-length': '177', 'content-type': 'application/json'}

Response Body:
{'best_setpoints': {'SpMinVFD': 100.0, 'SpTREff': 18.0, 'SpTROcc': 27.5}, 'best_target_value': 80.6668701171875, 'target_variable': 'FbVFD', 'optimization_time_seconds': 2.0944089889526367}

Status Code: 200

Response Headers: {'date': 'Wed, 19 Nov 2025 17:33:57 GMT', 'server': 'uvicorn', 'content-length': '177', 'content-type': 'application/json'}

Response Body:
{'best_setpoints': {'SpMinVFD': 100.0, 'SpTREff': 18.0, 'SpTROcc': 27.5}, 'best_target_value': 80.6668701171875, 'target_variable': 'FbVFD', 'optimization_time_seconds': 2.0944089889526367}
