In [1]:
import io
import os
import base64
import logging
import pandas as pd
import plotly.express as px



from data_ingestion.data_loader import DataLoader  # Replace with your actual import
from utilities.logging import setup_logger
from data_ingestion.data_loader import DataLoader

logger = logging.getLogger("my_logger")
logger.setLevel(logging.DEBUG)

2023-12-05 23:51:03,283 - data_loader - INFO - Logging intitialized...


# set the input file and output directory

In [2]:
# file_path = r"C:\Users\ecountrywood\OneDrive - Tesla\FBS\11_7 Temp\11-29\000079_231129_132840.GDS_pandas.xlsx"
# data_dir = r"C:\Users\ecountrywood\OneDrive - Tesla\FBS\11_7 Temp\11-29"
input_file_path = "./data/000084_231201_093120.GDS_pandas.xlsx"
output_data_dir = "./data/test"

In [3]:
data_loader = DataLoader(input_file_path)
data_loader.auto_load(file_path=input_file_path)

2023-12-05 23:51:03,315 - data_loader - INFO - DataLoader object created. Referenceing filename: ./data/000084_231201_093120.GDS_pandas.xlsx
2023-12-05 23:51:03,316 - data_loader - INFO - Autoloader runining...
2023-12-05 23:51:03,317 - data_loader - INFO - Checking file attributes...
2023-12-05 23:51:03,318 - data_loader - INFO - File attributes populated from: 000084_231201_093120.GDS_pandas.xlsx
2023-12-05 23:51:03,319 - data_loader - INFO - Attempting to convert file to CSV...
2023-12-05 23:51:03,319 - data_loader - INFO - Converting Excel file to CSV...
2023-12-05 23:51:03,732 - data_loader - INFO - Converted Excel file to CSV: ./data\000084_231201_093120.GDS_pandas.csv
2023-12-05 23:51:03,733 - data_loader - INFO - Attempting to parse file metadata...
2023-12-05 23:51:03,735 - data_loader - INFO - Identified equipment type: smartdaq
2023-12-05 23:51:03,735 - data_loader - INFO - Determining header...
2023-12-05 23:51:03,737 - data_loader - INFO - Fetching header for smartdaq...
2

In [4]:
# option to enable debvugging
DEBUG = True

# load the dataframe 

In [5]:
df = data_loader.data["data_frame"]

In [6]:
# save a copy of the original df
df = df[[col for col in df.columns if col]]
df_original = df.copy()
if DEBUG: print(df_original.head())

         Date      Time  sec  TC1 Gasket  TC2 Supply Lug  TC3 Backup Lug  \
0  2023/12/01  09:31:20    0        49.7            61.2            51.3   
1  2023/12/01  09:32:00    0        49.7            60.9            51.4   
2  2023/12/01  09:32:40    0        49.7            60.8            51.5   
3  2023/12/01  09:33:20    0        49.7            60.8            51.6   
4  2023/12/01  09:34:00    0        49.7            60.9            51.8   

   TC4 Subpanel Lug  TC5 MSA Busbar  TC6 Glass Door  \
0              55.2            61.8            49.3   
1              55.1            61.5            49.3   
2              55.2            61.5            49.3   
3              55.3            61.7            49.3   
4              55.6            61.9            49.3   

   TC7 Upper Enclosure Ambient  ...  TC36 Winding Internal Side Trans  \
0                         49.4  ...                              46.6   
1                         49.4  ...                              4

# Process Data
## now we have a dataframe for the notebook work
1. re-arrange the rows to match the order in the TDP
2. create a validation dictionary with allowable temps for each component
3. split the data into two tests 25C and 50C
4. for each test, we need max temp of each TC
5. get the corresponding ambient when temp was max
6. validate max temp is below criteria in measured_components allowance dictionary
7. downsample the data into 15 minute intervals
8. combine downsampled data and TC max
9. Transpose the result

# Reorder the columns to match the order of the TDP

In [7]:
# order of the columns in the TDP
tdp_order = [
    "Date",
    "Time",
    "TC1 Gasket",
    "TC2 Supply Lug",
    "TC3 Backup Lug",
    "TC4 Subpanel Lug",
    "TC5 MSA Busbar",
    "TC6 Glass Door",
    "TC7 Upper Enclosure Ambient",
    "TC8 Lower Enclosure Ambient",
    "TC10 Subpanel Tray",
    "TC11 Ambient under MSA Cover",
    "TC12 MSA PCB",
    "TC13 MCB Plastic",
    "TC14 Enclosure Handle",
    "TC15 Top of Enclosure (Plastic)",
    "TC16 Side of Enclosure",
    "TC17 Subpanel Bussing",
    "TC18 Branch Breaker Plastic Top",
    "TC19 Branch Breaker Plastic Midd",
    "TC20 Branch Breaker Bussing Top",
    "TC21 Brach Breaker Bussing Mid",
    "TC22 Transformer Cover Plastic",
    "TC23 RF Window Plastic",
    "TC24 Busbar Tray near MCB",
    "TC25 Busbar Tray Field Wiring",
    "TC26 Busbar Tray MSA Under cover",
    "TC27 Relay Ambient",
    "TC28 MCB Ambient",
    "TC9 Lab Ambient",
    "TC29 Capacitor (C44)",
    "TC42 TACO PCB",
    "TC36 Winding Internal Side Trans",
    "TC37 Windings of Transformer",
    "TC38 Busbar L1 Neut to Relay",
    "TC39 Busbar L2 Neut to Relay",
    "TC40 Cap on Holdup PCB",
    "TC41 Holdup PCB under Power",
]
# order the columns of the original dataframe according to the tdp_order
df_ordered = df[tdp_order]
# test if the order is correct against the order in tdp_order
for i in range(len(tdp_order)):
    if df_ordered.columns[i] != tdp_order[i]:
        print(f'ERROR: {df_ordered.columns[i]} != {tdp_order[i]}')
        break


# Create blank validation dictionary, and populate from limits in UL 916
    keys: measured components
        type: absolute or rise
        value: temp value

In [8]:
# # create a dictionary of the column names with correspondig type, and value
# # type is either "absolute" or "rise"
# absolute_list = [
#     "TC1 Gasket",
#     "TC6 Glass Door",
#     "TC10 Subpanel Tray",
#     "TC12 MSA PCB",
#     "TC13 MCB Plastic",
#     "TC14 Enclosure Handle",
#     "TC15 Top of Enclosure (Plastic)",
#     "TC16 Side of Enclosure",
#     "TC18 Branch Breaker Plastic Top",
#     "TC19 Branch Breaker Plastic Midd",
#     "TC22 Transformer Cover Plastic",
#     "TC23 RF Window Plastic",
#     "TC24 Busbar Tray near MCB",
#     "TC25 Busbar Tray Field Wiring",
#     "TC26 Busbar Tray MSA Under cover",
#     "TC27 Relay Ambient",
#     "TC28 MCB Ambient",
#     "TC29 Capacitor (C44)",
#     "TC42 TACO PCB",
#     "TC36 Winding Internal Side Trans",
#     "TC37 Windings of Transformer",
#     "TC40 Cap on Holdup PCB",
#     "TC41 Holdup PCB under Power",
#     "TC9 Lab Ambient",
# ]
# rise_list = [
#     "TC2 Supply Lug",
#     "TC3 Backup Lug",
#     "TC4 Subpanel Lug",
#     "TC5 MSA Busbar",
#     "TC7 Upper Enclosure Ambient",
#     "TC8 Lower Enclosure Ambient",
#     "TC11 Ambient under MSA Cover",
#     "TC17 Subpanel Bussing",
#     "TC20 Branch Breaker Bussing Top",
#     "TC21 Brach Breaker Bussing Mid",
#     "TC38 Busbar L1 Neut to Relay",
#     "TC39 Busbar L2 Neut to Relay",
# ]
# # generate blank dictionary of the measured components to edit in json
# # measured_components_dict = {value: {"type": "absolute", "value": None} for value in df_ordered.columns[2:]}
# # paste back in the json file
# measured_components_dict ={
#     "TC1 Gasket": {
#         "type": "absolute",
#         "value": 100
#     },
#     "TC2 Supply Lug": {
#         "type": "rise",
#         "value": 50
#     },
#     "TC3 Backup Lug": {
#         "type": "rise",
#         "value": 50
#     },
#     "TC4 Subpanel Lug": {
#         "type": "rise",
#         "value": 50
#     },
#     "TC5 MSA Busbar": {
#         "type": "rise",
#         "value": 65
#     },
#     "TC6 Glass Door": {
#         "type": "absolute",
#         "value": 95
#     },
#     "TC7 Upper Enclosure Ambient": {
#         "type": "rise",
#         "value": null
#     },
#     "TC8 Lower Enclosure Ambient": {
#         "type": "rise",
#         "value": null
#     },
#     "TC10 Subpanel Tray": {
#         "type": "absolute",
#         "value": 125
#     },
#     "TC11 Ambient under MSA Cover": {
#         "type": "rise",
#         "value": null
#     },
#     "TC12 MSA PCB": {
#         "type": "absolute",
#         "value": 130
#     },
#     "TC13 MCB Plastic": {
#         "type": "absolute",
#         "value": 100
#     },
#     "TC14 Enclosure Handle": {
#         "type": "absolute",
#         "value": 60
#     },
#     "TC15 Top of Enclosure (Plastic)": {
#         "type": "absolute",
#         "value": 105
#     },
#     "TC16 Side of Enclosure": {
#         "type": "absolute",
#         "value": 60
#     },
#     "TC17 Subpanel Bussing": {
#         "type": "rise",
#         "value": 65
#     },
#     "TC18 Branch Breaker Plastic Top": {
#         "type": "absolute",
#         "value": 125
#     },
#     "TC19 Branch Breaker Plastic Midd": {
#         "type": "absolute",
#         "value": 125
#     },
#     "TC20 Branch Breaker Bussing Top": {
#         "type": "rise",
#         "value": 65
#     },
#     "TC21 Brach Breaker Bussing Mid": {
#         "type": "rise",
#         "value": 65
#     },
#     "TC22 Transformer Cover Plastic": {
#         "type": "absolute",
#         "value": 105
#     },
#     "TC23 RF Window Plastic": {
#         "type": "absolute",
#         "value": 85
#     },
#     "TC24 Busbar Tray near MCB": {
#         "type": "absolute",
#         "value": 105
#     },
#     "TC25 Busbar Tray Field Wiring": {
#         "type": "absolute",
#         "value": 105
#     },
#     "TC26 Busbar Tray MSA Under cover": {
#         "type": "absolute",
#         "value": 105
#     },
#     "TC27 Relay Ambient": {
#         "type": "absolute",
#         "value": 85
#     },
#     "TC28 MCB Ambient": {
#         "type": "absolute",
#         "value": 40
#     },
#     "TC9 Lab Ambient": {
#         "type": "absolute",
#         "value": null
#     },
#     "TC29 Capacitor (C44)": {
#         "type": "absolute",
#         "value": null
#     },
#     "TC42 TACO PCB": {
#         "type": "absolute",
#         "value": 105
#     },
#     "TC36 Winding Internal Side Trans": {
#         "type": "absolute",
#         "value": 200
#     },
#     "TC37 Windings of Transformer": {
#         "type": "absolute",
#         "value": 200
#     },
#     "TC38 Busbar L1 Neut to Relay": {
#         "type": "rise",
#         "value": 65
#     },
#     "TC39 Busbar L2 Neut to Relay": {
#         "type": "rise",
#         "value": 65
#     },
#     "TC40 Cap on Holdup PCB": {
#         "type": "absolute",
#         "value": 105
#     },
#     "TC41 Holdup PCB under Power": {
#         "type": "absolute",
#         "value": 105
#     }
# }

# # set the type of the rise columns to "rise"
# for key in measured_components_dict.keys():
#     if key in rise_list:
#         measured_components_dict[key]["type"] = "rise"
#     elif key in absolute_list:
#         measured_components_dict[key]["type"] = "absolute"

# # dump this ddictionary to a json file
# import json
# with open('measured_components_dict.json', 'w') as fp:
#     json.dump(measured_components_dict, fp, indent=4)


## Load in the JSON of components and their temperature criteria

In [9]:
# # load the json file to measured_components_dict
import json
with open('edited_measured_components_dict.json', 'r') as fp:
    measured_components_dict = json.load(fp)

# Split data into two test periods based on ambient temp: 25C, 50C
If data represents only one single test, set 'test_1' to 'df_original'

In [10]:
# select the time window of the dataframe for analysis

def split_dataframe_by_time(df, d1_start_time, d1_end_time):
    # Convert 'Time' to timedelta for filtering
    df['Timedelta'] = pd.to_timedelta(df['Time'])

    # Convert input times to Timedelta
    d1_start_time = pd.to_timedelta(d1_start_time)
    d1_end_time = pd.to_timedelta(d1_end_time)

    # Filter rows based on time range
    df1 = df[(df['Timedelta'] >= d1_start_time) & (df['Timedelta'] <= d1_end_time)]
    
    return df1

df_split = split_dataframe_by_time(df_original, 
                    d1_start_time='09:31:20',
                    d1_end_time='13:44:20'
                    )


In [11]:
import numpy as np
# get the max values of the dataframe and the corresponding entry in the 'Lab Ambient' column
def get_max_values_and_ambient_at_same_time(df) -> pd.DataFrame:
    # Filter out non-numeric columns
    df.drop(columns=['Date', 'Time', 'sec'], inplace=True)
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    
    # For each numeric column, find the max value and the index of the max value
    max_values = df[numeric_columns].max()
    max_values_index = df[numeric_columns].idxmax()
    
    # Create a new dataframe with the max values and the corresponding index
    max_values_df = pd.DataFrame([max_values, max_values_index], index=['max_value', 'max_value_index'])
    
    # Get the ambient value at the max value index
    ambient_values = df['TC9 Lab Ambient'].loc[max_values_index]
    
    # Add the ambient values to the max_values_df
    max_values_df.loc['ambient_value'] = ambient_values.values
    
    # Reorder the index of the dataframe to max_value_index, max_value, ambient_value
    max_values_df = max_values_df.reindex(['max_value_index', 'max_value', 'ambient_value'])
    
    # add a rise row that calculates the rise from ambient to max value
    max_values_df.loc['rise'] = max_values_df.loc['max_value'] - max_values_df.loc['ambient_value']
    
    return max_values_df

max_values_df = get_max_values_and_ambient_at_same_time(df_split)
max_values_df.head(3)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['Date', 'Time', 'sec'], inplace=True)


TypeError: unsupported operand type(s) for -: 'Timedelta' and 'float'

# check for failing data

In [None]:
# for column in df_split.columns:
# if column in measured_components_dict.keys():
#    if measured_components_dict[column]["type"] == "rise":
# uset the ambient value to calculate the rise in the max_values_df


# Downsample to get 15 minute intervals
15min*60s/40samples per second = 22.5 samples per 15min ~= 23 samples per 15min

In [None]:
# 15min*60s/40samples per second = 22.5 samples per 15min ~= 23 samples per 15min
def downsample_15_min(df):
    """
    This function samples every 23rd row from the given DataFrame.

    Parameters:
    df (pd.DataFrame): The DataFrame to sample from.

    Returns:
    pd.DataFrame: A new DataFrame containing every 23rd row from the original.
    """
    # Select every 23rd row
    sampled_df = df.iloc[::23, :]

    return sampled_df

df_15_min = downsample_15_min(df_split)

if DEBUG: print(test_1_15_min.head())

          Date      Time  sec  TC1 Gasket  TC2 Supply Lug  TC3 Backup Lug  \
0   2023/12/01  09:31:20    0        49.7            61.2            51.3   
23  2023/12/01  09:46:40    0        49.6            63.3            54.1   
46  2023/12/01  10:02:00    0        49.7            64.8            56.3   
69  2023/12/01  10:17:20    0        49.8            65.7            57.8   
92  2023/12/01  10:32:40    0        50.0            66.4            59.0   

    TC4 Subpanel Lug  TC5 MSA Busbar  TC6 Glass Door  \
0               55.2            61.8            49.3   
23              59.3            67.0            49.5   
46              62.4            71.5            49.6   
69              64.5            74.7            49.7   
92              66.0            77.3            49.9   

    TC7 Upper Enclosure Ambient  ...  TC37 Windings of Transformer  \
0                          49.4  ...                          45.0   
23                         50.4  ...                        

In [None]:

# Concatenate test max values this with the original DataFrame
test_1_df = pd.concat([test_1_15_min, max_values_df_test_1], ignore_index=True)
test_2_df = pd.concat([test_2_15_min, max_values_df_test_2], ignore_index=True)


In [None]:
def transpose_15_min_and_max(df):
        # Transpose the 15 monute DataFrame
    transposed_df = df.T

    # Reset index to get the column names as a separate column
    transposed_df.reset_index(inplace=True)

    # Rename columns
    new_column_names = ['NAME'] + [f'x{i*15}' for i in range(transposed_df.shape[1] - 1)]
    transposed_df.columns = new_column_names
    
    # Get the name of the last column
    last_column_name = transposed_df.columns[-1]

    # Rename the last column to 'Max'    
    transposed_df.rename(columns={last_column_name: 'Max'}, inplace=True)
    return transposed_df

test_1_unordered_df = transpose_15_min_and_max(test_1_df)
test_2_unordered_df = transpose_15_min_and_max(test_2_df)
if DEBUG: print(test_1_unordered_df.head())




             NAME          x0         x15         x30         x45         x60  \
0            Date  2023/12/01  2023/12/01  2023/12/01  2023/12/01  2023/12/01   
1            Time    09:31:20    09:46:40    10:02:00    10:17:20    10:32:40   
2             sec           0           0           0           0           0   
3      TC1 Gasket        49.7        49.6        49.7        49.8        50.0   
4  TC2 Supply Lug        61.2        63.3        64.8        65.7        66.4   

          x75         x90        x105        x120        x135        x150  \
0  2023/12/01  2023/12/01  2023/12/01  2023/12/01  2023/12/01  2023/12/01   
1    10:48:00    11:03:20    11:18:40    11:34:00    11:49:20    12:04:40   
2           0           0           0           0           0           0   
3        50.1        50.0        50.0        50.0        50.0        50.1   
4        66.8        66.9        67.1        67.2        67.3        67.4   

         x165        x180        x195        x210 

# re-arrange rows to match TDP

In [None]:
test_1_final_df.head(45)

NameError: name 'test_1_final_df' is not defined

# Generate output files with processed data

In [None]:
# generate output file names for processed data
def generate_output_filename(file_path, data_dir,temp):
    # Extract the base name of the file without the extension
    base_name = file_path.split('/')[-1].split('.')[0]
    # trim leading zeros from base name
    base_name = base_name.lstrip('0')
    
    # Construct the new file name with the .csv extension
    output_file = f"{data_dir}/processed_temp_data/{base_name}_{temp}_15min.csv"

    return output_file

test_1_output_file = generate_output_filename(input_file_path, output_data_dir,'25C')
test_2_output_file = generate_output_filename(input_file_path, output_data_dir,'50C')
if DEBUG:    
    print(test_1_output_file)
    print(test_2_output_file)


# write processed data to csv

In [None]:
# write processed data to csv
test_1_final_df.to_csv(test_1_output_file, index = None, header=True)
test_2_final_df.to_csv(test_2_output_file, index = None, header=True)