## Import the Necessary functions


In [1]:
import pandas as pd
import numpy as np
import json
import pprint
import random
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, BatchNormalization
from keras.models import load_model
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.signal import correlate, correlation_lags
from datetime import datetime
import random
from math import inf
import tensorflow as tf


# Define Constants

In [2]:
SUBSTITUE_VALUE = 1
MAX_CONNECTORS = 10
TIMESTEPS = 144

weather_cols = ['G_0']    #[ 'G_0', 'RH', 'T_a']

# ML model class definition
let's define the class for our ML Molel

In [3]:
class timeseries_ml_model:
  def __init__(self,
               time_steps,
               input_features=48,
               epochs=100,
               batch_size=16,
               save_best_model_during_training=False,
               loss = 'binary_crossentropy',
               metric = 'accuracy',
               optimizer = 'adam',
               last_activation = 'softmax',
               output_size=3,
               dense_neurons = 16
               ):
    self.time_steps = time_steps
    self.input_features = input_features
    self.epochs = epochs
    self.batch_size = batch_size
    self.optimizer = optimizer
    self.last_activation = last_activation
    self.x_train = None
    self.x_test = None
    self.y_train = None
    self.y_test = None
    self.model = None
    self.dl_model = None
    self.model_checkpoints = save_best_model_during_training
    self.loss = loss
    self.metric = metric
    self.output_size = output_size
    self.dense_neurons = dense_neurons

  def create_model(self):
    self.model = Sequential()
    num_of_neurons = self.time_steps * self.input_features
    self.model.add(LSTM(num_of_neurons, input_shape=(self.time_steps, self.input_features)))
    self.model.add(Dense(self.dense_neurons, activation='relu'))
    self.model.add(Dense(self.output_size, activation=self.last_activation))
    self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=[self.metric])

  def train_model(self):
    if self.model_checkpoints:
      filepath = 'callback_model.keras'
      model_checkpoint_callback = ModelCheckpoint(filepath,
                                                  monitor='val_loss',
                                                  verbose=0,
                                                  save_best_only=True,
                                                  save_weights_only=False,
                                                  mode='min',
                                                  save_freq='epoch',
                                                  initial_value_threshold=None
                                                  )
      self.model.fit(self.x_train,
                     self.y_train,
                     epochs=self.epochs,
                     batch_size=self.batch_size,
                     callbacks=[model_checkpoint_callback])
    else:
      self.model.fit(self.x_train,
                     self.y_train,
                     epochs=self.epochs,
                     batch_size=self.batch_size,
                     )

  def get_data(self,
               data,
               target,
               test_split,
               random_state=42):
    (self.x_train,
     self.x_test,
     self.y_train,
     self.y_test) = train_test_split(data,
                                     target,
                                     test_size=test_split,
                                     random_state=random_state)

  def make_predictions(self, model="dl"):
    if model=="dl":
      return self.dl_model.predict(self.x_test), self.y_test
    else:
      return self.model.predict(self.x_test), self.y_test

  def make_dl_model(self):
    self.dl_model = tf.keras.Sequential([
          tf.keras.layers.Normalization(),

          tf.keras.layers.Conv1D(128, 3, activation="relu"),
          tf.keras.layers.MaxPooling1D(),
          tf.keras.layers.BatchNormalization(),
          tf.keras.layers.Conv1D(256, 3, activation="relu"),
          tf.keras.layers.MaxPooling1D(),
          tf.keras.layers.BatchNormalization(),

          tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),
          tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),

          tf.keras.layers.GlobalAveragePooling1D(),
          tf.keras.layers.Dropout(0.5),

          tf.keras.layers.Dense(3, activation=self.last_activation)
    ])
    self.dl_model.build(input_shape=(None,self.time_steps,self.input_features))
    self.dl_model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001), loss='mse')

  def train_dl_model(self):
    self.dl_model.fit(self.x_train,
                     self.y_train,
                     epochs=self.epochs
                     )



# Define the helper functions


In [4]:

def calucluate_mean_absolute_error(predictions, actuals):
  # take the absolute difference between the predictions and the labels
  absolute_errors = np.abs(predictions - actuals)
  # calculate the mean absolute error
  mean_absolute_error = np.mean(absolute_errors, axis=0)
  return mean_absolute_error


def get_sensor_data(path, index_column='datetime'):
  data = pd.read_csv(path, index_col=index_column, parse_dates=True)
  date_time = pd.to_datetime(data.index, format='%d.%m.%Y %H:%M:%S')
  day = 24*60*60
  year = (365.2425)*day
  timestamp_s = date_time.map(pd.Timestamp.timestamp)
  data['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
  return data



def fix_lag(all_data, feature1, feature2, lag=None):
  x = all_data[feature1]
  y = all_data[feature2]

  if lag is None:
    correlation = correlate(all_data[feature1], all_data[feature2], mode="full")
    lags = correlation_lags(all_data[feature1].size, all_data[feature2].size, mode="full")
    lag = lags[np.argmax(correlation)]

  if lag < 0:
      x = x.iloc[:lag].reset_index(drop=True)
      x = [np.nan] * abs(lag) + list(x)
  else:
      x = x.iloc[lag:].reset_index(drop=True)
      x = list(x) + [np.nan] * lag
  all_data.loc[:,feature1] = x
  all_data.loc[:,feature2] = y

  return all_data



def get_string_exoskeletons(metadata_path):
  string_1_exoskeltons = []
  string_2_exoskeltons = []

  try:
      with open(metadata_path, 'r') as file:
          metadata = json.load(file)
      print("Metadata file read successfully!")

  except json.JSONDecodeError as err:
      print(f"JSON decode error: {err}")
      print(f"Error at line {err.lineno}, column {err.colno}")
  except Exception as e:
      print(f"An unexpected error occurred: {e}")

  sensor_class = {}
  for sensor, meta in metadata['Sensor_metadata'].items():
    if meta['String_id'] == 'String_1' and meta['sensor_type']=='V_T':
      string_1_exoskeltons.append(sensor)
    elif meta['String_id'] == 'String_2' and meta['sensor_type']=='V_T':
      string_2_exoskeltons.append(sensor)
    elif meta['sensor_type'] == 'I' and meta['String_id'] == 'String_1' and meta['Module_id'] == 'Inverter':
      string_1_inv_sensor = sensor
    elif meta['sensor_type'] == 'I' and meta['String_id'] == 'String_2' and meta['Module_id'] == 'Inverter':
      string_2_inv_sensor = sensor
    elif meta['sensor_type'] == 'I' and meta['String_id'] == 'String_1' and meta['Module_id'] != 'Inverter':
      string_1_current_sensor = sensor
    elif meta['sensor_type'] == 'I' and meta['String_id'] == 'String_2' and meta['Module_id'] != 'Inverter':
      string_2_current_sensor = sensor

    if meta['sensor_type'] == 'V_T':
      sensor_class[sensor] = metadata['MC4_metadata'][meta['MC4_sample_id']]['label']

  return (string_1_exoskeltons,
          string_2_exoskeltons,
          string_1_inv_sensor,
          string_2_inv_sensor,
          string_1_current_sensor,
          string_2_current_sensor,
          metadata)


def append_shifted_inverter_data(input_data,
                                 inverter_data_path,
                                 index_column='datetime',
                                 cols_of_interest=['U_dc_string1',
                                                   'U_dc_string2',
                                                   'I_dc_string1',
                                                   'I_dc_string2']):
  if inverter_data_path != '':
    inv_data = pd.read_csv(inverter_data_path, index_col=index_column, parse_dates=True)
    inv_data = inv_data[cols_of_interest]
    inv_data.index = pd.to_datetime(inv_data.index)
    input_data = pd.merge(input_data, inv_data, on="datetime", how="inner")

  input_data.index = pd.to_datetime(input_data.index)

  d2 = input_data[(input_data.index<='2024-03-29 23:59:59')]
  d3 = input_data[input_data.index>'2024-03-29 23:59:59']

  d2 = fix_lag(d2, 'I_dc_string1', 'I_FEFFFFB71E5E54E1', 13)
  d3 = fix_lag(d3, 'I_dc_string1', 'I_FEFFFFB71E5E54E1', 1)

  d2 = fix_lag(d2, 'I_dc_string2', 'I_FEFFFFB71E5E54E4', 13)
  d3 = fix_lag(d3, 'I_dc_string2', 'I_FEFFFFB71E5E54E4', 1)

  d2 = fix_lag(d2, 'U_dc_string1', 'I_FEFFFFB71E5E54E1', 13)
  d3 = fix_lag(d3, 'U_dc_string1', 'I_FEFFFFB71E5E54E1', 1)

  d2 = fix_lag(d2, 'U_dc_string2', 'I_FEFFFFB71E5E54E4', 13)
  d3 = fix_lag(d3, 'U_dc_string2', 'I_FEFFFFB71E5E54E4', 1)

  return pd.concat([d2,d3], axis=0)


def derive_power_resistance_features( input_data,
                                      _s1_exoskeletons,
                                      _s2_exoskeletons,
                                      _s1_current_sensor,
                                      _s2_current_sensor
                                      ):
  input_data = input_data[(input_data["I_dc_string1"]>0) | (input_data['I_dc_string2']>0)].copy()
  #input_data["V_"+string_1_current_sensor] = input_data[["V_"+x for x in string_1_exoskeltons]].sum(axis=1)
  input_data["P_string1"] = input_data["I_dc_string1"] * input_data["U_dc_string1"]
  input_data["R_string1"] = input_data["U_dc_string1"] / input_data["I_dc_string1"]

  #input_data["V_"+string_2_current_sensor] = input_data[["V_"+x for x in string_2_exoskeltons]].sum(axis=1)
  input_data["P_string2"] = input_data["I_dc_string2"] * input_data["U_dc_string2"]
  input_data["R_string2"] = input_data["U_dc_string2"] / input_data["I_dc_string2"]

  for col in _s1_exoskeletons:
    input_data["P_"+col] = input_data["I_"+_s1_current_sensor] * input_data["V_"+col]
    input_data["R_"+col] = input_data["V_"+col] / input_data["I_"+_s1_current_sensor]

  for col in _s2_exoskeletons:
    input_data["P_"+col] = input_data["I_"+_s2_current_sensor] * input_data["V_"+col]
    input_data["R_"+col] = input_data["V_"+col] / input_data["I_"+_s2_current_sensor]

  return input_data

def find_missing_days(df):
    missingData = {}
    years = set(df.index.year)
    months = set(df.index.month)
    for year in years:
      missingData[year] = {}
      for month in months:
          absent_days=[]
          try:
            for day in range(min(set(df[(df.index.month==month) & (df.index.year==year)].index.day)),
                            max(set(df[(df.index.month==month) & (df.index.year==year)].index.day))+1):
                if day not in set(df[df.index.month==month].index.day):
                    absent_days.append(day)
          except:
            pass
      missingData[year][month] = absent_days
    return missingData

def prepare_timeseries_data(input_data):
    # calculate all the missing days data
    missingData = find_missing_days(input_data)
    print(missingData)
    # Interpolate at 5 min
    input_data = input_data.resample("5min").interpolate()

    # retain only day time data
    input_data = input_data[(input_data.index.hour>6) & (input_data.index.hour<=18)]
    # remove from the resampled data the days which were not present initially.
    df = input_data.copy()
    for year, missing_days in missingData.items():
      for month, days in missing_days.items():
          for day in days:
              df = df[(df.index.year!=year) | (df.index.month!=month) | (df.index.day!=day)]
    print("New df Len: ",len(df), "Data Len: ",len(input_data))

    # first day and last day might not contain the full day data, so remove them.
    df = df[(df.index.month!=df.index.min().month) | (df.index.day!=df.index.min().day)]
    df = df[(df.index.month!=df.index.max().month) | (df.index.day!=df.index.max().day)]

    return df


def replace_connectors_with_sub_values(input_data, list_of_connectors):
  """
  Parameter:
    input_data: DataFrame, from which the connetors need to be replaced with substiture vales
    list_of_connectors: expects a list of int values in range (0, max_connectors - 1)

  Returns:
    Dataframe, with specified columns replaced with specified subustitute value
  """
  for conn in list_of_connectors:
    conn_cols = [["V_"+x, "T_"+x, "P_"+x, "R_"+x] for x in [conn]]
    for conn in conn_cols[0]:
      if conn in input_data.columns:
        input_data.loc[:,conn] = SUBSTITUE_VALUE
      else:
        print(conn, " not present in input_data.")
  return input_data


def drop_connectors(input_data, connectors_to_remove):
  """
  This selects a connetor to be dropped from the input_data and appends the
  new input_data to the current its data at the end

  Parameter:
    Data: DataFrame, from which the connetors need to be replaced with substiture vales
    connectors_to_remove: A list of connectors to be removed, where each element is str(connector_number)
  Returns:
    Dataframe, with possible combinations of missing connector appended on the end
  """
  _all_data = input_data.copy()
  for connector_number in sorted(connectors_to_remove, reverse=True):
    input_data = replace_connectors_with_sub_values(input_data, connector_number)
    num_days = len(input_data.resample('1D').mean())
    offset = pd.DateOffset(days=num_days)
    input_data.index = input_data.index + offset

    _all_data = pd.concat([_all_data, input_data], axis=0)

  return _all_data

def fill_substitute(input_data):
  conn_cols = [["V_"+str(x), "T_"+str(x), "P_"+str(x), "R_"+str(x)] for x in range(MAX_CONNECTORS)]
  conn_to_drop = []
  for conn in np.array(conn_cols).flatten():
    if conn not in input_data.columns:
      input_data.loc[:,conn] = SUBSTITUE_VALUE
    else:
      conn_to_drop.append(conn.split('_')[1])

  _all_data = drop_connectors(input_data, set(conn_to_drop))

  return _all_data

def get_string_data(input_data, _s1_exoskeletons, _s2_exoskeletons):
  vt_cols = [("V_"+x, "T_"+x, "P_"+x, "R_"+x) for x in _s1_exoskeletons]
  inv_cols = ["I_dc_", "U_dc_", "P_", "R_"]
  master_cols = (['Day sin'] +
                weather_cols +
                [x+"string1" for x in inv_cols] +
                [x for vt in vt_cols for x in vt])

  string1_data = input_data[master_cols]
  string1_data = string1_data[np.isfinite(string1_data).all(1)]

  vt_cols = [("V_"+x, "T_"+x, "P_"+x, "R_"+x) for x in _s2_exoskeletons]

  master_cols = (['Day sin'] +
                  weather_cols +
                  [x+"string2" for x in inv_cols] +
                  [x for VT in vt_cols for x in VT])
  string2_data = input_data[master_cols]
  string2_data = string2_data[np.isfinite(string2_data).all(1)]

  new_cols = []
  for i, x in enumerate([x for VT in vt_cols for x in VT]):
    new_cols.append(x[:2]+str(int(i/4)))
  new_cols = ['Day sin'] + weather_cols + ["I_S", "V_S", "P_S", "R_S"] + new_cols

  string1_data.columns = new_cols
  string2_data.columns = new_cols

  return string1_data, string2_data


def drop_x_percent_data(input_data, percent, non_substituted_cols, num_permutation, max_connectors, num_days):
  offset = pd.DateOffset(days=num_days)
  input_data.index = input_data.index + offset
  if percent==0:
    return input_data, num_days+len(input_data.resample('1D').mean())

  num_conn_to_drop = int(max_connectors*percent/100)
  _all_data = pd.DataFrame()
  prev_choice = None
  for p in range(num_permutation):
    indices = np.random.choice(max_connectors, num_conn_to_drop, replace=False)
    while (prev_choice==indices).all():
      indices = np.random.choice(max_connectors, num_conn_to_drop, replace=False)

    connectors_to_remove = [non_substituted_cols[i] for i in indices]
    temp_data = replace_connectors_with_sub_values(input_data, connectors_to_remove)
    if len(_all_data)>0:
      num_days = len(_all_data.resample('1D').mean())
      offset = pd.DateOffset(days=num_days)
      temp_data.index = temp_data.index + offset
    _all_data = pd.concat([_all_data, temp_data], axis=0)
    prev_choice = indices

  next_lot_index_starts_after = num_days+len(_all_data.resample('1D').mean())
  return _all_data, next_lot_index_starts_after

In [5]:
np.random.choice(3, 0, replace=False)

array([], dtype=int64)

# Get the University o Bern Data

Now, we can load sensor data and get meta information about the sensors.

After this is done, we need to shift the inverter data (there is a shift of 1 hour 5 mins in March data, and 5 min shift afterwards). Then data will be combined and P and R will be derived for each exoskeleton

In [6]:
bern_data = get_sensor_data("/content/Bern_data_upto_16_jun.csv")
(s1_exoskeletons_b1,
 s2_exoskeletons_b1,
 s1_inv_sensor_b1,
 s2_inv_sensor_b1,
 s1_current_sensor_b1,
 s2_current_sensor_b1,
 metadata_b1
 ) = get_string_exoskeletons("/content/metadata_bfh.json")

(s1_exoskeletons_b2,
 s2_exoskeletons_b2,
 s1_inv_sensor_b2,
 s2_inv_sensor_b2,
 s1_current_sensor_b2,
 s2_current_sensor_b2,
 metadata_b2
 ) = get_string_exoskeletons("/content/metadata_bfh_Exp2.json")

(s1_exoskeletons_b3,
 s2_exoskeletons_b3,
 s1_inv_sensor_b3,
 s2_inv_sensor_b3,
 s1_current_sensor_b3,
 s2_current_sensor_b3,
 metadata_b3
 ) = get_string_exoskeletons("/content/metadata_bfh_Exp3.json")

inv_adjusted_data = append_shifted_inverter_data(bern_data,
                                        inverter_data_path="")



Metadata file read successfully!
Metadata file read successfully!
Metadata file read successfully!


In [7]:
b1_end_date = '2024-04-29 00:00:00'
b2_end_date = '2024-05-21 00:00:00'
b3_end_date = '2024-07-17 00:00:00'

In [8]:
batch1_data = derive_power_resistance_features(
                  inv_adjusted_data[inv_adjusted_data.index<b1_end_date],
                  s1_exoskeletons_b1,
                  s2_exoskeletons_b1,
                  s1_current_sensor_b1,
                  s2_current_sensor_b1)

batch2_data = derive_power_resistance_features(
                  inv_adjusted_data[(inv_adjusted_data.index > b1_end_date) &
                                    (inv_adjusted_data.index < b2_end_date)],
                  s1_exoskeletons_b2,
                  s2_exoskeletons_b2,
                  s1_current_sensor_b2,
                  s2_current_sensor_b2)

batch3_data = derive_power_resistance_features(
                  inv_adjusted_data[(inv_adjusted_data.index > b2_end_date) &
                                    (inv_adjusted_data.index < b3_end_date)],
                  s1_exoskeletons_b3,
                  s2_exoskeletons_b3,
                  s1_current_sensor_b3,
                  s2_current_sensor_b3)

## Separate String-1 and String-2 data


Prepare for merging String-1 and String-2 on axis-0

In [9]:
batch1_data.columns

Index(['I_FEFFFFB71E5E54E1', 'I_FEFFFFB71E5E54E3', 'I_FEFFFFB71E5E54E4',
       'I_FEFFFFB71E5E54E5', 'T_FEFFFFB71E5E54B1', 'T_FEFFFFB71E5E54B2',
       'T_FEFFFFB71E5E54B3', 'T_FEFFFFB71E5E54B4', 'T_FEFFFFB71E5E54B5',
       'T_FEFFFFB71E5E54B6',
       ...
       'P_FEFFFFB71E5E54BF', 'R_FEFFFFB71E5E54BF', 'P_FEFFFFB71E5E54BD',
       'R_FEFFFFB71E5E54BD', 'P_FEFFFFB71E5E54BE', 'R_FEFFFFB71E5E54BE',
       'P_FEFFFFB71E5E54BC', 'R_FEFFFFB71E5E54BC', 'P_FEFFFFB71E5E54BB',
       'R_FEFFFFB71E5E54BB'],
      dtype='object', length=142)

In [10]:
data1, data2 = get_string_data(batch1_data, s1_exoskeletons_b1, s2_exoskeletons_b1)

data1 = prepare_timeseries_data(data1)
data2 = prepare_timeseries_data(data2)

non_substituted_cols = [str(x) for x in range(MAX_CONNECTORS)]
num_days = len(data1.resample('1D').mean())
s1_00_percent_data, num_days_00  = drop_x_percent_data(data1.copy(), 100, non_substituted_cols, 1, MAX_CONNECTORS, num_days)
s1_20_percent_data, num_days_20  = drop_x_percent_data(data1.copy(), 80, non_substituted_cols, 5, MAX_CONNECTORS, num_days_00)
s1_40_percent_data, num_days_40  = drop_x_percent_data(data1.copy(), 60, non_substituted_cols, 5, MAX_CONNECTORS, num_days_20)
s1_60_percent_data, num_days_60  = drop_x_percent_data(data1.copy(), 40, non_substituted_cols, 5, MAX_CONNECTORS, num_days_40)
s1_80_percent_data, num_days_80  = drop_x_percent_data(data1.copy(), 20, non_substituted_cols, 5, MAX_CONNECTORS, num_days_60)
s1_100_percent_data, num_days_100  = drop_x_percent_data(data1.copy(), 0, non_substituted_cols, 1, MAX_CONNECTORS, num_days_80)

s1_data = pd.concat([s1_00_percent_data, s1_20_percent_data, s1_40_percent_data, s1_60_percent_data, s1_80_percent_data, s1_100_percent_data], axis=0)

num_days = len(data2.resample('1D').mean())
s2_00_percent_data, num_days_00  = drop_x_percent_data(data2.copy(), 100, non_substituted_cols, 1, MAX_CONNECTORS, num_days)
s2_20_percent_data, num_days_20  = drop_x_percent_data(data2.copy(), 80, non_substituted_cols, 5, MAX_CONNECTORS, num_days_00)
s2_40_percent_data, num_days_40  = drop_x_percent_data(data2.copy(), 60, non_substituted_cols, 5, MAX_CONNECTORS, num_days_20)
s2_60_percent_data, num_days_60  = drop_x_percent_data(data2.copy(), 40, non_substituted_cols, 5, MAX_CONNECTORS, num_days_40)
s2_80_percent_data, num_days_80  = drop_x_percent_data(data2.copy(), 20, non_substituted_cols, 5, MAX_CONNECTORS, num_days_60)
s2_100_percent_data, num_days_100  = drop_x_percent_data(data2.copy(), 0, non_substituted_cols, 1, MAX_CONNECTORS, num_days_80)


s2_data = pd.concat([s2_00_percent_data, s2_20_percent_data, s2_40_percent_data, s2_60_percent_data, s2_80_percent_data, s2_100_percent_data], axis=0)


num_days = len(data1.resample('1D').mean())
offset = pd.DateOffset(days=num_days)
data2.index = data2.index + offset



X_batch1 = pd.concat([s1_data, s2_data], axis=0)

num_of_trainable_instances_df1 = int(len(s1_data)/TIMESTEPS)
num_of_trainable_instances_df2 = int(len(s2_data)/TIMESTEPS)


target_batch1 = np.array([[1.0, 0., 0.]]*num_of_trainable_instances_df1 + [[0.7, 0.15, 0.15]]*num_of_trainable_instances_df2)



{2024: {4: [4, 14, 20, 21, 22, 23]}}
New df Len:  6224 Data Len:  7088
{2024: {4: [14]}}
New df Len:  6944 Data Len:  7088


In [11]:
data1, data2 = get_string_data(batch2_data, s1_exoskeletons_b2, s2_exoskeletons_b2)

data1 = prepare_timeseries_data(data1)
data2 = prepare_timeseries_data(data2)

non_substituted_cols = [str(x) for x in range(MAX_CONNECTORS)]
num_days = len(data1.resample('1D').mean())
s1_00_percent_data, num_days_00  = drop_x_percent_data(data1.copy(), 100, non_substituted_cols, 1, MAX_CONNECTORS, num_days)
s1_20_percent_data, num_days_20  = drop_x_percent_data(data1.copy(), 80, non_substituted_cols, 5, MAX_CONNECTORS, num_days_00)
s1_40_percent_data, num_days_40  = drop_x_percent_data(data1.copy(), 60, non_substituted_cols, 5, MAX_CONNECTORS, num_days_20)
s1_60_percent_data, num_days_60  = drop_x_percent_data(data1.copy(), 40, non_substituted_cols, 5, MAX_CONNECTORS, num_days_40)
s1_80_percent_data, num_days_80  = drop_x_percent_data(data1.copy(), 20, non_substituted_cols, 5, MAX_CONNECTORS, num_days_60)
s1_100_percent_data, num_days_100  = drop_x_percent_data(data1.copy(), 0, non_substituted_cols, 1, MAX_CONNECTORS, num_days_80)

s1_data = pd.concat([s1_00_percent_data, s1_20_percent_data, s1_40_percent_data, s1_60_percent_data, s1_80_percent_data, s1_100_percent_data], axis=0)

num_days = len(data2.resample('1D').mean())
s2_00_percent_data, num_days_00  = drop_x_percent_data(data2.copy(), 100, non_substituted_cols, 1, MAX_CONNECTORS, num_days)
s2_20_percent_data, num_days_20  = drop_x_percent_data(data2.copy(), 80, non_substituted_cols, 5, MAX_CONNECTORS, num_days_00)
s2_40_percent_data, num_days_40  = drop_x_percent_data(data2.copy(), 60, non_substituted_cols, 5, MAX_CONNECTORS, num_days_20)
s2_60_percent_data, num_days_60  = drop_x_percent_data(data2.copy(), 40, non_substituted_cols, 5, MAX_CONNECTORS, num_days_40)
s2_80_percent_data, num_days_80  = drop_x_percent_data(data2.copy(), 20, non_substituted_cols, 5, MAX_CONNECTORS, num_days_60)
s2_100_percent_data, num_days_100  = drop_x_percent_data(data2.copy(), 0, non_substituted_cols, 1, MAX_CONNECTORS, num_days_80)


s2_data = pd.concat([s2_00_percent_data, s2_20_percent_data, s2_40_percent_data, s2_60_percent_data, s2_80_percent_data, s2_100_percent_data], axis=0)


num_days = len(data1.resample('1D').mean())
offset = pd.DateOffset(days=num_days)
data2.index = data2.index + offset



X_batch2 = pd.concat([s1_data, s2_data], axis=0)

num_of_trainable_instances_df1 = int(len(s1_data)/TIMESTEPS)
num_of_trainable_instances_df2 = int(len(s2_data)/TIMESTEPS)


target_batch2 = np.array([[0.85, 0.15, 0.]]*num_of_trainable_instances_df1 + [[0.85, 0., 0.15]]*num_of_trainable_instances_df2)



{2024: {5: [13, 14, 16, 17, 18, 19]}}
New df Len:  1008 Data Len:  1872
{2024: {5: [7]}}
New df Len:  2004 Data Len:  2148


In [12]:
data1, data2 = get_string_data(batch3_data, s1_exoskeletons_b3, s2_exoskeletons_b3)

data1 = prepare_timeseries_data(data1)
data2 = prepare_timeseries_data(data2)

non_substituted_cols = [str(x) for x in range(MAX_CONNECTORS)]
num_days = len(data1.resample('1D').mean())
s1_00_percent_data, num_days_00  = drop_x_percent_data(data1.copy(), 100, non_substituted_cols, 1, MAX_CONNECTORS, num_days)
s1_20_percent_data, num_days_20  = drop_x_percent_data(data1.copy(), 80, non_substituted_cols, 5, MAX_CONNECTORS, num_days_00)
s1_40_percent_data, num_days_40  = drop_x_percent_data(data1.copy(), 60, non_substituted_cols, 5, MAX_CONNECTORS, num_days_20)
s1_60_percent_data, num_days_60  = drop_x_percent_data(data1.copy(), 40, non_substituted_cols, 5, MAX_CONNECTORS, num_days_40)
s1_80_percent_data, num_days_80  = drop_x_percent_data(data1.copy(), 20, non_substituted_cols, 5, MAX_CONNECTORS, num_days_60)
s1_100_percent_data, num_days_100  = drop_x_percent_data(data1.copy(), 0, non_substituted_cols, 1, MAX_CONNECTORS, num_days_80)

s1_data = pd.concat([s1_00_percent_data, s1_20_percent_data, s1_40_percent_data, s1_60_percent_data, s1_80_percent_data, s1_100_percent_data], axis=0)

num_days = len(data2.resample('1D').mean())
s2_00_percent_data, num_days_00  = drop_x_percent_data(data2.copy(), 100, non_substituted_cols, 1, MAX_CONNECTORS, num_days)
s2_20_percent_data, num_days_20  = drop_x_percent_data(data2.copy(), 80, non_substituted_cols, 5, MAX_CONNECTORS, num_days_00)
s2_40_percent_data, num_days_40  = drop_x_percent_data(data2.copy(), 60, non_substituted_cols, 5, MAX_CONNECTORS, num_days_20)
s2_60_percent_data, num_days_60  = drop_x_percent_data(data2.copy(), 40, non_substituted_cols, 5, MAX_CONNECTORS, num_days_40)
s2_80_percent_data, num_days_80  = drop_x_percent_data(data2.copy(), 20, non_substituted_cols, 5, MAX_CONNECTORS, num_days_60)
s2_100_percent_data, num_days_100  = drop_x_percent_data(data2.copy(), 0, non_substituted_cols, 1, MAX_CONNECTORS, num_days_80)


s2_data = pd.concat([s2_00_percent_data, s2_20_percent_data, s2_40_percent_data, s2_60_percent_data, s2_80_percent_data, s2_100_percent_data], axis=0)

num_days = len(data1.resample('1D').mean())
offset = pd.DateOffset(days=num_days)
data2.index = data2.index + offset



X_batch3 = pd.concat([s1_data, s2_data], axis=0)

num_of_trainable_instances_df1 = int(len(s1_data)/TIMESTEPS)
num_of_trainable_instances_df2 = int(len(s2_data)/TIMESTEPS)


target_batch3 = np.array([[0.85, 0.09, 0.06]]*num_of_trainable_instances_df1 + [[0.85, 0.06, 0.09]]*num_of_trainable_instances_df2)



{2024: {5: []}}
New df Len:  528 Data Len:  528
{2024: {6: []}}
New df Len:  3888 Data Len:  3888


In [None]:
np.round([28/33,3/33,  2/33],2), np.round([28/33,2/33,  3/33],2),

(array([0.85, 0.09, 0.06]), array([0.85, 0.06, 0.09]))

#Get the New Delhi Testsite Data

In [13]:
len_wrt_timestep = lambda df: int(len(df)/TIMESTEPS)

In [14]:
s1 = get_sensor_data("/content/pureStr1_Batch3_2022_TSDel.csv", index_column='Time')
s1.loc[:,'C_P'] = s1['C_I']*s1['C_V']
s1.loc[:,'C_P_C2'] = s1['C_I_C2']*s1['C_V_C2']
s1.loc[:,'C_P_C3'] = s1['C_I_C3']*s1['C_V_C3']
doi_s1_testsite = s1[['Day sin', 'W_G', 'S1_C', 'S1_V', 'S1_P', 'S1_R',
                      'C_V', 'C_Tsur',  'C_P', 'C_R',
                      'C_V_C2', 'C_Tsur_C2', 'C_P_C2',  'C_R_C2',
                      'C_V_C3', 'C_Tsur_C3', 'C_P_C3', 'C_R_C3']]
doi_s1_testsite.columns = ['Day sin', 'G_0', 'I_S', 'V_S', 'P_S', 'R_S',
                           'V_0', 'T_0',  'P_0', 'R_0',
                           'V_1', 'T_1',  'P_1', 'R_1',
                           'V_2', 'T_2',  'P_2', 'R_2', ]

num_days = len(doi_s1_testsite.resample("1D").mean())
offset = pd.DateOffset(days=num_days)

s2 = get_sensor_data("/content/pureStr2_Batch3_2022_TSDel.csv", index_column='Time')
s2.loc[:,'C_P'] = s2['C_I']*s2['C_V']
s2.loc[:,'C_P_C2'] = s2['C_I_C2']*s2['C_V_C2']
s2.loc[:,'C_P_C3'] = s2['C_I_C3']*s2['C_V_C3']
doi_s2_testsite = s2[['Day sin', 'W_G', 'S2_C', 'S2_V', 'S2_P', 'S2_R',
                      'C_V', 'C_Tsur',  'C_P', 'C_R',
                      'C_V_C2', 'C_Tsur_C2', 'C_P_C2',  'C_R_C2',
                      'C_V_C3', 'C_Tsur_C3', 'C_P_C3', 'C_R_C3']]
doi_s2_testsite.columns = ['Day sin', 'G_0', 'I_S', 'V_S', 'P_S', 'R_S',
                           'V_0', 'T_0',  'P_0', 'R_0',
                           'V_1', 'T_1',  'P_1', 'R_1',
                           'V_2', 'T_2',  'P_2', 'R_2', ]


doi_s2_testsite.index += offset

doi_s1_testsite = doi_s1_testsite[doi_s1_testsite['R_S']!=inf]
doi_s1_testsite = prepare_timeseries_data(doi_s1_testsite)
doi_s2_testsite = doi_s2_testsite[doi_s2_testsite['R_S']!=inf]
doi_s2_testsite = prepare_timeseries_data(doi_s2_testsite)


doi_s1_testsite = fill_substitute(doi_s1_testsite)
doi_s2_testsite = fill_substitute(doi_s2_testsite)

X_testsite = pd.concat([doi_s1_testsite, doi_s2_testsite], axis=0)

s1_y_testsite_len_wrt_timesteps = len_wrt_timestep(doi_s1_testsite)
s2_y_testsite_len_wrt_timesteps = len_wrt_timestep(doi_s2_testsite)

testsite_y = np.array([[0.50, 0., 0.50]]*s1_y_testsite_len_wrt_timesteps+[[0.165, 0.335, 0.50]]*s2_y_testsite_len_wrt_timesteps)

X_testsite.head()

{2022: {12: [2, 3, 6, 7, 11, 12, 13, 14]}}
New df Len:  12846 Data Len:  13998
{2022: {12: [23, 30]}, 2023: {12: []}}
New df Len:  5333 Data Len:  5621


Unnamed: 0_level_0,Day sin,G_0,I_S,V_S,P_S,R_S,V_0,T_0,P_0,R_0,...,P_7,R_7,V_8,T_8,P_8,R_8,V_9,T_9,P_9,R_9
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-09-10 07:00:00,0.566813,313.909216,4.010784,184.835294,741.72549,46.087632,3.0,36.431373,12.591324,0.715141,...,1,1,1,1,1,1,1,1,1,1
2022-09-10 07:05:00,0.574561,314.038456,4.010294,184.838235,741.647059,46.093876,3.0,36.411765,12.588309,0.715297,...,1,1,1,1,1,1,1,1,1,1
2022-09-10 07:10:00,0.58231,314.167696,4.009804,184.841176,741.568627,46.10012,3.0,36.392157,12.585294,0.715452,...,1,1,1,1,1,1,1,1,1,1
2022-09-10 07:15:00,0.590058,314.296936,4.009314,184.844118,741.490196,46.106364,3.0,36.372549,12.582279,0.715608,...,1,1,1,1,1,1,1,1,1,1
2022-09-10 07:20:00,0.597807,314.426176,4.008824,184.847059,741.411765,46.112608,3.0,36.352941,12.579265,0.715763,...,1,1,1,1,1,1,1,1,1,1


# Create and Train the time series model

In [15]:
final_dataset_X = pd.concat([ X_batch2, X_batch3], axis=0)
final_dataset_y = np.concatenate((target_batch2, target_batch3), axis=0)

#final_dataset_X = X_testsite.copy()
#final_dataset_y = np.array(testsite_y)

num_of_trainable_instances = int(len(final_dataset_X)/TIMESTEPS)
features = len(final_dataset_X.columns)
scaler = StandardScaler()
data_scaled = (np.array(final_dataset_X)).reshape(num_of_trainable_instances, TIMESTEPS, features)

ai_model = timeseries_ml_model(time_steps= TIMESTEPS, input_features=features, epochs=200)
ai_model.get_data(data_scaled, final_dataset_y, test_split=1)

ai_model.make_dl_model()
ai_model.train_dl_model()

ypred, yact = ai_model.make_predictions(model="dl")

calucluate_mean_absolute_error(ypred, yact)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

array([0.00230547, 0.00124135, 0.00106416])

In [None]:
c1=[]
c2=[]
c3=[]
c4=[]
for i,j in zip(ypred, yact):
  if (j==[0.5, 0,  0.5]).all():
    c1.append(i)
  elif (j==[0.165, 0.335, 0.5]).all():
    c2.append(i)
  elif (j==[1.0  , 0.0, 0.]).all():
    c3.append(i)
  elif (j==[0.7, 0.15  , 0.15]).all():
    c4.append(i)

np.mean(c1, axis=0), np.mean(c2, axis=0),np.mean(c3, axis=0), np.mean(c4, axis=0)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


(nan, array([0.16020414, 0.3413809 , 0.49841496], dtype=float32), nan, nan)

# Testing Bern Batch-3 Data
we will load the data of batch-3 from Bern. Here the String Configuration is as follows:


1.   String-1

*   5 Normal
*   3 Bad Crimp
*   2 Crossmated


2.   String-2
*   5 Normal
*   2 Bad Crimp
*   3 Crossmated







In [21]:
(np.array(s2_00_percent_data)).reshape(int(len(s2_00_percent_data)/TIMESTEPS), TIMESTEPS, features).shape, target_batch3.shape

((25, 144, 46), (594, 3))

In [18]:
test_data = (np.array(X_batch1)).reshape(int(len(X_batch1)/TIMESTEPS), TIMESTEPS, features)
target =
ypred = ai_model.dl_model.predict(test_data)

print(f"Mean absolute Error in predicting S1 Composition: {calucluate_mean_absolute_error(ypred, target_batch1)}\nAverage Error: {calucluate_mean_absolute_error(ypred, target_batch1).mean()}")
c1=[]
c2=[]
for i,j in zip(ypred, target_batch1):
  if (j==[1.  , 0., 0.]).all():
    c1.append(i)
  elif (j==[0.7, 0.15, 0.15]).all():
    c2.append(i)

np.mean(c1, axis=0), np.mean(c2, axis=0)

Mean absolute Error in predicting S1 Composition: [0.15200605 0.08406375 0.06794229]
Average Error: 0.1013373651281777


(array([0.85587114, 0.06327067, 0.08085863], dtype=float32),
 array([0.85904485, 0.04735517, 0.0936    ], dtype=float32))

Testing all Batches of Bern

In [None]:
dataset_X = pd.concat([X_batch1, X_batch2, X_batch3], axis=0)
dataset_y = np.concatenate((target_batch1, target_batch2, target_batch3), axis=0)

num_of_trainable_instances = int(len(dataset_X)/TIMESTEPS)
features = len(dataset_X.columns)

test_data = (np.array(dataset_X)).reshape(num_of_trainable_instances, TIMESTEPS, features)
pred = ai_model.dl_model.predict(test_data)
c1=[]
c2=[]
c3=[]
c4=[]
c5=[]
c6=[]
for i,j in zip(pred, dataset_y):
  if (j==[0.85  , 0.15, 0.]).all():
    c1.append(i)
  elif (j==[0.85, 0.  , 0.15]).all():
    c2.append(i)
  elif (j==[1, 0, 0.]).all():
    c3.append(i)
  elif (j==[0.7, 0.15, 0.15]).all():
    c4.append(i)
  elif (j==[0.85, 0.09, 0.06]).all():
    c5.append(i)
  elif (j==[0.85, 0.06, 0.09]).all():
    c6.append(i)

np.mean(c1, axis=0), np.mean(c2, axis=0), np.mean(c3, axis=0), np.mean(c4, axis=0), np.mean(c5, axis=0), np.mean(c6, axis=0)



(array([0.85397756, 0.14432998, 0.00169254], dtype=float32),
 array([8.5462636e-01, 8.0378505e-04, 1.4457011e-01], dtype=float32),
 array([0.99663013, 0.0015904 , 0.0017809 ], dtype=float32),
 array([0.7016829 , 0.14842808, 0.14988945], dtype=float32),
 array([0.8453656 , 0.00308155, 0.15155289], dtype=float32),
 array([0.85362965, 0.00737712, 0.13899305], dtype=float32))