In [3]:
import sqlite3
import random
from datetime import datetime, timedelta

# Connect to the SQLite database
conn = sqlite3.connect("db/original_data.db")
cursor = conn.cursor()

# Create the table if it doesn't already exist
cursor.execute('''
CREATE TABLE IF NOT EXISTS additional_original_data (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    timestamp TEXT,
    Grid_Selection INTEGER
)
''')

# Define the start and end datetime
start_time = datetime(2024, 1, 1, 0, 0, 0)
end_time = datetime(2024, 12, 31, 23, 59, 59)
interval = timedelta(minutes=6)

# Generate data
data_to_insert = []
current_time = start_time
while current_time <= end_time:
    timestamp = current_time.strftime("%Y-%m-%dT%H:%M:%S")
    grid_value = 0 if random.random() < 0.8 else 1  # 80% 0, 20% 1
    data_to_insert.append((timestamp, grid_value))
    current_time += interval

# Insert into the database
cursor.executemany('''
INSERT INTO additional_original_data (timestamp, Grid_Selection)
VALUES (?, ?)
''', data_to_insert)

# Commit and close
conn.commit()
conn.close()

print(f"Inserted {len(data_to_insert)} rows into 'additional_original_data'")


Inserted 87840 rows into 'additional_original_data'


In [3]:
import pandas as pd
import pickle

# Load Excel file
df = pd.read_excel("/home/arkiven4/Downloads/Sample_Down Time Events.xlsx")
df['Start'] = pd.to_datetime(df['Start Date'] + ' ' + df['Start Time'])
df['End'] = pd.to_datetime(df['End Date'] + ' ' + df['End Time'])

In [4]:
df

Unnamed: 0,Start Date,Start Time,End Date,End Time,Event,Category,Plant,Start,End
0,2023-02-19,08:25:00,2023-02-19,10:16:00,LGS#1 Trip by over frequency (Total blackout),PV,LGS1,2023-02-19 08:25:00,2023-02-19 10:16:00
1,2023-02-27,20:15:00,2023-02-27,20:35:00,LGS#1 Trip by Line#1 and Line#2 Trip,PV,LGS1,2023-02-27 20:15:00,2023-02-27 20:35:00
2,2023-03-10,22:53:00,2023-03-10,23:01:00,LGS#1 Trip to standstill,CD,LGS2,2023-03-10 22:53:00,2023-03-10 23:01:00
3,2023-03-10,22:53:00,2023-03-10,23:01:00,LGS#1 Trip to standstill,CD,LGS2,2023-03-10 22:53:00,2023-03-10 23:01:00
4,2023-04-24,03:57:00,2023-04-24,04:32:00,LGS#1 trip by 186 TX hand reset lockout relay,CR,LGS3,2023-04-24 03:57:00,2023-04-24 04:32:00
5,2023-05-22,20:13:00,2023-05-22,20:32:00,LGS#1 Trip by 86N energized,CR,LGS3,2023-05-22 20:13:00,2023-05-22 20:32:00
6,2023-07-31,10:49:00,2023-07-31,10:59:00,LGS#1 Trip by under frequency,PV,KGS1,2023-07-31 10:49:00,2023-07-31 10:59:00
7,2023-09-16,08:16:00,2023-09-16,09:36:00,LGS#1 Trip by over frequency (Total blackout),PV,KGS2,2023-09-16 08:16:00,2023-09-16 09:36:00
8,2023-10-04,22:01:00,2023-10-04,22:18:00,Power Hydro to FCE’s grid Blackout.,CD,BGS1,2023-10-04 22:01:00,2023-10-04 22:18:00
9,2022-03-31,10:49:00,2022-03-31,11:18:00,Furnace grid blackout,CR,BGS2,2022-03-31 10:49:00,2022-03-31 11:18:00


In [None]:


# Group and reshape by Plant and Category
category_counts = df.groupby(['Plant', 'Category']).size().unstack(fill_value=0)

# Prepare final data
final_data = {
    'plants': list(category_counts.index),
    'data': [
        {
            'label': col,
            'data': category_counts[col].tolist(),
        }
        for col in category_counts.columns
    ],
    'raw_events': df[['Start Date', 'Plant', 'Category']].to_dict(orient='records')  # for filtering later
}

# # Save to pickle
# with open(settings.MONITORINGDB_PATH + 'db/number_of_event.pickle', 'wb') as handle:
#     pickle.dump(final_data, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [2]:
final_data

{'plants': ['BGS1', 'BGS2', 'KGS1', 'KGS2', 'LGS1', 'LGS2', 'LGS3'],
 'data': [{'label': 'CD', 'data': [3, 0, 2, 0, 2, 4, 2]},
  {'label': 'CR', 'data': [0, 1, 0, 1, 1, 0, 3]},
  {'label': 'PV', 'data': [0, 2, 1, 2, 3, 2, 1]}],
 'raw_events': [{'Start Date': Timestamp('2023-02-19 00:00:00'),
   'Plant': 'LGS1',
   'Category': 'PV'},
  {'Start Date': Timestamp('2023-02-27 00:00:00'),
   'Plant': 'LGS1',
   'Category': 'PV'},
  {'Start Date': Timestamp('2023-03-10 00:00:00'),
   'Plant': 'LGS2',
   'Category': 'CD'},
  {'Start Date': Timestamp('2023-03-10 00:00:00'),
   'Plant': 'LGS2',
   'Category': 'CD'},
  {'Start Date': Timestamp('2023-04-24 00:00:00'),
   'Plant': 'LGS3',
   'Category': 'CR'},
  {'Start Date': Timestamp('2023-05-22 00:00:00'),
   'Plant': 'LGS3',
   'Category': 'CR'},
  {'Start Date': Timestamp('2023-07-31 00:00:00'),
   'Plant': 'KGS1',
   'Category': 'PV'},
  {'Start Date': Timestamp('2023-09-16 00:00:00'),
   'Plant': 'KGS2',
   'Category': 'PV'},
  {'Start Date

In [18]:
import pickle, os, sqlite3
import pandas as pd
import numpy as np
import matplotlib.dates as mdates
import matplotlib.pyplot as plt

from tokenizers import PreTokenizedString
from tqdm import tqdm
from datetime import datetime, timedelta
from collections import Counter
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy.stats import spearmanr
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.gridspec import GridSpec
from matplotlib.dates import DateFormatter

from django.conf import settings
import apis.commons as commons

In [4]:
feature_tag_mapping = {
    'LGS1 Active Power': 'U-LGS1-Active-Power-AI',
    'LGS1-Auxiliary Grid (0 = ACTIVE)': 'U-LGS1-N75-15-0-AI',
    'LGS1 Governor Unit Speed Actual': 'U-LGS1-SI-81101-AI',
    
    'LGS2 Active Power': 'U-LGS2-Active-Power-AI',
    'LGS2-Auxiliary Grid (0 = ACTIVE)': 'U-LGS2-N75-25-0-AI',
    'LGS2 Governor Unit Speed Actual': 'U-LGS2-SI-81201-AI',
    
    'LGS3 Active Power': 'U-LGS3_Active-Power-AI',
    'LGS3-Auxiliary Grid (0 = ACTIVE)': 'U-LGS3-N75-35-0-AI',
    'LGS3 Governor Unit Speed Actual': 'U-LGS3_SI_81301_I_Eng-AI',

    'Avg Hydro Power Available 1D (Avg)': 'U-PWR-HYDRO-AI-AVGD',
    'Total Hydro Power Daily (Tot)': 'U-HGST-Power-AI-DTT',
    'Total Larona Power Daily (Tot)': 'U-PWR-LAR-TOT-DTT',
    'Total Balambano Power Daily (Tot)': 'U-PWR-BAL-TOT-DTT',
    'Total Karebbe Power Daily (Tot)': 'U-PWR-KAR-TOT-DTT', 

    # BGS
    'BGS1 Power': 'U-BGS1-Power-AI',
    'BGS1-Auxiliary Grid (0 = ACTIVE)': 'U-BGS1-N75-45-0-AI',
    'GEN SPEED BGS1': 'U-BGS1_I_T_SPEED-AI',

    'BGS2 Power': 'U-BGS2-Power-AI',
    'BGS2-Auxiliary Grid (0 = ACTIVE)': 'U-BGS2-N75-55-0-AI',
    'GEN SPEED BGS2': 'U-BGS2_I_T_SPEED-AI',

    # KGS
    'K U1 Active Power (MW)': 'U-KGS1-Active_Power_AI',
    'KGS1-Auxiliary Grid (0 = ACTIVE)': 'U-KGS1-N75-65-0-AI',
    'K U1 Turb Gov Turbine Speed (RPM)': 'U-KGS1-Turb_Gov_Turb_Speed-AI',

    'K U2 Active Power (MW)': 'U-KGS2-Active_Power_AI',
    'KGS2-Auxiliary Grid (0 = ACTIVE)': 'U-KGS2-N75-75-0-AI',
    'K U2 Turb Gov Turbine Speed (RPM)': 'U-KGS2-Turb_Gov_Turb_Speed-AI',
}

reverse_mapping = {v: k for k, v in feature_tag_mapping.items()}

In [5]:
[v for k,v in feature_tag_mapping.items()]

['U-LGS1-Active-Power-AI',
 'U-LGS1-N75-15-0-AI',
 'U-LGS1-SI-81101-AI',
 'U-LGS2-Active-Power-AI',
 'U-LGS2-N75-25-0-AI',
 'U-LGS2-SI-81201-AI',
 'U-LGS3_Active-Power-AI',
 'U-LGS3-N75-35-0-AI',
 'U-LGS3_SI_81301_I_Eng-AI',
 'U-PWR-HYDRO-AI-AVGD',
 'U-HGST-Power-AI-DTT',
 'U-PWR-LAR-TOT-DTT',
 'U-PWR-BAL-TOT-DTT',
 'U-PWR-KAR-TOT-DTT',
 'U-BGS1-Power-AI',
 'U-BGS1-N75-45-0-AI',
 'U-BGS1_I_T_SPEED-AI',
 'U-BGS2-Power-AI',
 'U-BGS2-N75-55-0-AI',
 'U-BGS2_I_T_SPEED-AI',
 'U-KGS1-Active_Power_AI',
 'U-KGS1-N75-65-0-AI',
 'U-KGS1-Turb_Gov_Turb_Speed-AI',
 'U-KGS2-Active_Power_AI',
 'U-KGS2-N75-75-0-AI',
 'U-KGS2-Turb_Gov_Turb_Speed-AI']

In [24]:
count = 0
master_pd = ""
column_name = []

for subdir, dirs, files in os.walk("data_csv/2024"):
    for file in files:
        filepath = subdir + os.sep + file
        tag_name = filepath.split("/")[-1].split(".")[0]
        feature_key = reverse_mapping.get(tag_name)
        column_name.append(feature_key)

        value_resp = pd.read_csv(filepath)
        if count == 0:
            value_resp['Timestamps'] = pd.to_datetime(value_resp['Timestamps'])
            master_pd = value_resp
        else:
            master_pd = pd.concat([master_pd, value_resp['Values']], axis=1, join='inner')

        count = count + 1

  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
  value_resp = pd.read_csv(filepath)
 

In [25]:
master_pd = master_pd.values
master_pd = pd.DataFrame(data=master_pd, columns=['TimeStamp'] + list(column_name)) #+ feature_set + ['Grid Selection'])
master_pd = master_pd.reset_index(drop=True)
master_pd.replace('I/O Timeout', np.nan, inplace=True)
master_pd.replace('No Data', np.nan, inplace=True)
master_pd.replace('Future Data Unsupported', np.nan, inplace=True)
master_pd.replace('Closed', np.nan, inplace=True)
master_pd.replace('Open', np.nan, inplace=True)

for column_name in master_pd.columns:
    if column_name != 'Load_Type' and column_name != 'TimeStamp':
        master_pd[column_name] = pd.to_numeric(master_pd[column_name], downcast='float')

master_pd = master_pd.sort_values(by='TimeStamp')
master_pd = master_pd.reset_index(drop=True)
master_pd = master_pd.fillna(method='ffill')

  master_pd.replace('I/O Timeout', np.nan, inplace=True)
  master_pd.replace('No Data', np.nan, inplace=True)
  master_pd.replace('Open', np.nan, inplace=True)
  master_pd = master_pd.fillna(method='ffill')


In [26]:
master_pd.drop(['Unit breaker BGS2', 'Unit breaker BGS1'], axis=1, inplace=True)

In [None]:
#master_pd.to_csv("kpi2024.csv", index=False)

In [13]:
master_pd = pd.read_csv("kpi2024.csv")

In [27]:
def init_db_timeconst(feature_set, db_name="masters_data.db", table_name="severity_trending"):
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    
    # Create table if it does not exist
    columns = ", ".join([feature_name.replace(" ", "_") for feature_name in feature_set])
    cursor.execute(f"""
        CREATE TABLE IF NOT EXISTS {table_name} (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp TEXT,
            {columns}
        )
    """)

    conn.commit()
    conn.close()

def timeseries_savedb(df_timestamp, data, feature_set, db_name="data.db", table_name="sensor_data"):
    #if len(data) == 30:
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    # Generate timestamp
    timestamp = df_timestamp.isoformat()
    
    # Build column names for features, replacing spaces with underscores
    feature_columns = ', '.join([feature_name.replace(" ", "_") for feature_name in feature_set])
    placeholders = ', '.join(['?' for _ in range(len(feature_set))])
    
    # Upsert using INSERT OR REPLACE
    # Note: Your table must have a UNIQUE constraint on the timestamp column.
    sql = f"""
        INSERT OR REPLACE INTO {table_name} (timestamp, {feature_columns})
        VALUES (?, {placeholders})
    """
    cursor.execute(sql, (timestamp, *data))
    
    conn.commit()
    conn.close()

def is_number(x):
    return isinstance(x, (int, float))


In [28]:
plant_metadata = {
    'Larona': [{
        'name': "LGS1",
        'active_power': 'LGS1 Active Power',
        'rpm': 'LGS1 Governor Unit Speed Actual',
        'aux': 'LGS1-Auxiliary Grid (0 = ACTIVE)',
        'coef': [20.944, 11.398]
    },
    {
        'name': "LGS2",
        'active_power': 'LGS2 Active Power',
        'rpm': 'LGS2 Governor Unit Speed Actual',
        'aux': 'LGS2-Auxiliary Grid (0 = ACTIVE)',
        'coef': [21.162, 8.49]
    },
    {
        'name': "LGS3",
        'active_power': 'LGS3 Active Power',
        'rpm': 'LGS3 Governor Unit Speed Actual',
        'aux': 'LGS3-Auxiliary Grid (0 = ACTIVE)',
        'coef': [19.66, 13.676]
    }],
    'Balambano': [{
        'name': "BGS1",
        'active_power': 'BGS1 Power',
        'rpm': 'GEN SPEED BGS1',
        'aux': 'BGS1-Auxiliary Grid (0 = ACTIVE)',
        'coef': [20.944, 11.398]
    },
    {
        'name': "BGS2",
        'active_power': 'BGS2 Power',
        'rpm': 'GEN SPEED BGS2',
        'aux': 'BGS2-Auxiliary Grid (0 = ACTIVE)',
        'coef': [21.162, 8.49]
    }],
    'Karebbe': [{
        'name': "KGS1",
        'active_power': 'K U1 Active Power (MW)',
        'rpm': 'K U1 Turb Gov Turbine Speed (RPM)',
        'aux': 'KGS1-Auxiliary Grid (0 = ACTIVE)',
        'coef': [20.944, 11.398]
    },
    {
        'name': "KGS2",
        'active_power': 'K U2 Active Power (MW)',
        'rpm': 'K U2 Turb Gov Turbine Speed (RPM)',
        'aux': 'KGS2-Auxiliary Grid (0 = ACTIVE)',
        'coef': [21.162, 8.49]
    }]
}

for value in plant_metadata.values():
    for value2 in value:
        init_db_timeconst(['oee', 'phy_avail', 'performance', 'uo_Avail', "aux_0", "aux_1"], "db/kpi.db", value2['name'])

init_db_timeconst(['hpd', 'ahpa', 'lpd', 'bpd', 'kpd'], "db/kpi.db", "PowerProd")

In [29]:
def process_shutdown_and_snl_periods(df_selected, column_name):
    data_timestamp = df_selected[['TimeStamp']].values
    sensor_datas = df_selected[column_name].values

    activepower_data = sensor_datas[:, 0].astype(float)
    rpm_data = sensor_datas[:, 1].astype(float)

    shutdown_mask = (activepower_data <= 3) & (rpm_data <= 10)
    snl_mask = (activepower_data <= 3) & (rpm_data >= 259.35) & (rpm_data <= 286.65)

    def extract_periods(mask):
        change_points = np.diff(mask.astype(int), prepend=0)
        start_indices = np.where(change_points == 1)[0]
        end_indices = np.where(change_points == -1)[0]

        if mask[-1]:
            end_indices = np.append(end_indices, len(mask))
        if mask[0]:
            start_indices = np.insert(start_indices, 0, 0)

        periods = []
        for start, end in zip(start_indices, end_indices):
            start_time = data_timestamp[start][0]
            end_time = data_timestamp[end - 1][0]
            periods.append((start_time, end_time))
        return periods

    shutdown_periods = extract_periods(shutdown_mask)
    snl_periods = extract_periods(snl_mask)

    return shutdown_periods, snl_periods

def compute_oee_metrics(df_selected, column_name, shutdown_periods, snl_periods, performance_formula):
    data_timestamp = df_selected[['TimeStamp']].values.flatten()
    sensor_datas = df_selected[column_name].values

    active_power = sensor_datas[:, 0].astype(float)

    nonzeroneg_mask = active_power > 0
    total_hours = (pd.to_datetime(str(data_timestamp[-1])) - pd.to_datetime(str(data_timestamp[0]))).total_seconds() / 3600

    downtime_hours = sum(
        (pd.to_datetime(str(end)) - pd.to_datetime(str(start))).total_seconds() / 3600
        for start, end in shutdown_periods
    )
    snl_hours = sum(
        (pd.to_datetime(str(end)) - pd.to_datetime(str(start))).total_seconds() / 3600
        for start, end in snl_periods
    )

    phy_avail = max(round((total_hours - downtime_hours) / total_hours, 2), 0.01)
    uo_Avail = max(round((total_hours - snl_hours) / total_hours, 2), 0.01)

    if np.any(nonzeroneg_mask):
        log_mean = np.mean(np.log(active_power[nonzeroneg_mask]))
        performance = max(round((performance_formula[0] * log_mean + performance_formula[1]) / 100, 2), 0)
    else:
        performance = 0.01

    oee = max(round(phy_avail * performance * uo_Avail, 2), 0.01)
    datetime_nowMidnight = pd.to_datetime(str(data_timestamp[-1])).replace(hour=1, minute=0, second=0)

    return datetime_nowMidnight, oee, phy_avail, performance, uo_Avail


In [30]:
start_time = pd.to_datetime('2024-01-01 01:00:00')
end_time = master_pd['TimeStamp'].max()

current_start = start_time
while current_start < end_time:
    current_end = current_start + pd.DateOffset(days=1)

    mask = (master_pd['TimeStamp'] >= current_start) & (
        master_pd['TimeStamp'] < current_end)
    df_sel = master_pd.loc[mask]

    for value in plant_metadata.values():
        for tags in value:
            unit_name = tags['name']

            if tags['active_power'] not in df_sel.columns or tags['rpm'] not in df_sel.columns:
                continue  # Skip if required data not present

            df_unit = df_sel[['TimeStamp', tags['active_power'], tags['rpm'], tags['aux']]].dropna()
            if df_unit.empty:
                continue

            # Process shutdown & SNL
            shutdown_periods, snl_periods = process_shutdown_and_snl_periods(
                df_unit, [tags['active_power'], tags['rpm']]
            )

            # Compute OEE and related KPIs
            datetime_nowMidnight, oee, phy_avail, performance, uo_Avail = compute_oee_metrics(
                df_unit, [tags['active_power'], tags['rpm']],
                shutdown_periods, snl_periods,
                performance_formula=tags['coef']
            )

            # Count Auxiliary Grid ON/OFF
            counts_aux = df_unit[tags['aux']].value_counts().sort_index()
            aux_0 = counts_aux.get(0.0, 0)
            aux_1 = counts_aux.get(1.0, 0)

            # Save to database
            timeseries_savedb(
                datetime_nowMidnight,
                np.array([oee, phy_avail, performance, uo_Avail, aux_0, aux_1]),
                ['oee', 'phy_avail', 'performance', 'uo_Avail', 'aux_0', 'aux_1'],
                "db/kpi.db",
                unit_name
            )

    pda_datas = df_sel[['Total Hydro Power Daily (Tot)', 'Avg Hydro Power Available 1D (Avg)' , 'Total Larona Power Daily (Tot)', 'Total Balambano Power Daily (Tot)', 'Total Karebbe Power Daily (Tot)']].mean().values
    timeseries_savedb(
            datetime_nowMidnight,
            np.array([pda_datas[0], pda_datas[1], pda_datas[2], pda_datas[3], pda_datas[3]]).astype(np.float64),
            ['hpd', 'ahpa', 'lpd', 'bpd', 'kpd'],
            "db/kpi.db",
            "PowerProd"
        )
    current_start = current_end

In [None]:
sensor_datas = commons.fetch_between_dates("2023-01-22T18:13:00", "2023-05-22T19:52:00", "db/original_data.db", "original_data")
data_timestamp = sensor_datas[:, 1]
sensor_datas = sensor_datas[:, 2:].astype(float)

frame_len = 286
num_frames = len(sensor_datas) // frame_len  # 120
usable_len = frame_len * num_frames  # 34320

data_timestamp = data_timestamp[:usable_len]
sensor_datas = sensor_datas[:usable_len]

frames_timestamp = data_timestamp.reshape(num_frames, frame_len)
frames_sensor = sensor_datas.reshape(num_frames, frame_len, 30)
for i_frame, frame in enumerate(frames_sensor):
    now_timestamp = frames_timestamp[i_frame, :]
    now_sensors = frames_sensor[i_frame, :]

    shutdown_periods = commons.process_shutdownTimestamp(now_timestamp, now_sensors)
    snl_periods = commons.process_SNLTimestamp(now_timestamp, now_sensors)

    nonzeroneg_activepower = now_sensors[:, 0] > 0
    total_hours = (datetime.fromisoformat(str(now_timestamp[-1])) - datetime.fromisoformat(str(now_timestamp[0]))).total_seconds() / 3600
    downtime_hours = 0
    for datespan_downtime in shutdown_periods:
        delta = datetime.fromisoformat(str(datespan_downtime[1])) - datetime.fromisoformat(str(datespan_downtime[0]))
        delta_hours = delta.total_seconds() / 3600
        downtime_hours += delta_hours

    snl_hours = 0
    for datespan_snl in snl_periods:
        delta = datetime.fromisoformat(str(datespan_snl[1])) - datetime.fromisoformat(str(datespan_snl[0]))
        delta_hours = delta.total_seconds() / 3600
        snl_hours += delta_hours

    phy_avail = max(round(((total_hours - downtime_hours) / total_hours), 2), 0.01)
    uo_Avail = max(round(((total_hours - snl_hours) / total_hours), 2), 0.01)
    if len(now_sensors[nonzeroneg_activepower, 0]) > 0:
        performance = max(round((20.944 * np.mean(np.log(now_sensors[nonzeroneg_activepower, 0])) + 11.398)/100, 2), 0)
    else:
        performance = 0.01
    
    oee = max(round(phy_avail * performance * uo_Avail,2), 0.01)
    datetime_nowMidnight = datetime.fromisoformat(str(now_timestamp[-1])).replace(hour=1, minute=0, second=0)

    timeseries_savedb(datetime_nowMidnight, np.array([oee, phy_avail, performance, uo_Avail]), ['oee', 'phy_avail', 'performance', 'uo_Avail'], "db/kpi.db", "kpi")

In [None]:
sensor_datas.shape

In [None]:
286 / 15

In [None]:
import pickle
import os
import pandas as pd
import numpy as np
import sqlite3
from tqdm import tqdm
import matplotlib.dates as mdates
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.gridspec import GridSpec
from matplotlib.dates import DateFormatter

from django.conf import settings

from pathlib import Path

feature_set = ['Active Power', 'Reactive Power', 'Governor speed actual', 'UGB X displacement', 'UGB Y displacement',
    'LGB X displacement', 'LGB Y displacement', 'TGB X displacement',
    'TGB Y displacement', 'Stator winding temperature 13',
    'Stator winding temperature 14', 'Stator winding temperature 15',
    'Surface Air Cooler Air Outlet Temperature',
    'Surface Air Cooler Water Inlet Temperature',
    'Surface Air Cooler Water Outlet Temperature',
    'Stator core temperature', 'UGB metal temperature',
    'LGB metal temperature 1', 'LGB metal temperature 2',
    'LGB oil temperature', 'Penstock Flow', 'Turbine flow',
    'UGB cooling water flow', 'LGB cooling water flow',
    'Generator cooling water flow', 'Governor Penstock Pressure',
    'Penstock pressure', 'Opening Wicked Gate', 'UGB Oil Contaminant',
    'Gen Thrust Bearing Oil Contaminant']

model_array = ["Attention", "DTAAD", "MAD_GAN", "TranAD", "DAGMM", "USAD", "OmniAnomaly"]
with open('model_thr.pickle', 'rb') as handle:
    model_thr = pickle.load(handle)

with open('normalize_2023.pickle', 'rb') as handle:
    normalize_obj = pickle.load(handle)
    min_a, max_a = normalize_obj['min_a'], normalize_obj['max_a']

def normalize3(a, min_a=None, max_a=None):
    if min_a is None: min_a, max_a = np.min(a, axis=0), np.max(a, axis=0)
    return ((a - min_a) / (max_a - min_a + 0.0001)), min_a, max_a

def denormalize3(a_norm, min_a, max_a):
    return a_norm * (max_a - min_a + 0.0001) + min_a


def fetch_between_dates(start_date, end_date, db_name="data.db", table_name="sensor_data"):
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    
    cursor.execute(f"""
        SELECT * FROM {table_name} WHERE timestamp BETWEEN ? AND ?
    """, (start_date, end_date))
    
    rows = cursor.fetchall()
    conn.close()

    if not rows:
        return np.array([])
    
    return np.array(rows)

def fetch_last_rows(num_row, db_name="data.db", table_name="sensor_data"):
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()

    cursor.execute(f"""
        SELECT * FROM {table_name} ORDER BY timestamp DESC LIMIT ?
    """, (num_row,))
    
    rows = cursor.fetchall()
    conn.close()

    if not rows:
        return np.array([])
    
    return np.array(rows)

def convert_timestamp(timestamp_str):
    dt = datetime.fromisoformat(timestamp_str)
    return pd.Timestamp(dt.strftime('%Y-%m-%d %H:%M:%S'))

def percentage2severity(value):
    return (
        1 if 0 <= value < 5 else
        2 if 5 <= value < 20 else
        3 if 20 <= value < 40 else
        4 if 40 <= value < 75 else
        5 if 75 <= value <= 100 else
        6
    )
    
def calc_counterPercentage(threshold_percentages):
    counter_feature = {}
    for modex_idx, values_pred in threshold_percentages.items():
        values_pred = dict(sorted(values_pred.items(), key=lambda item: item[1], reverse=True)[:10])
        for name_feat, percentage in values_pred.items():
            if name_feat in counter_feature:
                counter_feature[name_feat]["count"] = counter_feature[name_feat]["count"] + 1
                counter_feature[name_feat]["percentage"] = counter_feature[name_feat]["percentage"] + percentage
            else:
                counter_feature[name_feat] = {"count": 1, "percentage": percentage}

    counter_feature_s1 = dict(sorted(counter_feature.items(), key=lambda item: item[1]['count'], reverse=True)[:10])
    counter_feature_s2 = dict(sorted(counter_feature_s1.items(), key=lambda item: item[1]['percentage'] // len(model_array), reverse=True))
    #counter_feature_s2_rank = dict(sorted(counter_feature_s1.items(), key=lambda item: item[1]['count'], reverse=True))

    for key, value in counter_feature_s2.items():
        counter_feature_s2[key]['count'] = (counter_feature_s2[key]['count'] / len(model_array)) * 100
        counter_feature_s2[key]['severity'] = percentage2severity(counter_feature_s2[key]['percentage'] // len(model_array))
        counter_feature_s2[key]['percentage'] = (counter_feature_s2[key]['percentage'] // len(model_array))

    # Find Which Model Have Highest Confidence
    counter_feature_plot = {}
    for index, value in counter_feature_s2.items():
        higher_data = {"model": 0, "percentage": 0}
        for model_idx in threshold_percentages:
            if index in threshold_percentages[model_idx]:
                if higher_data["percentage"] <= threshold_percentages[model_idx][index]:
                    higher_data["model"] = model_idx
                    higher_data["percentage"] = threshold_percentages[model_idx][index]
        
        counter_feature_plot[index] = higher_data['model']

    return counter_feature_s2, counter_feature_plot

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy.stats import spearmanr
import numpy as np

In [None]:
feature_set.index("LGB cooling water flow")

In [None]:
from scipy.signal import medfilt

In [None]:

def hampel_filter(series, window_size=3, n_sigmas=3):
    new_series = series.copy()
    k = 1.4826  # scale factor for Gaussian distribution
    n = len(series)

    for i in range(window_size, n - window_size):
        window = series[i - window_size:i + window_size + 1]
        median = np.median(window)
        mad = k * np.median(np.abs(window - median))
        if np.abs(series[i] - median) > n_sigmas * mad:
            new_series[i] = median
    return new_series


In [None]:
end_date = "2021-05-28T05:55:00"
start_date = "2021-04-28T06:10:00"

severity_trending_datas = fetch_between_dates(start_date, end_date, "db/severity_trendings.db", "severity_trendings")
sensor_datas = fetch_between_dates(start_date, end_date, "db/severity_trendings.db", "original_sensor")
    
data_timestamp = sensor_datas[:, 1]
severity_trending_datas = severity_trending_datas[:, 2:].astype(float)
sensor_datas = sensor_datas[:, 2:].astype(float)

for i in range(len(feature_set)):
    severity_trending_datas[:, i] = hampel_filter(severity_trending_datas[:, i], window_size=300, n_sigmas=10)

datetime_index = pd.to_datetime(data_timestamp)
series = pd.Series(severity_trending_datas[:, 23], index=datetime_index)
series = series.asfreq('15min')

result = seasonal_decompose(series, model='additive', period=96 * 2)
result.plot()

trend = result.trend.dropna()

x = np.arange(len(trend))
corr, _ = spearmanr(x, trend)
print("Spearman correlation:", corr)

if corr > 0.3:
    priority = 1  # increasing
elif corr < -0.3:
    priority = 3  # decreasing
else:
    priority = 2  # flat

priority

In [None]:
import numpy as np
from scipy.stats import skew, kurtosis

def extract_time_series_features(x):
    x = np.asarray(x)

    features = {}

    # Basic statistics
    features['mean'] = np.mean(x)
    features['min'] = np.min(x)
    features['max'] = np.max(x)
    features['std'] = np.std(x)
    features['range'] = np.ptp(x)  # max - min
    features['mad'] = np.mean(np.abs(x - np.mean(x)))  # Mean Absolute Deviation
    features['rms'] = np.sqrt(np.mean(x**2))  # Root Mean Square
    features['skewness'] = skew(x)
    features['kurtosis'] = kurtosis(x)

    # Monotonicity (fraction of increasing / decreasing)
    diffs = np.diff(x)
    features['monotonic_increasing'] = np.sum(diffs > 0) / len(diffs)
    features['monotonic_decreasing'] = np.sum(diffs < 0) / len(diffs)

    # Slope (linear trend) and gradient
    if len(x) > 1:
        t = np.arange(len(x))
        slope = np.polyfit(t, x, 1)[0]
        features['slope'] = slope
    else:
        features['slope'] = np.nan

    features['gradient_mean'] = np.mean(np.gradient(x))

    df = pd.DataFrame([features])
    return features


In [None]:
overlap = 0.2
window_size = 1492
step_size = int(window_size * (1 - overlap))

for start in range(0, len(severity_trending_datas) - window_size + 1, step_size):
    #windows.append(data[start:start + window_size])
    print(start, start + window_size)

In [None]:
np.gradient(severity_trending_datas[:, 8])

In [None]:
plt.plot(severity_trending_datas[:, 8])

In [None]:
pca = PCA(n_components=3)
data_pca = pca.fit_transform(severity_trending_datas)

In [None]:
plt.plot(data_pca[:, 2])

In [None]:
current_trending.shape

In [None]:


current_trending = severity_trending_datas[:, 8] # 8 22
current_trending = np.convolve(current_trending, kernel, mode='same')

y = (current_trending - 0) / (100 - 0)
x = np.linspace(0, 30, len(y)).reshape(-1, 1) 

reg1 = LinearRegression()
reg1.fit(x, y)

y_linear1 = reg1.predict(x)

plt.plot(x, y, label="Unknown Function", color='blue')
plt.plot(x, y_linear1, label="Linear Regression Fit", linestyle="dashed", color='orange')
plt.legend()
plt.grid()
plt.show()

reg1.coef_[0]


In [None]:
reg1 = LinearRegression()
reg2 = LinearRegression()

x = np.linspace(0, 10, 100).reshape(-1, 1) 
y = (1 / (0.001 + np.exp(-(x - 5))))
y_rev = 1- (1 / (1 + np.exp(-(x - 5))))

reg1.fit(x, y)
reg2.fit(x, y_rev)

y_linear1 = reg1.predict(x)
y_linear2 = reg2.predict(x)

m1 = reg1.coef_[0]
m2 = reg2.coef_[0]

In [None]:
m1

In [None]:
plt.plot(x, y, label="Unknown Function", color='blue')
plt.plot(x, y_linear1, label="Linear Regression Fit", linestyle="dashed", color='orange')
plt.legend()
plt.grid()
plt.show()

plt.plot(x, y_rev, label="Unknown Function", color='blue')
plt.plot(x, y_linear2, label="Linear Regression Fit", linestyle="dashed", color='orange')
plt.legend()
plt.grid()
plt.show()

In [None]:
reg1 = LinearRegression()
reg2 = LinearRegression()

x = np.linspace(0, 10, 100).reshape(-1, 1) 
y = 10 * x
y_rev = -10 * x

reg1.fit(x, y)
reg2.fit(x, y_rev)

y_linear1 = reg1.predict(x)
y_linear2 = reg2.predict(x)

m1 = reg1.coef_[0]
m2 = reg2.coef_[0]

In [None]:
m1

In [None]:

reg.fit(x, y)  # Fit y = mx + b
m = reg.coef_[0]  # Extract slope (gradient)
b = reg.intercept_  # Extract intercept

# Compute predicted values from regression
y_linear = reg.predict(x)

In [None]:
x = np.linspace(0, 10, 100)
positive_gradients = [0.5 * x, x, 10 * x]  # Increasing slopes
stationary_gradient = np.zeros_like(x)  # Flat line
negative_gradients = [-0.5 * x, -x, -2 * x]  # Decreasing slopes

# Define labels
labels = ['Positive (0.5x)', 'Positive (x)', 'Positive (2x)',
          'Stationary (0)',
          'Negative (-0.5x)', 'Negative (-x)', 'Negative (-2x)']

# Define colors
colors = ['green', 'lime', 'darkgreen', 'black', 'red', 'orange', 'darkred']

# Create the plot
plt.figure(figsize=(8, 6))

# Plot positive gradients
for i, y in enumerate(positive_gradients):
    plt.plot(x, y, label=labels[i], color=colors[i])

# Plot stationary gradient
plt.plot(x, stationary_gradient, label=labels[3], color=colors[3], linestyle='dashed')

# Plot negative gradients
for i, y in enumerate(negative_gradients, start=4):
    plt.plot(x, y, label=labels[i], color=colors[i])

# Customize plot
plt.axhline(0, color='gray', linewidth=0.5)
plt.axvline(0, color='gray', linewidth=0.5)
plt.legend()
plt.title("Plot of Positive, Stationary, and Negative Gradients")
plt.xlabel("X-axis")
plt.ylabel("Y-axis")
plt.grid(True, linestyle='--', alpha=0.7)

# Show the plot
plt.show()


In [None]:
import pickle
import os
import pandas as pd
import numpy as np
import sqlite3
from tqdm import tqdm
import matplotlib.dates as mdates
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.gridspec import GridSpec
from matplotlib.dates import DateFormatter

In [None]:
def percentage2severity(value):
    return (
        1 if 0 <= value < 5 else
        2 if 5 <= value < 20 else
        3 if 20 <= value < 40 else
        4 if 40 <= value < 75 else
        5 if 75 <= value <= 100 else
        6
    )
    
def calc_counterPercentage(threshold_percentages):
    counter_feature = {}
    for modex_idx, values_pred in threshold_percentages.items():
        values_pred = dict(sorted(values_pred.items(), key=lambda item: item[1], reverse=True)[:10])
        for name_feat, percentage in values_pred.items():
            if name_feat in counter_feature:
                counter_feature[name_feat]["count"] = counter_feature[name_feat]["count"] + 1
                counter_feature[name_feat]["percentage"] = counter_feature[name_feat]["percentage"] + percentage
            else:
                counter_feature[name_feat] = {"count": 1, "percentage": percentage}

    counter_feature_s1 = dict(sorted(counter_feature.items(), key=lambda item: item[1]['count'], reverse=True)[:10])
    counter_feature_s2 = dict(sorted(counter_feature_s1.items(), key=lambda item: item[1]['percentage'] // len(model_array), reverse=True))
    #counter_feature_s2_rank = dict(sorted(counter_feature_s1.items(), key=lambda item: item[1]['count'], reverse=True))

    for key, value in counter_feature_s2.items():
        counter_feature_s2[key]['count'] = (counter_feature_s2[key]['count'] / len(model_array)) * 100
        counter_feature_s2[key]['severity'] = percentage2severity(counter_feature_s2[key]['percentage'] // len(model_array))
        counter_feature_s2[key]['percentage'] = (counter_feature_s2[key]['percentage'] // len(model_array))

    # Find Which Model Have Highest Confidence
    counter_feature_plot = {}
    for index, value in counter_feature_s2.items():
        higher_data = {"model": 0, "percentage": 0}
        for model_idx in threshold_percentages:
            if index in threshold_percentages[model_idx]:
                if higher_data["percentage"] <= threshold_percentages[model_idx][index]:
                    higher_data["model"] = model_idx
                    higher_data["percentage"] = threshold_percentages[model_idx][index]
        
        counter_feature_plot[index] = higher_data['model']

    return counter_feature_s2, counter_feature_plot

def calc_counterPercentageTrending(threshold_percentages):
    counter_feature = {}
    for modex_idx, values_pred in threshold_percentages.items():
        for name_feat, percentage in values_pred.items():
            if name_feat in counter_feature:
                if percentage > 5.0:
                    counter_feature[name_feat]["count"] = counter_feature[name_feat]["count"] + 1
                    counter_feature[name_feat]["percentage"] = counter_feature[name_feat]["percentage"] + percentage
            else:
                counter_feature[name_feat] = {"count": 1, "percentage": percentage}

    for key, value in counter_feature.items():
        counter_feature[key]['count'] = (counter_feature[key]['count'] / len(model_array)) * 100
        if counter_feature[key]['count'] >= 20.0:
            counter_feature[key]['severity'] = percentage2severity(counter_feature[key]['percentage'] // len(model_array))
            counter_feature[key]['percentage'] = (counter_feature[key]['percentage'] // len(model_array))
        else:
            counter_feature[key]['severity'] = 1
            counter_feature[key]['percentage'] = 0.0

    return counter_feature

def do_plotSeverityRank():
    fig = plt.figure(figsize=(16, 8))
    gs = GridSpec(4, 3, figure=fig)

    feature_index_list = [feature_set.index(feat_name) for feat_name in list(counter_feature_s2.keys())]
    for idx, (feature_index_now) in enumerate(feature_index_list[:4]):
        model_idx_highest = counter_feature_plot[feature_set[feature_index_now]]

        ax = fig.add_subplot(gs[idx, :2])
        ax.plot(df_timestamp, temp_ypreds[model_idx_highest][:, feature_index_now], color='blue', label='Prediction')
        ax.plot(df_timestamp, df_feature[:, feature_index_now], color='red', label='Original')
        ax.set_title(feature_set[feature_index_now])
        ax.legend() 
        ax.grid(True)

    date_format = DateFormatter("%d/%m/%Y - %H:%M")  # Define the desired format
    plt.gca().xaxis.set_major_formatter(date_format)
    plt.gcf().autofmt_xdate()

    y2 = list(counter_feature_s2.keys())
    x2 = [value['severity'] for value in counter_feature_s2.values()]
    x2_c = [value['count'] for value in counter_feature_s2.values()]

    norm_x2 = [(val - 1) / 5 for val in x2]
    cmap = LinearSegmentedColormap.from_list('severity_colormap', ['green', 'yellow', 'red'])
    colors = [cmap(norm) for norm in norm_x2]

    ax3 = fig.add_subplot(gs[:3, 2])
    bars = ax3.barh(y2, x2, color=colors)
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=1, vmax=6))
    sm.set_array([])
    cbar = plt.colorbar(sm, ax=ax3, orientation='vertical', label='Severity')

    for bar, perc in zip(bars, x2_c):
        width = bar.get_width()  # Get the width of the bar
        ax3.text(
            width - 0.1,             # X-coordinate (inside the bar, near the right edge)
            bar.get_y() + bar.get_height() / 2,  # Y-coordinate (center of the bar)
            f"{int(perc)}%",            # Text label (percentage with % sign)
            va='center',           # Vertical alignment
            ha='right',            # Horizontal alignment
            color='black',         # Text color for visibility
            fontsize=9            # Font size
        )
    ax3.invert_yaxis()
    ax3.set_xticks(range(1, 8))
    ax3.set_ylabel("Parameter")
    ax3.set_xlabel("Severity")
    ax3.set_title("Severity Rank")

    #fig.suptitle(f"{df_anomaly_unplaned.values[failure_index_list, 4]}_{df_anomaly_unplaned.values[failure_index_list, 0]}", fontsize=16, fontweight='bold', y=0.98)

    plt.tight_layout()
    return fig


def fetch_between_dates(start_date, end_date, db_name="data.db", table_name="sensor_data"):
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    
    cursor.execute(f"""
        SELECT * FROM {table_name} WHERE timestamp BETWEEN ? AND ?
    """, (start_date, end_date))
    
    rows = cursor.fetchall()
    conn.close()

    if not rows:
        return np.array([])
    
    return np.array(rows)

def convert_timestamp(timestamp_str):
    dt = datetime.fromisoformat(timestamp_str)
    return pd.Timestamp(dt.strftime('%Y-%m-%d %H:%M:%S'))



In [None]:
feature_set = ['Active Power', 'Reactive Power', 'Governor speed actual', 'UGB X displacement', 'UGB Y displacement',
    'LGB X displacement', 'LGB Y displacement', 'TGB X displacement',
    'TGB Y displacement', 'Stator winding temperature 13',
    'Stator winding temperature 14', 'Stator winding temperature 15',
    'Surface Air Cooler Air Outlet Temperature',
    'Surface Air Cooler Water Inlet Temperature',
    'Surface Air Cooler Water Outlet Temperature',
    'Stator core temperature', 'UGB metal temperature',
    'LGB metal temperature 1', 'LGB metal temperature 2',
    'LGB oil temperature', 'Penstock Flow', 'Turbine flow',
    'UGB cooling water flow', 'LGB cooling water flow',
    'Generator cooling water flow', 'Governor Penstock Pressure',
    'Penstock pressure', 'Opening Wicked Gate', 'UGB Oil Contaminant',
    'Gen Thrust Bearing Oil Contaminant']

# feature_set = ['Active Power', 'Governor speed actual', 
#     'UGB X displacement', 'UGB Y displacement', 'LGB X displacement', 'LGB Y displacement', 'TGB X displacement', 'TGB Y displacement', 
#     'Stator core temperature', 'Stator winding temperature 13', 'Stator winding temperature 14', 'Stator winding temperature 15',
#     'Surface Air Cooler Air Outlet Temperature', 'Surface Air Cooler Water Inlet Temperature', 'Surface Air Cooler Water Outlet Temperature',
#     'Gen Voltage Phase 1', 'Gen Voltage Phase 2', 'Gen Voltage Phase 3',
#     'Gen Current Phase 1', 'Gen Current Phase 2', 'Gen Current Phase 3', 
#     'UGB metal temperature', 'LGB metal temperature 1', 'LGB metal temperature 2',
#     'UGB oil temperature', 'LGB oil temperature', 'UGB cooling water flow', 'LGB cooling water flow', 'Generator cooling water flow',
#     'UGB Oil Contaminant', 'Gen Thrust Bearing Oil Contaminant',
#     'Penstock Flow', 'Turbine flow', 'Governor Penstock Pressure', 'Penstock pressure', 'Opening Wicked Gate']

model_array = ["Attention", "DTAAD", "MAD_GAN", "TranAD", "DAGMM", "USAD", "OmniAnomaly"]

# window_size = 15
# kernel = np.ones(window_size) / window_size

In [None]:
# conn = sqlite3.connect("db_data/original_data.db")
# cursor = conn.cursor()
# cursor.execute(f"""SELECT * FROM original_data order by rowid desc LIMIT 1""")
# rows = cursor.fetchall()
# conn.close()
# last_date = np.datetime64(np.array(rows)[:, 1][0]) 

In [None]:
end_dates_lastest = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S") #"2025-03-27T05:36:00" 
timestamp = datetime.strptime(end_dates_lastest, "%Y-%m-%dT%H:%M:%S")
hours_2before = timestamp - timedelta(hours=2)
beofre_15min = timestamp - timedelta(minutes=60)
hours_2before_str = hours_2before.strftime("%Y-%m-%dT%H:%M:%S")

threshold_percentages = {}
for idx_model, (model_name) in enumerate(model_array):
    now_fetched = fetch_between_dates(beofre_15min.strftime("%Y-%m-%dT%H:%M:%S"), end_dates_lastest, "db/threshold_data.db", model_name)[-1, 2:]

    threshold_pass = {}
    for idx_sensor, sensor_thre in enumerate(now_fetched):
        threshold_pass[feature_set[idx_sensor]] = float(sensor_thre)

    threshold_percentages[idx_model] = threshold_pass

temp_original_data = fetch_between_dates(hours_2before_str, end_dates_lastest, "db/original_data.db", "original_data")
df_timestamp, df_feature = temp_original_data[:, 1], temp_original_data[:, 2:].astype(np.float16)
df_timestamp = np.array([convert_timestamp(now_str) for now_str in df_timestamp])

temp_ypreds = {}
for idx_model, (model_name) in enumerate(model_array):
    temp_ypreds[idx_model] = fetch_between_dates(hours_2before_str, end_dates_lastest, "db/pred_data.db", model_name)[:, 2:].astype(np.float16)

counter_feature_s2, counter_feature_plot = calc_counterPercentage(threshold_percentages)
df_feature_send = []
y_pred_send = []

feature_index_list = [feature_set.index(feat_name) for feat_name in list(counter_feature_s2.keys())]
for idx, (feature_index_now) in enumerate(feature_index_list[:4]):
    model_idx_highest = counter_feature_plot[feature_set[feature_index_now]]

    df_feature_send.append(temp_ypreds[model_idx_highest][:, feature_index_now])
    y_pred_send.append(df_feature[:, feature_index_now])

df_feature_send = np.vstack(df_feature_send).T
y_pred_send = np.vstack(y_pred_send).T

# To Send counter_feature_s2, df_feature_send, y_pred_send

fig = do_plotSeverityRank()

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
import torch.nn as nn

l = nn.MSELoss(reduction='none')

In [None]:
df_feature_send = []
y_pred_send = []
loss_send = []
thr_now_model = []

feature_index_list = [feature_set.index(feat_name) for feat_name in list(counter_feature_s2.keys())]
for idx, (feature_index_now) in enumerate(feature_index_list[:4]):
    model_idx_highest = counter_feature_plot[feature_set[feature_index_now]]

    df_feature_send.append(temp_ypreds[model_idx_highest][:, feature_index_now])
    y_pred_send.append(df_feature[:, feature_index_now])
    
    loss_send.append(df_feature[:, feature_index_now])
    thr_now_model.append(float(model_thr[model_array[model_idx_highest]][feature_index_now]))


# df_feature_send = np.vstack(df_feature_send).T
# y_pred_send = np.vstack(y_pred_send).T

In [None]:
temp_ypreds[model_idx_highest][:, feature_index_now]

In [None]:
l(, )

In [None]:
df_feature_send[0]

In [None]:
thr_now_model

In [None]:
counter_feature = {}
for modex_idx, values_pred in threshold_percentages.items():
    values_pred = dict(sorted(values_pred.items(), key=lambda item: item[1], reverse=True)[:10])
    for name_feat, percentage in values_pred.items():
        if name_feat in counter_feature:
            counter_feature[name_feat]["count"] = counter_feature[name_feat]["count"] + 1
            counter_feature[name_feat]["percentage"] = counter_feature[name_feat]["percentage"] + percentage
        else:
            counter_feature[name_feat] = {"count": 1, "percentage": percentage}

counter_feature

In [None]:
counter_feature_s1 = dict(sorted(counter_feature.items(), key=lambda item: item[1]['count'], reverse=True)[:10])
counter_feature_s1

In [None]:
import scipy

In [None]:
end_dates_lastest = "2025-03-27T05:36:00" 
timestamp = datetime.strptime(end_dates_lastest, "%Y-%m-%dT%H:%M:%S")
hours_2before = timestamp - timedelta(days=30)
hours_2before_str = hours_2before.strftime("%Y-%m-%dT%H:%M:%S")

data_between_dates = fetch_between_dates(hours_2before_str, end_dates_lastest, "db/severity_trendings.db", "severity_trendings")

data_feature = data_between_dates[:, 2:].astype(float)
data_timestamp = np.array([convert_timestamp(now_str) for now_str in data_between_dates[:, 1]])

fig, axes = plt.subplots(30, 1, figsize=(10, 75))
for i, ax in enumerate(axes):
    ax.plot(data_timestamp, scipy.signal.savgol_filter(data_feature[:, i], 50, 3))  # Plot data for each row
    ax.set_title(f'{feature_set[i]} ', fontsize=10)  # Set title
    #ax.grid(True, linestyle='--', alpha=0.5)
    ax.set_ylim(0, 100)
    ax.set_ylabel("Severity Percentage")
    #ax.set_xticks(data_timestamp[::7])  # Reduce number of ticks

plt.tight_layout()
plt.show()

In [None]:
# def denormalize3(a_norm, min_a, max_a):
#     return a_norm * (max_a - min_a + 0.0001) + min_a

# with open('normalize_2023.pickle', 'rb') as handle:
#     normalize_obj = pickle.load(handle)
#     min_a, max_a = normalize_obj['min_a'], normalize_obj['max_a']

# with open('model_thr.pickle', 'rb') as handle:
#     model_thr = pickle.load(handle)

# for model_now in model_array:
#     model_thr[model_now] = denormalize3(np.array(model_thr[model_now]), min_a, max_a).tolist()

# with open('model_thr.pickle', 'wb') as handle:
#     pickle.dump(model_thr, handle, protocol=pickle.HIGHEST_PROTOCOL)