### SUD Box tool: EDA

## Goals:
1. checking if all tags available on historian server
1. Perform exploratory analysis of the data
2. Identify data-related issues and ways to address them


### TODO

1.

### Questions
1.

## Setup



### Imports

In [92]:

import pandas as pd
import pyodbc as pyodbc

import logging
import queries

import sys
import os
import logging
import sys
import argparse
import pytz
from os.path import join, split 

from datetime import datetime, timedelta
from tomlkit import parse, dumps, loads
from typing import List, Tuple

import historian


from sud_tools import *
from sud_utils import *
from tool_utils import *



### Reading in the configuration file


In [93]:
cfg_file_nm = 'etl_config.toml'
cfg_file_path = join(os.getcwd(), cfg_file_nm)
# checking if config file exists
if not os.path.isfile(cfg_file_path):
    raise ValueError(f'No config file was found at: {cfg_file_path}')
else:
    # reading in the config file
    cfg = loads(open(cfg_file_path).read())

In [94]:
tags_list_path = cfg['job']['path_to_tag_list']
print(f'Path to tag list table: {tags_list_path}')

Path to tag list table: F:\Ecosystem Non-OneDrive\Development Area\MVP048 SUD Box tool\Data\Input\tags_list.xlsx


### Setting up the connection to DatalabDB

In [95]:
# datalab integration
datalab_cfg = cfg['datalab_db'].copy()

# setting up run configuration with parameters from cfg file
datalab_db_conn = get_mssql_conn_string(**datalab_cfg['connection'])
datalab_db_access_url = r"{}".format(datalab_cfg['access_token_url']['access_token_url'])
datalab_db_access_token = get_azure_sql_db_access_token(datalab_db_access_url)
rejects_agg_freq = datalab_cfg['output_tables']['agg_freq']

print(f'Datalab connection parameters: {datalab_db_conn}')

Datalab connection parameters: Server=azpg-sqlserver-fhcenganalyticsdatalab.database.windows.net; Database=DatalabDB; Driver=ODBC Driver 17 for SQL Server;


### Fetching the tag list information


In [5]:
# reading the tag list file
tags_list_path = cfg['job']['path_to_tag_list']
print(f'Path to tag list table: {tags_list_path}')

# cheking if the logs folder exist
if not os.path.exists(tags_list_path):
    print(f'INFO: Didn not find logs folder at {tags_list_path}.')
    raise ValueError('Was not able to file tags list. Aborting...')
else:
    tags_list_df = pd.read_excel(tags_list_path)
    print(f'Shape of the tags list table: {tags_list_df.shape}')

Path to tag list table: F:\Ecosystem Non-OneDrive\Development Area\MVP048 SUD Box tool\Data\Input\tags_list.xlsx
Shape of the tags list table: (130, 1)


In [6]:
tags_list_df.head()

Unnamed: 0,TagName
0,LXXX_Cover_General_ExtractedCartons_Total_Coun...
1,LXXX_Cover_General_ProducedCovers_Total_Counte...
2,LXXX_Cover_General_RejectedCovers_0_Counter_Ac...
3,LXXX_Cover_Reshipper_good_inserted_n
4,LXXX_Cover_Reshipper_bad_rejected_n


### site specific config

In [64]:
site_cd = 'url'
site_cfg = cfg['sites'][site_cd].copy()

site_name = site_cfg['site_name']
site_enabled = site_cfg['enabled']


site_history_buffer_days = site_cfg['history_buffer_days']

site_servers = site_cfg['servers'].copy()
site_lines = site_cfg['lines']['lines']
site_lines_agile = site_cfg['lines']['lines_agile']

site_historian_tags = site_cfg['tags'].copy()

site_tz = site_servers['timezone']
site_dttm_format = site_servers['dttm_format']
site_historian_source = site_servers['historian']['source']
site_history_depth = site_servers['historian']['history_depth_days']
site_days_to_retake = site_history_depth + site_history_buffer_days



In [8]:

site_tags = tags_list_df.loc[:, 'TagName'].unique()
site_tags = ['_'.join(t.split('_')[1:]) for t in site_tags]
site_tags

['Cover_General_ExtractedCartons_Total_Counter_Actual_n',
 'Cover_General_ProducedCovers_Total_Counter_Actual_n',
 'Cover_General_RejectedCovers_0_Counter_Actual_n',
 'Cover_Reshipper_good_inserted_n',
 'Cover_Reshipper_bad_rejected_n',
 'Cover_Extraction_Turret_CartonsNotExtracted_0_Actual_n',
 'Cover_TransportBelt_CheckExternal_0_0_Rejected_n',
 'Cover_TransportBelt_CheckInternal_0_0_Rejected_n',
 'Cover_TransportBelt_RobotTrack_Skipped_0_Rejected_n',
 'Cover_Forming_PatchErectionCheck_Bad_Counter_Rejected_n',
 'Cover_Forming_PatchErectionCheck_BadAngle_Counter_Rejected_n',
 'Cover_Forming_PatchErectionCheck_NoFeedback_Counter_Rejected_n',
 'Cover_Reshipper_BCR_NotRead_Rejected_n',
 'Cover_Reshipper_BCR_MisMatch_Rejected_n',
 'Cover_Reshipper_BCR_NoFeedback_Rejected_n',
 'Cover_TransportBelt_HoleFlap_0_Check_Rejected_n',
 'Cover_Former1_ExternalGlueCamera_Bad_0_Rejected_n',
 'Cover_Former1_ExternalGlueCamera_NoFeedback_0_Rejected_n',
 'Cover_Former1_InternalGlueCamera_Bad_0_Rejected_

In [9]:
site_tags=[tag.replace(u'\xa0',u'') for tag in site_tags]
site_tags

['Cover_General_ExtractedCartons_Total_Counter_Actual_n',
 'Cover_General_ProducedCovers_Total_Counter_Actual_n',
 'Cover_General_RejectedCovers_0_Counter_Actual_n',
 'Cover_Reshipper_good_inserted_n',
 'Cover_Reshipper_bad_rejected_n',
 'Cover_Extraction_Turret_CartonsNotExtracted_0_Actual_n',
 'Cover_TransportBelt_CheckExternal_0_0_Rejected_n',
 'Cover_TransportBelt_CheckInternal_0_0_Rejected_n',
 'Cover_TransportBelt_RobotTrack_Skipped_0_Rejected_n',
 'Cover_Forming_PatchErectionCheck_Bad_Counter_Rejected_n',
 'Cover_Forming_PatchErectionCheck_BadAngle_Counter_Rejected_n',
 'Cover_Forming_PatchErectionCheck_NoFeedback_Counter_Rejected_n',
 'Cover_Reshipper_BCR_NotRead_Rejected_n',
 'Cover_Reshipper_BCR_MisMatch_Rejected_n',
 'Cover_Reshipper_BCR_NoFeedback_Rejected_n',
 'Cover_TransportBelt_HoleFlap_0_Check_Rejected_n',
 'Cover_Former1_ExternalGlueCamera_Bad_0_Rejected_n',
 'Cover_Former1_ExternalGlueCamera_NoFeedback_0_Rejected_n',
 'Cover_Former1_InternalGlueCamera_Bad_0_Rejected_

In [10]:
all_tag_list=get_tags_list(lines=site_lines,sensors=site_tags,sep='_', topic=site_servers['historian']['proficy']['topic'])
#removing PO related tags
all_tag_list=[t for t in all_tag_list if 'CurrentPO' not in t]
print(len(all_tag_list))
#all_tag_list

126


In [11]:
all_tag_list

['URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n',
 'URL-PACK.L13_Cover_General_ProducedCovers_Total_Counter_Actual_n',
 'URL-PACK.L13_Cover_General_RejectedCovers_0_Counter_Actual_n',
 'URL-PACK.L13_Cover_Reshipper_good_inserted_n',
 'URL-PACK.L13_Cover_Reshipper_bad_rejected_n',
 'URL-PACK.L13_Cover_Extraction_Turret_CartonsNotExtracted_0_Actual_n',
 'URL-PACK.L13_Cover_TransportBelt_CheckExternal_0_0_Rejected_n',
 'URL-PACK.L13_Cover_TransportBelt_CheckInternal_0_0_Rejected_n',
 'URL-PACK.L13_Cover_TransportBelt_RobotTrack_Skipped_0_Rejected_n',
 'URL-PACK.L13_Cover_Forming_PatchErectionCheck_Bad_Counter_Rejected_n',
 'URL-PACK.L13_Cover_Forming_PatchErectionCheck_BadAngle_Counter_Rejected_n',
 'URL-PACK.L13_Cover_Forming_PatchErectionCheck_NoFeedback_Counter_Rejected_n',
 'URL-PACK.L13_Cover_Reshipper_BCR_NotRead_Rejected_n',
 'URL-PACK.L13_Cover_Reshipper_BCR_MisMatch_Rejected_n',
 'URL-PACK.L13_Cover_Reshipper_BCR_NoFeedback_Rejected_n',
 'URL-PACK.L13_Cover_T

## Checking tag availability

In [12]:
historian.use_context('REST', 
                    client_id='historian_public_rest_api', 
                    client_password='publicapisecret',
                    app_id='sudanalytics.im', 
                    app_password='phoenix2021SUD', # change to environmental variable
                    port=site_servers['historian']['proficy']['port'],
                    verify_certificate=True)

In [13]:
all_site_tags_dict=historian.get_tag_names(site_servers['historian']['proficy']['server_name'],    
                        filter_name='*',    
                        filter_description='*',    
                        has_values=False,    
                        guaranteed_sample_before=None,    
                        newer_than=None,    
                        verbose=False)

In [14]:
all_site_tags_list = list(all_site_tags_dict.keys())
all_site_tags_list

['URL-CONV.L10_Anvil_ActualPosition',
 'URL-CONV.L10_Anvil_ActualVelocity',
 'URL-CONV.L10_Anvil_CommandPosition',
 'URL-CONV.L10_Anvil_CommandVelocity',
 'URL-CONV.L10_Anvil_MotorCapacity',
 'URL-CONV.L10_Anvil_PositionError',
 'URL-CONV.L10_Anvil_TorqueFeedback',
 'URL-CONV.L10_Anvil_TorqueFeedback_RMS',
 'URL-CONV.L10_AnvilPhaseOffset',
 'URL-CONV.L10_Bottom_Felt_New_Alarm',
 'URL-CONV.L10_Bottom_Felt_Roll_Pressure_bar',
 'URL-CONV.L10_Bottom_Heating_1_OP',
 'URL-CONV.L10_Bottom_Heating_1_PV',
 'URL-CONV.L10_Bottom_Heating_1_Temp_High_Alarm',
 'URL-CONV.L10_Bottom_Heating_1_Temp_Low_Alarm',
 'URL-CONV.L10_Bottom_Heating_2_OP',
 'URL-CONV.L10_Bottom_Heating_2_PV',
 'URL-CONV.L10_Bottom_Heating_2_Temp_High_Alarm',
 'URL-CONV.L10_Bottom_Heating_2_Temp_Low_Alarm',
 'URL-CONV.L10_Bottom_Heating_3_OP',
 'URL-CONV.L10_Bottom_Heating_3_PV',
 'URL-CONV.L10_Bottom_Heating_3_Temp_High_Alarm',
 'URL-CONV.L10_Bottom_Heating_3_Temp_Low_Alarm',
 'URL-CONV.L10_Bottom_Heating_4_OP',
 'URL-CONV.L10_B

In [15]:
available_tags=[t for t in all_tag_list if t in all_site_tags_list]
missing_tags=[t for t in all_tag_list if t not in available_tags]
print(len(available_tags),len(missing_tags))

126 0


In [16]:
missing_tags

[]

## high level check of tag values(min, max, negative values, etc.)
### extracting data for short timespan

In [35]:
start_time = pd.to_datetime('2022-05-23 12:00:00')
end_time = pd.to_datetime('2022-05-23 14:00:00')
print(f'Extracting data between: {start_time} and {end_time}')

Extracting data between: 2022-05-23 12:00:00 and 2022-05-23 14:00:00


In [36]:
historian_extract_df = historian.get_tag_values(
                    site_servers['historian']['proficy']['server_name'],
                    start_time=start_time,
                    end_time=end_time,
                    filter_name=list(all_tag_list))
print(f'Shape of data extract: {historian_extract_df.shape}')
                

Shape of data extract: (202, 2)


In [19]:
historian_extract_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Value,Quality
Tag,Timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Rejected_n,2022-05-23 12:54:22.672000+00:00,0,3
URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Rejected_n,2022-05-23 12:53:44.295000+00:00,0,0
URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Rejected_n,2022-05-23 12:53:36.075000+00:00,0,0
URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Rejected_n,2022-05-23 12:52:36.416000+00:00,0,0
URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Rejected_n,2022-05-23 12:52:27.072000+00:00,0,0
...,...,...,...
URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:45:17.049000+00:00,0,0
URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:44:17.529000+00:00,22,3
URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:43:47.121000+00:00,0,0
URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:43:41.059000+00:00,0,0


In [20]:
### flatten data
extract_df=historian_extract_df.reset_index()
extract_df

Unnamed: 0,Tag,Timestamp,Value,Quality
0,URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Re...,2022-05-23 12:54:22.672000+00:00,0,3
1,URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Re...,2022-05-23 12:53:44.295000+00:00,0,0
2,URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Re...,2022-05-23 12:53:36.075000+00:00,0,0
3,URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Re...,2022-05-23 12:52:36.416000+00:00,0,0
4,URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Re...,2022-05-23 12:52:27.072000+00:00,0,0
...,...,...,...,...
197,URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:45:17.049000+00:00,0,0
198,URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:44:17.529000+00:00,22,3
199,URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:43:47.121000+00:00,0,0
200,URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:43:41.059000+00:00,0,0


In [21]:
extract_df=extract_df.assign(Value=(pd.to_numeric(extract_df['Value'])))
extract_df

Unnamed: 0,Tag,Timestamp,Value,Quality
0,URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Re...,2022-05-23 12:54:22.672000+00:00,0,3
1,URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Re...,2022-05-23 12:53:44.295000+00:00,0,0
2,URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Re...,2022-05-23 12:53:36.075000+00:00,0,0
3,URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Re...,2022-05-23 12:52:36.416000+00:00,0,0
4,URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Re...,2022-05-23 12:52:27.072000+00:00,0,0
...,...,...,...,...
197,URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:45:17.049000+00:00,0,0
198,URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:44:17.529000+00:00,22,3
199,URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:43:47.121000+00:00,0,0
200,URL-PACK.L13_Upack_LineRecipe,2022-05-23 12:43:41.059000+00:00,0,0


In [22]:
extract_df.groupby('Tag').describe()

Unnamed: 0_level_0,Value,Value,Value,Value,Value,Value,Value,Value
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Tag,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
URL-PACK.L13_Secondary_FC11_Dataman_Reads_Result_Bad_n,9.0,16.666667,33.071891,0.0,0.0,0.0,0.0,75.0
URL-PACK.L13_Secondary_FC11_Dataman_Reads_Result_Mismatch_n,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
URL-PACK.L13_Secondary_FC11_Minibea_Reads_Result_Bad_n,9.0,0.444444,0.881917,0.0,0.0,0.0,0.0,2.0
URL-PACK.L13_Secondary_FC11_Minibea_Reads_Result_Mismatch_n,9.0,7.555556,14.992591,0.0,0.0,0.0,0.0,34.0
URL-PACK.L13_Secondary_FC11_Minibea_Reads_Result_Total_n,9.0,1538.666667,3053.197013,0.0,0.0,0.0,0.0,6924.0
URL-PACK.L13_Upack_BoxCheckweigher_0_Extra_Rejected_n,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_0_Rejected_n,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_1_NotRejected_n,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_2_NotRejected_n,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
URL-PACK.L13_Upack_BoxCheckweigher_0_Zone_3_Rejected_n,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
extract_df.groupby('Tag').describe().describe()

Unnamed: 0_level_0,Value,Value,Value,Value,Value,Value,Value,Value
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,11.222222,87.550265,173.487274,0.0,0.0,0.0,1.833333,393.277778
std,2.55655,362.177915,718.73185,0.0,0.0,0.0,5.335784,1629.967802
min,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,14.0,4.825397,7.955799,0.0,0.0,0.0,0.0,17.0
max,14.0,1538.666667,3053.197013,0.0,0.0,0.0,16.5,6924.0


In [24]:
#checking if Value contains negative values
(extract_df['Value']<0).any()

False

### Increasing timespan of data extraction for 2 day

In [25]:
## extracting data
start_dttm = datetime.now() - timedelta(days=1)
#start_dttm = start_dttm - timedelta(days=site_days_to_retake)
start_time = start_dttm
end_time = datetime.now()
print(f'Extracting data between: {start_time} and {end_time}')

Extracting data between: 2022-06-07 07:55:45.501470 and 2022-06-08 07:55:45.501470


In [26]:
historian_extract_df_new = historian.get_tag_values(
                    site_servers['historian']['proficy']['server_name'],
                    start_time=start_time,
                    end_time=end_time,
                    filter_name=list(all_tag_list))
print(f'Shape of data extract: {historian_extract_df_new.shape}')
                

Shape of data extract: (65, 2)


In [25]:
historian_extract_df_new

Unnamed: 0_level_0,Unnamed: 1_level_0,Value,Quality
Tag,Timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n,2022-06-03 09:37:18.241000+00:00,20276,3
URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n,2022-06-03 09:37:17.225000+00:00,20274,3
URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n,2022-06-03 09:37:16.240000+00:00,20272,3
URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n,2022-06-03 09:37:15.225000+00:00,20269,3
URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n,2022-06-03 09:37:14.240000+00:00,20267,3
...,...,...,...
URL-PACK.L13_Base_Checkweigher_Washcount_Check_WrongPosition_Rejected_n,2022-06-02 12:03:38.570000+00:00,1,3
URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoint_n,2022-06-03 05:10:01.674000+00:00,19,3
URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoint_n,2022-06-02 19:52:09.532000+00:00,0,3
URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoint_n,2022-06-02 06:29:27.883000+00:00,15,3


In [27]:
extract_df_new=historian_extract_df_new.reset_index()
extract_df_new.head()

Unnamed: 0,Tag,Timestamp,Value,Quality
0,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:18.241000+00:00,20276,3
1,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:17.225000+00:00,20274,3
2,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:16.240000+00:00,20272,3
3,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:15.225000+00:00,20269,3
4,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:14.240000+00:00,20267,3


In [28]:
extract_df_new=extract_df_new.assign(Value_num=(pd.to_numeric(extract_df_new['Value'])))
extract_df_new.head()

Unnamed: 0,Tag,Timestamp,Value,Quality,Value_num
0,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:18.241000+00:00,20276,3,20276.0
1,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:17.225000+00:00,20274,3,20274.0
2,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:16.240000+00:00,20272,3,20272.0
3,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:15.225000+00:00,20269,3,20269.0
4,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:14.240000+00:00,20267,3,20267.0


In [29]:
extract_df_new.groupby('Tag').describe().describe()

Unnamed: 0_level_0,Value_num,Value_num,Value_num,Value_num,Value_num,Value_num,Value_num,Value_num
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
count,87.0,87.0,80.0,87.0,87.0,87.0,87.0,87.0
mean,1243.816092,867.464061,575.112013,0.183908,424.605287,844.392069,1258.984023,1955.889425
std,4124.760939,3104.913948,1961.0501,1.402064,1527.418762,3029.463418,4500.853991,6986.556685
min,1.0,0.0,0.57735,0.0,0.0,0.0,0.0,0.0
25%,3.5,1.2,1.112697,0.0,0.5,1.0,1.875,2.0
50%,11.0,3.5,2.836039,0.0,1.75,3.0,5.25,8.0
75%,51.0,16.758571,15.328926,0.0,7.75,15.5,26.25,40.0
max,19030.0,14036.296479,8571.569619,13.0,6870.25,13557.0,20284.5,32049.0


In [30]:
extract_df_new.groupby('Tag').describe()

Unnamed: 0_level_0,Value_num,Value_num,Value_num,Value_num,Value_num,Value_num,Value_num,Value_num
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Tag,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_TimeOut_Rejected_n,8.0,2.250000,1.669046,0.0,1.00,2.0,3.25,5.0
URL-PACK.L13_Base_Checkweigher_Base_2Dcheck_NotRead_Rejected_n,2.0,0.500000,0.707107,0.0,0.25,0.5,0.75,1.0
URL-PACK.L13_Base_Checkweigher_Cover_2Dcheck_NotRead_Rejected_n,6.0,2.500000,1.870829,0.0,1.25,2.5,3.75,5.0
URL-PACK.L13_Base_Checkweigher_Tamper_2Dcheck_NotRead_Rejected_n,6.0,4.500000,3.885872,0.0,2.00,3.5,7.25,10.0
URL-PACK.L13_Base_Checkweigher_Tamper_2Dcheck_TimeOut_Rejected_n,2.0,0.500000,0.707107,0.0,0.25,0.5,0.75,1.0
...,...,...,...,...,...,...,...,...
URL-PACK.L13_Upack_CaseCheckweigher_Reject_ExternalReason_0Rejected_n,7.0,3.000000,2.160247,0.0,1.50,3.0,4.50,6.0
URL-PACK.L13_Upack_CaseCheckweigher_Reject_Weight_NOKRejected_n,3.0,0.666667,0.577350,0.0,0.50,1.0,1.00,1.0
URL-PACK.L13_Upack_LineRecipe,1.0,13.000000,,13.0,13.00,13.0,13.00,13.0
URL-PACK.L13_Upack_OLCP_1DCheck_0_NOKRejected_n,12.0,3.500000,2.540580,0.0,1.75,3.0,5.25,8.0


In [31]:
# checking if Value contains negative
(extract_df_new['Value_num']<0).any()

False

## checking counters validity

### reject counter tags

In [32]:
tags_to_extract=[t for t in all_tag_list if ('CaseCheckweigher_Reject_Case_Weight_Real' not in t) & ('LineRecipe' not in t)]
print(len(tags_to_extract))

124


### Extracting 2 day data for all reject counter tags

In [33]:
## defining timeframe for extracting data
start_dttm = datetime.now() - timedelta(days=2)
#start_dttm = start_dttm - timedelta(days=site_days_to_retake)
start_time = start_dttm
end_time = datetime.now()
print(f'Extracting data between: {start_time} and {end_time}')

Extracting data between: 2022-06-01 09:43:55.680492 and 2022-06-03 09:43:55.680492


In [34]:
historian_extract = historian.get_tag_values(
                    site_servers['historian']['proficy']['server_name'],
                    start_time=start_time,
                    end_time=end_time,
                    filter_name=list(tags_to_extract))
print(f'Shape of data extract: {historian_extract.shape}')
                

Shape of data extract: (89760, 2)


In [35]:
historian_extract

Unnamed: 0_level_0,Unnamed: 1_level_0,Value,Quality
Tag,Timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n,2022-06-03 09:37:18.241000+00:00,20276,3
URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n,2022-06-03 09:37:17.225000+00:00,20274,3
URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n,2022-06-03 09:37:16.240000+00:00,20272,3
URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n,2022-06-03 09:37:15.225000+00:00,20269,3
URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n,2022-06-03 09:37:14.240000+00:00,20267,3
...,...,...,...
URL-PACK.L13_Base_Checkweigher_Washcount_Check_WrongPosition_Rejected_n,2022-06-02 16:53:08.164000+00:00,2,3
URL-PACK.L13_Base_Checkweigher_Washcount_Check_WrongPosition_Rejected_n,2022-06-02 12:03:38.570000+00:00,1,3
URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoint_n,2022-06-03 05:10:01.674000+00:00,19,3
URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoint_n,2022-06-02 19:52:09.532000+00:00,0,3


In [37]:
#flatten the table
historian_extract=historian_extract.reset_index()
historian_extract_dfn=historian_extract[['Tag','Timestamp','Value']]
historian_extract_dfn

Unnamed: 0,Tag,Timestamp,Value
0,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:18.241000+00:00,20276
1,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:17.225000+00:00,20274
2,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:16.240000+00:00,20272
3,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:15.225000+00:00,20269
4,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 09:37:14.240000+00:00,20267
...,...,...,...
89755,URL-PACK.L13_Base_Checkweigher_Washcount_Check...,2022-06-02 16:53:08.164000+00:00,2
89756,URL-PACK.L13_Base_Checkweigher_Washcount_Check...,2022-06-02 12:03:38.570000+00:00,1
89757,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-03 05:10:01.674000+00:00,19
89758,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-02 19:52:09.532000+00:00,0


In [38]:
# converting timestamp and value column
historian_extract_dfn=historian_extract_dfn.assign(Timestamp = historian_extract_dfn['Timestamp'].dt.tz_convert(site_tz).dt.tz_localize(None),
                                            Value = (pd.to_numeric(historian_extract_dfn['Value'])))
historian_extract_dfn

Unnamed: 0,Tag,Timestamp,Value
0,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 11:37:18.241,20276
1,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 11:37:17.225,20274
2,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 11:37:16.240,20272
3,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 11:37:15.225,20269
4,URL-PACK.L13_Cover_General_ExtractedCartons_To...,2022-06-03 11:37:14.240,20267
...,...,...,...
89755,URL-PACK.L13_Base_Checkweigher_Washcount_Check...,2022-06-02 18:53:08.164,2
89756,URL-PACK.L13_Base_Checkweigher_Washcount_Check...,2022-06-02 14:03:38.570,1
89757,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-03 07:10:01.674,19
89758,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-02 21:52:09.532,0


In [39]:
historian_extract_dfn.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89760 entries, 0 to 89759
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Tag        89760 non-null  object        
 1   Timestamp  89760 non-null  datetime64[ns]
 2   Value      89760 non-null  int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 2.1+ MB


In [40]:
#sort by tag name and time
historian_extract_dfn=historian_extract_dfn.sort_values(['Tag','Timestamp'],ascending=[False,True])
historian_extract_dfn

Unnamed: 0,Tag,Timestamp,Value
89759,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-02 08:29:27.883,15
89758,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-02 21:52:09.532,0
89757,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-03 07:10:01.674,19
86682,URL-PACK.L13_Upack_OLCP_1DCheck_0_NOKRejected_n,2022-06-02 11:15:02.224,1
86681,URL-PACK.L13_Upack_OLCP_1DCheck_0_NOKRejected_n,2022-06-02 13:13:52.462,2
...,...,...,...
72337,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-02 13:19:28.479,4
72336,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-02 16:40:49.888,5
72335,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-02 21:52:06.532,0
72334,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-03 07:25:59.712,1


In [42]:
mask_rm = historian_extract_dfn['Value'].isnull() | (historian_extract_dfn['Value'] < 0)
counters_df = historian_extract_dfn.loc[~mask_rm].copy()
    
print(f'TRANSFORM: # removed records {sum(mask_rm)}')
print(f'TRANSFORM: Shape after step: {counters_df.shape}')

TRANSFORM: # removed records 0
TRANSFORM: Shape after step: (89760, 3)


In [43]:
#calculating number of rejects
group_by = ['Tag']
counters_df = counters_df.assign(reject_qty=(counters_df.groupby(group_by)['Value'].diff().values))
counters_df

Unnamed: 0,Tag,Timestamp,Value,reject_qty
89759,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-02 08:29:27.883,15,
89758,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-02 21:52:09.532,0,-15.0
89757,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-03 07:10:01.674,19,19.0
86682,URL-PACK.L13_Upack_OLCP_1DCheck_0_NOKRejected_n,2022-06-02 11:15:02.224,1,
86681,URL-PACK.L13_Upack_OLCP_1DCheck_0_NOKRejected_n,2022-06-02 13:13:52.462,2,1.0
...,...,...,...,...
72337,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-02 13:19:28.479,4,1.0
72336,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-02 16:40:49.888,5,1.0
72335,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-02 21:52:06.532,0,-5.0
72334,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-03 07:25:59.712,1,1.0


In [51]:
## defining the counter reset
mask_reset = (counters_df['reject_qty'] < 0) & ((counters_df['Value'] == 0)
            )
counters_df = counters_df.assign(reset_flag = mask_reset)
counters_df

Unnamed: 0,Tag,Timestamp,Value,reject_qty,prev_value,next_value,reset_flag
89759,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-02 08:29:27.883,15,,,0.0,False
89758,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-02 21:52:09.532,0,-15.0,15.0,19.0,True
89757,URL-PACK.L13_Upack_PO_PodsCount_Counter_SetPoi...,2022-06-03 07:10:01.674,19,19.0,0.0,,False
86682,URL-PACK.L13_Upack_OLCP_1DCheck_0_NOKRejected_n,2022-06-02 11:15:02.224,1,,,2.0,False
86681,URL-PACK.L13_Upack_OLCP_1DCheck_0_NOKRejected_n,2022-06-02 13:13:52.462,2,1.0,1.0,3.0,False
...,...,...,...,...,...,...,...
72337,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-02 13:19:28.479,4,1.0,3.0,5.0,False
72336,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-02 16:40:49.888,5,1.0,4.0,0.0,False
72335,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-02 21:52:06.532,0,-5.0,5.0,1.0,True
72334,URL-PACK.L13_Base_Checkweigher_Base _2Dcheck_T...,2022-06-03 07:25:59.712,1,1.0,0.0,2.0,False


In [48]:

## defining the counter not reset
mask_resetn = (counters_df['reject_qty'] < 0) & ((counters_df['Value'] != 0)
             )
counter_not_reset_df = counters_df.loc[mask_resetn]
counter_not_reset_df

Unnamed: 0,Tag,Timestamp,Value,reject_qty,prev_value,next_value,reset_flag
24776,URL-PACK.L13_Cover_General_ProducedCovers_Tota...,2022-06-02 08:29:28.851,41,-2.0,43.0,49.0,False
24581,URL-PACK.L13_Cover_General_ProducedCovers_Tota...,2022-06-02 11:46:31.290,1365,-1.0,1366.0,1373.0,False
24362,URL-PACK.L13_Cover_General_ProducedCovers_Tota...,2022-06-02 12:07:47.336,2862,-1.0,2863.0,2870.0,False
24270,URL-PACK.L13_Cover_General_ProducedCovers_Tota...,2022-06-02 12:11:51.332,3523,-1.0,3524.0,3527.0,False
23970,URL-PACK.L13_Cover_General_ProducedCovers_Tota...,2022-06-02 12:36:53.379,5730,-1.0,5731.0,5738.0,False
20633,URL-PACK.L13_Cover_General_ProducedCovers_Tota...,2022-06-02 21:27:03.487,28844,-1.0,28845.0,28848.0,False
20581,URL-PACK.L13_Cover_General_ProducedCovers_Tota...,2022-06-02 21:34:03.497,29187,-1.0,29188.0,29192.0,False
20579,URL-PACK.L13_Cover_General_ProducedCovers_Tota...,2022-06-02 21:34:06.498,29190,-2.0,29192.0,29187.0,False
20578,URL-PACK.L13_Cover_General_ProducedCovers_Tota...,2022-06-02 21:34:07.498,29187,-3.0,29190.0,29185.0,False
20577,URL-PACK.L13_Cover_General_ProducedCovers_Tota...,2022-06-02 21:34:10.482,29185,-2.0,29187.0,29183.0,False


In [49]:
len(counter_not_reset_df)

58

In [50]:
counter_not_reset_df['Tag'].unique()

array(['URL-PACK.L13_Cover_General_ProducedCovers_Total_Counter_Actual_n',
       'URL-PACK.L13_Cover_General_ExtractedCartons_Total_Counter_Actual_n',
       'URL-PACK.L13_Base_Machine_ProducedBases_0_Counter_Actual_n'],
      dtype=object)

## new ETL process

### tags for extracting reject data

In [127]:
reject_cause_dim_path = cfg['job']['path_to_reject_cause_dim']
print(f'Path to reject cause dimension table: {reject_cause_dim_path}')

# cheking if the reject cause dimension path exist
if not os.path.exists(reject_cause_dim_path):
    print(f'INFO: Didn not find reject cause table at {reject_cause_dim_path}.')
    raise ValueError('Was not able to find reject cause table. Aborting...')
else:
    reject_cause_df = pd.read_excel(reject_cause_dim_path)
    print(f'Shape of the reject cause table: {reject_cause_df.shape}')

Path to reject cause dimension table: F:\Ecosystem Non-OneDrive\Development Area\MVP048 SUD Box tool\Data\initial loading\REJECT_CAUSE_DIM.xlsx
Shape of the reject cause table: (97, 6)


In [128]:
reject_cause_df.head()

Unnamed: 0,id,tag_nm,cause,reason,station,type
0,1,Cover_Extraction_Turret_CartonsNotExtracted_0_...,Not extracted,Not extracted,Cover Transport Belt RejectsBlanks,Cover
1,2,Cover_TransportBelt_CheckExternal_0_0_Rejected_n,Check External,Transport belt,Cover Transport Belt RejectsBlanks,Cover
2,3,Cover_TransportBelt_CheckInternal_0_0_Rejected_n,Check Internal,Transport belt,Cover Transport Belt RejectsBlanks,Cover
3,4,Cover_TransportBelt_RobotTrack_Skipped_0_Rejec...,Robot track,Transport belt,Cover Transport Belt RejectsBlanks,Cover
4,5,Cover_Forming_PatchErectionCheck_Bad_Counter_R...,Patch Erection Check Bad,Patch reject,Cover Patch Control RejectsPatch,Cover


In [129]:
reject_tags = reject_cause_df.loc[:, 'tag_nm'].unique()
reject_tags = ['_'.join(t.split('_')) for t in reject_tags]
reject_tags

['Cover_Extraction_Turret_CartonsNotExtracted_0_Actual_n\xa0',
 'Cover_TransportBelt_CheckExternal_0_0_Rejected_n\xa0',
 'Cover_TransportBelt_CheckInternal_0_0_Rejected_n\xa0',
 'Cover_TransportBelt_RobotTrack_Skipped_0_Rejected_n\xa0',
 'Cover_Forming_PatchErectionCheck_Bad_Counter_Rejected_n\xa0',
 'Cover_Forming_PatchErectionCheck_BadAngle_Counter_Rejected_n\xa0',
 'Cover_Forming_PatchErectionCheck_NoFeedback_Counter_Rejected_n\xa0',
 'Cover_TransportBelt_HoleFlap_0_Check_Rejected_n\xa0',
 'Cover_Former1_ExternalGlueCamera_Bad_0_Rejected_n\xa0',
 'Cover_Former1_ExternalGlueCamera_NoFeedback_0_Rejected_n\xa0',
 'Cover_Former1_InternalGlueCamera_Bad_0_Rejected_n\xa0',
 'Cover_Former1_InternalGlueCamera_NoFeedback_0_Rejected_n\xa0',
 'Cover_Former1_ Glue_Dry _0_Rejected_n\xa0',
 'Cover_Former1_External_CoverNotSeenBeforeGlue_0_Rejected_n\xa0',
 'Cover_Former1_Internal_CoverNotSeenBeforeGlue_0_Rejected_n\xa0',
 'Cover_Former1_RobotHead1_FlapCheck1_0_Rejected_n\xa0',
 'Cover_Former1_Robo

In [130]:
reject_tags = [tag.replace(u'\xa0','') for tag in reject_tags]
reject_tags

['Cover_Extraction_Turret_CartonsNotExtracted_0_Actual_n',
 'Cover_TransportBelt_CheckExternal_0_0_Rejected_n',
 'Cover_TransportBelt_CheckInternal_0_0_Rejected_n',
 'Cover_TransportBelt_RobotTrack_Skipped_0_Rejected_n',
 'Cover_Forming_PatchErectionCheck_Bad_Counter_Rejected_n',
 'Cover_Forming_PatchErectionCheck_BadAngle_Counter_Rejected_n',
 'Cover_Forming_PatchErectionCheck_NoFeedback_Counter_Rejected_n',
 'Cover_TransportBelt_HoleFlap_0_Check_Rejected_n',
 'Cover_Former1_ExternalGlueCamera_Bad_0_Rejected_n',
 'Cover_Former1_ExternalGlueCamera_NoFeedback_0_Rejected_n',
 'Cover_Former1_InternalGlueCamera_Bad_0_Rejected_n',
 'Cover_Former1_InternalGlueCamera_NoFeedback_0_Rejected_n',
 'Cover_Former1_ Glue_Dry _0_Rejected_n',
 'Cover_Former1_External_CoverNotSeenBeforeGlue_0_Rejected_n',
 'Cover_Former1_Internal_CoverNotSeenBeforeGlue_0_Rejected_n',
 'Cover_Former1_RobotHead1_FlapCheck1_0_Rejected_n',
 'Cover_Former1_RobotHead1_FlapCheck2_0_Rejected_n',
 'Cover_Former1_RobotHead2_Flap

In [131]:
tags_to_extract=get_tags_list(lines=site_lines,sensors=reject_tags,sep='_', topic=site_servers['historian']['proficy']['topic'])
tags_to_extract=[t for t in tags_to_extract if 'CaseCheckweigher_Reject_Case_Weight_Real' not in t]
tags_to_extract

['URL-PACK.L13_Cover_Extraction_Turret_CartonsNotExtracted_0_Actual_n',
 'URL-PACK.L13_Cover_TransportBelt_CheckExternal_0_0_Rejected_n',
 'URL-PACK.L13_Cover_TransportBelt_CheckInternal_0_0_Rejected_n',
 'URL-PACK.L13_Cover_TransportBelt_RobotTrack_Skipped_0_Rejected_n',
 'URL-PACK.L13_Cover_Forming_PatchErectionCheck_Bad_Counter_Rejected_n',
 'URL-PACK.L13_Cover_Forming_PatchErectionCheck_BadAngle_Counter_Rejected_n',
 'URL-PACK.L13_Cover_Forming_PatchErectionCheck_NoFeedback_Counter_Rejected_n',
 'URL-PACK.L13_Cover_TransportBelt_HoleFlap_0_Check_Rejected_n',
 'URL-PACK.L13_Cover_Former1_ExternalGlueCamera_Bad_0_Rejected_n',
 'URL-PACK.L13_Cover_Former1_ExternalGlueCamera_NoFeedback_0_Rejected_n',
 'URL-PACK.L13_Cover_Former1_InternalGlueCamera_Bad_0_Rejected_n',
 'URL-PACK.L13_Cover_Former1_InternalGlueCamera_NoFeedback_0_Rejected_n',
 'URL-PACK.L13_Cover_Former1_ Glue_Dry _0_Rejected_n',
 'URL-PACK.L13_Cover_Former1_External_CoverNotSeenBeforeGlue_0_Rejected_n',
 'URL-PACK.L13_Cov

### extracting 3 weeks data

In [132]:
start_dttm = datetime.now() - timedelta(days=21)
#start_dttm = start_dttm - timedelta(days=site_days_to_retake)
start_time = start_dttm
end_time = datetime.now()
print(f'Extracting data between: {start_time} and {end_time}')

Extracting data between: 2022-05-18 14:12:56.630402 and 2022-06-08 14:12:56.630402


In [133]:
reject_historian_extract_df = historian.get_tag_values(
                    site_servers['historian']['proficy']['server_name'],
                    start_time=start_time,
                    end_time=end_time,
                    filter_name=tags_to_extract)
print(f'Shape of data extract: {reject_historian_extract_df.shape}')
                

Shape of data extract: (310174, 2)


In [134]:
# process historian extract 
box_rejects_df = transform_counters_extract(reject_historian_extract_df, site_name=site_name, local_tz=site_tz)
box_rejects_df

TRANSFORM: Shape of original paking rejects data: (310174, 3)
TRANSFORM: Step 1 - Removing cases where the counter values were not logged (NaNs) or negative
TRANSFORM: # removed records 0
TRANSFORM: Shape after step: (310174, 3)
TRANSFORM: Step 2 - Removing cases, where counter has dropped down in between two resets (historian outage issue)
TRANSFORM: # removed records 0
TRANSFORM: Shape after step: (310174, 4)
TRANSFORM: Step 3 - Removing cases where the counter was not changing
TRANSFORM: # removed records 0
TRANSFORM: Shape after step: (310174, 5)
TRANSFORM: Step 4 - Removing cases where the counter reports zero
TRANSFORM: # removed records 267575
TRANSFORM: Shape after step: (42599, 5)
TRANSFORM: Shape after addition of columns: (42599, 6)


Unnamed: 0,DateTime,Value,rejects_qty,line,tag,site
219846,2022-05-19 08:47:58.484,1,1.0,L13,Checkweigher_Base _2Dcheck_TimeOut_Rejected_n,Urlati
219845,2022-05-19 08:48:05.484,2,1.0,L13,Checkweigher_Base _2Dcheck_TimeOut_Rejected_n,Urlati
219843,2022-05-19 09:56:09.397,1,1.0,L13,Checkweigher_Base _2Dcheck_TimeOut_Rejected_n,Urlati
219842,2022-05-19 09:56:24.397,2,1.0,L13,Checkweigher_Base _2Dcheck_TimeOut_Rejected_n,Urlati
219841,2022-05-19 10:17:46.437,4,2.0,L13,Checkweigher_Base _2Dcheck_TimeOut_Rejected_n,Urlati
...,...,...,...,...,...,...
277570,2022-06-02 20:50:42.418,8,1.0,L13,OLCP_1DCheck_0_NOKRejected_n,Urlati
277568,2022-06-03 07:26:31.714,1,1.0,L13,OLCP_1DCheck_0_NOKRejected_n,Urlati
277567,2022-06-03 07:27:34.717,2,1.0,L13,OLCP_1DCheck_0_NOKRejected_n,Urlati
277566,2022-06-03 08:17:32.821,3,1.0,L13,OLCP_1DCheck_0_NOKRejected_n,Urlati


### getting line recipe tag info

In [135]:
line_recipe_tags_to_extract = [t for t in all_tag_list if '_'.join(t.split('_')[1:]) in site_historian_tags['tags_index']]
line_recipe_tags_to_extract

['URL-PACK.L13_Upack_LineRecipe']

In [136]:
historian_line_recipe_extract_df = historian.get_tag_values(
                    site_servers['historian']['proficy']['server_name'],
                    start_time=start_time - timedelta(days=1),
                    end_time=end_time,
                    filter_name=list(line_recipe_tags_to_extract))
print(f'Shape of data extract: {historian_line_recipe_extract_df.shape}')
                

Shape of data extract: (215, 2)


In [137]:
historian_line_recipe_extract_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Value,Quality
Tag,Timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
URL-PACK.L13_Upack_LineRecipe,2022-06-03 10:30:13.391000+00:00,52,3
URL-PACK.L13_Upack_LineRecipe,2022-06-02 06:29:29.883000+00:00,13,3
URL-PACK.L13_Upack_LineRecipe,2022-05-31 09:56:36.214000+00:00,52,3
URL-PACK.L13_Upack_LineRecipe,2022-05-30 10:14:33.234000+00:00,42,3
URL-PACK.L13_Upack_LineRecipe,2022-05-30 05:48:13+00:00,0,0
URL-PACK.L13_Upack_LineRecipe,...,...,...
URL-PACK.L13_Upack_LineRecipe,2022-05-23 05:20:42.445000+00:00,0,0
URL-PACK.L13_Upack_LineRecipe,2022-05-23 05:19:42.661000+00:00,0,0
URL-PACK.L13_Upack_LineRecipe,2022-05-23 05:19:34.441000+00:00,0,0
URL-PACK.L13_Upack_LineRecipe,2022-05-23 05:18:34.656000+00:00,0,0


In [138]:
line_recipe_status_df = put_to_wonderware_format(historian_line_recipe_extract_df, inplace=False)
line_recipe_status_df = line_recipe_status_df.assign(DateTime = line_recipe_status_df['DateTime'].dt.tz_convert(site_tz).dt.tz_localize(None))
line_recipe_status_df = add_site_line_tag(line_recipe_status_df, site=site_name)
line_recipe_status_df

TRANSFORM: Shape after addition of columns: (215, 5)


Unnamed: 0,DateTime,Value,line,tag,site
214,2022-05-23 07:18:26.436,0,L13,LineRecipe,Urlati
213,2022-05-23 07:18:34.656,0,L13,LineRecipe,Urlati
212,2022-05-23 07:19:34.441,0,L13,LineRecipe,Urlati
211,2022-05-23 07:19:42.661,0,L13,LineRecipe,Urlati
210,2022-05-23 07:20:42.445,0,L13,LineRecipe,Urlati
...,...,...,...,...,...
4,2022-05-30 07:48:13.000,0,L13,LineRecipe,Urlati
3,2022-05-30 12:14:33.234,42,L13,LineRecipe,Urlati
2,2022-05-31 11:56:36.214,52,L13,LineRecipe,Urlati
1,2022-06-02 08:29:29.883,13,L13,LineRecipe,Urlati


### map transformed reject data to line recipe

In [139]:
## add line recipe
box_rejects_df = add_dim_key_time(fact_df=box_rejects_df, 
                                            dim_df=line_recipe_status_df.rename(columns={'Value':'Line_recipe'}), 
                                            dim_id='Line_recipe', fact_dttm='DateTime', dim_dttm='DateTime', 
                                            group_by=['site', 'line'])
box_rejects_df

Unnamed: 0,DateTime,Value,rejects_qty,line,tag,site,Line_recipe
17396,2022-05-18 16:12:59.412,3197.0,3197.0,L13,Pack_Counter_n,Urlati,
17397,2022-05-18 16:13:03.413,3198.0,1.0,L13,Pack_Counter_n,Urlati,
17398,2022-05-18 16:13:05.413,3200.0,2.0,L13,Pack_Counter_n,Urlati,
17399,2022-05-18 16:13:10.414,3202.0,2.0,L13,Pack_Counter_n,Urlati,
17400,2022-05-18 16:13:12.414,3204.0,2.0,L13,Pack_Counter_n,Urlati,
...,...,...,...,...,...,...,...
41574,2022-06-06 07:00:16.739,5.0,2.0,L13,FC11_Dataman_Reads_Result_Bad_n,Urlati,52
41989,2022-06-06 09:37:19.093,2.0,2.0,L13,BoxCheckweigher_0_Extra_Rejected_n,Urlati,52
42011,2022-06-06 09:37:19.093,3.0,3.0,L13,BoxCheckweigher_0_Zone_0_Rejected_n,Urlati,52
42374,2022-06-06 09:37:19.093,81.0,81.0,L13,BoxCheckweigher_0_Zone_3_Rejected_n,Urlati,52


### extract agile flag and project flag

In [140]:
agile_tags = [t for t in reject_tags if t in site_historian_tags['tags_agile']]


In [141]:
#site_agile_tags = get_tags_list(lines=site_lines_agile, sensors=agile_tags, sep='_', 
                                                #topic=site_servers['historian']['proficy']['topic'])

In [142]:
#print(f'Extracting data between: {start_time} and {end_time}')
#agile_extract_df = historian.get_tag_values(site_servers['historian']['proficy']['server_name'], 
#                                                                        start_time=start_time - timedelta(days=1),
#                                                                        end_time=end_time,
#                                                                        filter_name=site_agile_tags)
#logging.info(f'Shape of agile flag extract: {agile_extract_df.shape}')


In [143]:
# assign agile flaf and project flag = 0
box_rejects_df = box_rejects_df.assign(agile_flag = 0, project_flag = 0)
box_rejects_df

Unnamed: 0,DateTime,Value,rejects_qty,line,tag,site,Line_recipe,agile_flag,project_flag
17396,2022-05-18 16:12:59.412,3197.0,3197.0,L13,Pack_Counter_n,Urlati,,0,0
17397,2022-05-18 16:13:03.413,3198.0,1.0,L13,Pack_Counter_n,Urlati,,0,0
17398,2022-05-18 16:13:05.413,3200.0,2.0,L13,Pack_Counter_n,Urlati,,0,0
17399,2022-05-18 16:13:10.414,3202.0,2.0,L13,Pack_Counter_n,Urlati,,0,0
17400,2022-05-18 16:13:12.414,3204.0,2.0,L13,Pack_Counter_n,Urlati,,0,0
...,...,...,...,...,...,...,...,...,...
41574,2022-06-06 07:00:16.739,5.0,2.0,L13,FC11_Dataman_Reads_Result_Bad_n,Urlati,52,0,0
41989,2022-06-06 09:37:19.093,2.0,2.0,L13,BoxCheckweigher_0_Extra_Rejected_n,Urlati,52,0,0
42011,2022-06-06 09:37:19.093,3.0,3.0,L13,BoxCheckweigher_0_Zone_0_Rejected_n,Urlati,52,0,0
42374,2022-06-06 09:37:19.093,81.0,81.0,L13,BoxCheckweigher_0_Zone_3_Rejected_n,Urlati,52,0,0


### aggregating data

In [144]:
## aggregating
box_rejects_df = box_rejects_df.assign(DateTimeMin = box_rejects_df['DateTime'].dt.floor(rejects_agg_freq))

groupby_cols = ['site', 'line', 'DateTimeMin', 'tag', 
                              'agile_flag', 'project_flag', 'Line_recipe']
agg_dict = {'rejects_qty':'sum', 'DateTime':'min'}

box_rejects_agg_df = box_rejects_df.groupby(groupby_cols).agg(agg_dict).reset_index()
box_rejects_agg_df

Unnamed: 0,site,line,DateTimeMin,tag,agile_flag,project_flag,Line_recipe,rejects_qty,DateTime
0,Urlati,L13,2022-05-23 07:30:00,Extraction_Turret_CartonsNotExtracted_0_Actual_n,0,0,0,18.0,2022-05-23 07:51:15.641
1,Urlati,L13,2022-05-23 07:30:00,Former1_ExternalGlueCamera_Bad_0_Rejected_n,0,0,0,2.0,2022-05-23 07:51:15.641
2,Urlati,L13,2022-05-23 07:30:00,Former1_InternalGlueCamera_Bad_0_Rejected_n,0,0,0,1.0,2022-05-23 07:51:15.641
3,Urlati,L13,2022-05-23 07:30:00,Former1_InternalGlueCamera_NoFeedback_0_Reject...,0,0,0,1.0,2022-05-23 07:51:15.641
4,Urlati,L13,2022-05-23 07:30:00,Former1_RobotHead4_FlapCheck2_0_Rejected_n,0,0,0,1.0,2022-05-23 07:51:15.641
...,...,...,...,...,...,...,...,...,...
1771,Urlati,L13,2022-06-06 07:00:00,FC11_Dataman_Reads_Result_Bad_n,0,0,52,3.0,2022-06-06 07:00:15.739
1772,Urlati,L13,2022-06-06 09:30:00,BoxCheckweigher_0_Extra_Rejected_n,0,0,52,2.0,2022-06-06 09:37:19.093
1773,Urlati,L13,2022-06-06 09:30:00,BoxCheckweigher_0_Zone_0_Rejected_n,0,0,52,3.0,2022-06-06 09:37:19.093
1774,Urlati,L13,2022-06-06 09:30:00,BoxCheckweigher_0_Zone_3_Rejected_n,0,0,52,81.0,2022-06-06 09:37:19.093


In [145]:
rename_dict = {'DateTimeMin':'datetime', 'tag':'reject cause', 
                               'Line_recipe':'line_recipe', 'DateTime':'start_time', 
                               'agile_flag':'agile_flag', 'project_flag':'project_flag'}

box_rejects_agg_df = box_rejects_agg_df.rename(columns = rename_dict)
box_rejects_agg_df

Unnamed: 0,site,line,datetime,reject cause,agile_flag,project_flag,line_recipe,rejects_qty,start_time
0,Urlati,L13,2022-05-23 07:30:00,Extraction_Turret_CartonsNotExtracted_0_Actual_n,0,0,0,18.0,2022-05-23 07:51:15.641
1,Urlati,L13,2022-05-23 07:30:00,Former1_ExternalGlueCamera_Bad_0_Rejected_n,0,0,0,2.0,2022-05-23 07:51:15.641
2,Urlati,L13,2022-05-23 07:30:00,Former1_InternalGlueCamera_Bad_0_Rejected_n,0,0,0,1.0,2022-05-23 07:51:15.641
3,Urlati,L13,2022-05-23 07:30:00,Former1_InternalGlueCamera_NoFeedback_0_Reject...,0,0,0,1.0,2022-05-23 07:51:15.641
4,Urlati,L13,2022-05-23 07:30:00,Former1_RobotHead4_FlapCheck2_0_Rejected_n,0,0,0,1.0,2022-05-23 07:51:15.641
...,...,...,...,...,...,...,...,...,...
1771,Urlati,L13,2022-06-06 07:00:00,FC11_Dataman_Reads_Result_Bad_n,0,0,52,3.0,2022-06-06 07:00:15.739
1772,Urlati,L13,2022-06-06 09:30:00,BoxCheckweigher_0_Extra_Rejected_n,0,0,52,2.0,2022-06-06 09:37:19.093
1773,Urlati,L13,2022-06-06 09:30:00,BoxCheckweigher_0_Zone_0_Rejected_n,0,0,52,3.0,2022-06-06 09:37:19.093
1774,Urlati,L13,2022-06-06 09:30:00,BoxCheckweigher_0_Zone_3_Rejected_n,0,0,52,81.0,2022-06-06 09:37:19.093


In [146]:
box_rejects_agg_df.to_csv('..\data\initial loading\SUD_BOX_REJECT.csv')