# Alarm Data Analysis



In [1]:
import pandas as pd
import numpy as np
import datetime
import os
import xlrd
import re

# this needs a settings.cfg file in the same directory

import configparser

Config = configparser.ConfigParser()

Config.read('settings.cfg')

def ConfigSectionMap(section):
    dict1 = {}
    options = Config.options(section)
    for option in options:
        try:
            dict1[option] = Config.get(section, option)
            if dict1[option] == -1:
                DebugPrint("skip: %s" % option)
        except:
            print("exception on %s!" % option)
            dict1[option] = None
    return dict1


dir_ip21 = ConfigSectionMap("SectionOne")['ip21']
dir_sanofi_share = ConfigSectionMap("SectionOne")['sanofi']
dir_alarms = ConfigSectionMap("SectionOne")['alarms']

print ("dir_sanofi_share is %s" % (dir_sanofi_share))
print ("dir_alarms is %s" % (dir_alarms))

pd.set_option('display.max_columns', None)
pd.set_option('max_colwidth', None)

dir_sanofi_share is C:/Users/mark_/Sanofi/Sanofi x McLaren sharing - General/Frankfurt sprint/SFD/
dir_alarms is C:/Users/mark_/Sanofi/Sanofi x McLaren sharing - General/Frankfurt sprint/SFD/Alarms_data/


## AL6_Endverpacker

Dir: Y:\E00_Solostar\E6_Assembly_Line_6\E68_Endverpacker\PAL

These files are converted from .VAA files into csv and then we can read them:

```

ID	State	StateID	StateText	TimeIn	TimeUserAck	TimePLCAck	MessageID	MessageText	Occur	GroupID	GroupText	Priority	Parameter1	Parameter2	Group	Class	HistoricalID	HasNotes	SortBuffer	TimeInUTC	User	Machine
579	0	$2524	MGG	31/07/2021 23:57	30/12/1899 00:00:00	01/08/2021 00:30	$9051	Maschine wartet auf Produkte	1	$8879	Warnung Maschine	0	$7326	12	42	5	1	0	Alarmhistory_1-8-21--12-0	31/07/2021 21:57		FRAM28556
579	4	$2522	MGK	01/08/2021 00:30	30/12/1899 00:00:00	30/12/1899 00:00:00	$9051	Maschine wartet auf Produkte	1	$8879	Warnung Maschine	0	$7326	12	42	5	2	0	Alarmhistory_1-8-21--12-0	31/07/2021 22:30		FRAM28556
579	0	$2524	MGG	01/08/2021 00:30	30/12/1899 00:00:00	01/08/2021 00:31	$9051	Maschine wartet auf Produkte	1	$8879	Warnung Maschine	0	$7326	12	42	5	3	0	Alarmhistory_1-8-21--12-0	31/07/2021 22:30		FRAM28556
828	4	$2522	MGK	01/08/2021 00:32	30/12/1899 00:00:00	30/12/1899 00:00:00	$9300	Roboter: Palette voll	1	$8896	Warnung Roboter: Kartonhandling	0	$8323	10004	59	5	4	0	Alarmhistory_1-8-21--12-0	31/07/2021 22:32		FRAM28556

```

In [2]:
folder = 'AL6_Endverpacker'

Files = []
df = []
Endverpacker_df = pd.DataFrame()

for filename in os.listdir( dir_alarms + folder ):
    if filename.endswith('.csv'):
        Files.append(filename)

Files.sort(key=str.lower)

# Loop through all the files.
for filename in Files:
    path = os.path.join(dir_alarms, folder, filename)
    # print (path)
    df = pd.read_csv(path, encoding='mbcs')
    df['Filename'] = filename
    Endverpacker_df = Endverpacker_df.append(df)


# C:\Users\mark_\Sanofi\Sanofi x McLaren sharing - General\Frankfurt sprint\SFD\Alarms_data\AL6_Endverpacker
Endverpacker_df['Machine'] = folder

## AL6_Etikettierer

Dir: Y:\E00_Solostar\E6_Assembly_Line_6\E64_Etikettierer\AuditTrail

There are PDF files and AuditTrail csv files which hold the same information.
- status 0 = start of alarm
- status 3 = end of alarm

Problem is the ID of the alarm is reused repeatedly and I can see multiple Status 3 returned for same ID without a Status 0 apparently initiating the alarm

csv files looks like this:

```
Nummer;Projekt;Status;Prio;Quali;Datum;Zeit;UTC Versatz;Meldetext;ID;BMK;Parameterwert;Einheit;Alt/Neu-Wert;Benutzer;Auftrag;Charge
00000001;BAS;0;4;L;15.08.2021;22:00:00;+02:00;01-01 Solostar PEN;;TYPE;0001;;N;Bediener;80758594;1F7908A

```

Approach:
- read all the xls files in from folder   
- derive Start


In [3]:
folder = 'AL6_Etikettierer'

Files = []
df = []
Etikettierer_df = pd.DataFrame()

for filename in os.listdir( dir_alarms + folder ):
    if filename.endswith('.csv'):
        Files.append(filename)

Files.sort(key=str.lower)

# Loop through all the files.
for filename in Files:
    path = os.path.join(dir_alarms, folder, filename)
    # print (path)
    df = pd.read_csv(path, sep=';', encoding='utf-16')
    df['Filename'] = filename    
    Etikettierer_df = Etikettierer_df.append(df)

Etikettierer_df['Machine'] = folder

# convert dates to datetime format
Etikettierer_df['Start'] = pd.to_datetime(Etikettierer_df['Datum'] + "."+ Etikettierer_df['Zeit'], format='%d.%m.%Y.%H:%M:%S')

# drop the rows with NaN values in ID - thought it was causing problems when trying to merge on ID, can't calc duration for them anyway, and I don't think they are relevant alarms
Etikettierer_df = Etikettierer_df[Etikettierer_df['ID'].notnull()]

# This was attempting to find a corresponding Status 3 for an ID to calculate 

# take the rows with status = 3
# Etikettierer_end = Etikettierer_df[['ID','Start']][Etikettierer_df['Status'] == 3]
# Etikettierer_end.rename(columns={'Start': 'End'}, inplace=True)

# get rid of the rows with status 3 from main df
# Etikettierer_df = Etikettierer_df[Etikettierer_df['Status'] != 3]

# merge 
# Etikettierer_df.merge(Etikettierer_end, on='ID', how='inner')

In [22]:
temp_df = Etikettierer_df[Etikettierer_df['ID'] == 364]
temp_df.set_index('Start', inplace=True)
temp_df.sort_index()
temp_df.loc['2021-06-04']

Unnamed: 0_level_0,Nummer,Projekt,Status,Prio,Quali,Datum,Zeit,UTC Versatz,Message Text,ID,BMK,Parameterwert,Einheit,Alt/Neu-Wert,Benutzer,Auftrag,Charge,Filename,Machine
Start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-06-04 00:11:29,140,MMA,0,3,L,04.06.2021,00:11:29,+02:00,Devicesperre wurde geschlossen durch: Transportbänder,364.0,,,,,Bediener,80752924,1F051A,AL6Audit Trail 000000 2021-06-04 K747B14.csv,AL6_Etikettierer
2021-06-04 00:14:32,142,MMA,3,3,L,04.06.2021,00:14:32,+02:00,Devicesperre wurde geschlossen durch: Transportbänder,364.0,,,,,Bediener,80752924,1F051A,AL6Audit Trail 000000 2021-06-04 K747B14.csv,AL6_Etikettierer
2021-06-04 00:16:50,145,MMA,0,3,L,04.06.2021,00:16:50,+02:00,Devicesperre wurde geschlossen durch: Transportbänder,364.0,,,,,Bediener,80752924,1F051A,AL6Audit Trail 000000 2021-06-04 K747B14.csv,AL6_Etikettierer
2021-06-04 00:17:05,146,MMA,3,3,L,04.06.2021,00:17:05,+02:00,Devicesperre wurde geschlossen durch: Transportbänder,364.0,,,,,Bediener,80752924,1F051A,AL6Audit Trail 000000 2021-06-04 K747B14.csv,AL6_Etikettierer
2021-06-04 00:18:17,151,MMA,0,3,L,04.06.2021,00:18:17,+02:00,Devicesperre wurde geschlossen durch: Transportbänder,364.0,,,,,Bediener,80752924,1F051A,AL6Audit Trail 000000 2021-06-04 K747B14.csv,AL6_Etikettierer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-04 21:33:45,139,MMA,0,3,L,04.06.2021,21:33:45,+02:00,Devicesperre wurde geschlossen durch: Transportbänder,364.0,,,,,Bediener,80752924,1F051A,AL6Audit Trail 210000 2021-06-04 K747B14.csv,AL6_Etikettierer
2021-06-04 21:33:48,142,MMA,3,3,L,04.06.2021,21:33:48,+02:00,Devicesperre wurde geschlossen durch: Transportbänder,364.0,,,,,Bediener,80752924,1F051A,AL6Audit Trail 210000 2021-06-04 K747B14.csv,AL6_Etikettierer
2021-06-04 21:33:51,144,MMA,0,3,L,04.06.2021,21:33:51,+02:00,Devicesperre wurde geschlossen durch: Transportbänder,364.0,,,,,Bediener,80752924,1F051A,AL6Audit Trail 210000 2021-06-04 K747B14.csv,AL6_Etikettierer
2021-06-04 21:50:23,160,MMA,3,3,L,04.06.2021,21:50:23,+02:00,Devicesperre wurde geschlossen durch: Transportbänder,364.0,,,,,Bediener,80752924,1F051A,AL6Audit Trail 210000 2021-06-04 K747B14.csv,AL6_Etikettierer


## AL6_Montage

Dir: Y:\E00_Solostar\E6_Assembly_Line_6\E63_Montage\CSV

- Saved as Text files but standard 'csv' files separated by semi-colon   
- Selecting just the files that begin with 'A' as they appear to be the warnings and alarms.    
- They don't have a header record   
- Don't know what the 'C*' files are   

``` 

Warnung;2021-08-05 02:00:09;2021-08-05 02:00:10;1342;1091-1B14 Standby: Teil nicht auf Abholposition Spur 14;0..Flt[1342];
Warnung;2021-08-05 02:00:09;2021-08-05 02:00:10;1343;1091-1B15 Standby: Teil nicht auf Abholposition Spur 15;0..Flt[1343];
Warnung;2021-08-05 02:00:23;2021-08-05 02:03:48;1198;1462-7B1 Standby: Max. Stau Abführband erreicht;0..Flt[1198];
Warnung;2021-08-05 02:04:20;2021-08-05 02:04:33;1198;1462-7B1 Standby: Max. Stau Abführband erreicht;0..Flt[1198];

```

In [4]:
folder = 'AL6_Montage'

Files = []
df = []
Montage_df = pd.DataFrame()

for filename in os.listdir( dir_alarms + folder ):
    if filename.startswith('A'):
        path = os.path.join(dir_alarms, folder, filename)
        if os.path.getsize(path) > 0:
            Files.append(filename)

Files.sort(key=str.lower)

# Loop through all the files.
for filename in Files:
    path = os.path.join(dir_alarms, folder, filename)
    # print (path)
    df = pd.read_csv(path, sep=';', encoding='cp1252', header=None)
    df['Filename'] = filename
    Montage_df = Montage_df.append(df)

Montage_df.columns=['Type','Start','End','Duration','Message Text','Some Code','Not sure','Filename']
Montage_df['Machine'] = folder

## AL6_Kartonierer

Dir: Y:\E00_Solostar\E6_Assembly_Line_6\E65_Kartonierer\40 - Reports

- Saved as PDF and xlsx files in a report format - warnings and alarms written to sections, if they occurred in that reporting period   
- Selecting just the xlsx files to work with.    
- interate over each file, find 'Alarme' and append row to dataframe until we find next blank row


``` 

					
Alarme					
Meldungstext	Beginn	Ende			
Waage: Ausdrucke können nicht mehr lokal gespeichert werden. Bitte legen Sie ein Speichermedium ein	"10:31:09
28.08.2021"	"11:24:19
28.08.2021"			
 =A+52-AP-U1# AS-i Fehler	"11:22:47
28.08.2021"	"11:23:22
28.08.2021"			
Druckluft überprüfen (Druck zu gering) (=A+05-SP48)	"11:23:28
28.08.2021"	"11:23:40
28.08.2021"			


In [5]:
folder = 'AL6_Kartonierer'

Files = []
df = []
Kartonierer_df = pd.DataFrame()

# Loop through all the files.
for filename in os.listdir( dir_alarms + folder ):

    path = os.path.join(dir_alarms, folder, filename)
    df = pd.read_excel(path, na_filter=False)
    # df.set_index('Unnamed: 0', inplace=True)


    df['Filename'] = filename

    # Kartonierer_alarms = pd.DataFrame()

    for i, row in df.iterrows():
        str_temp = str(row[0])
        if (str_temp.find('Alarme') != -1): 
            alarm = True
        if row[0] == '': 
            alarm = False
        if alarm:
            Kartonierer_df = Kartonierer_df.append({'Message Text': row[0],
                                                    'Start': row[1],
                                                    'End': row[2],
                                                    'Filename': row['Filename']}, ignore_index=True)


Kartonierer_df = Kartonierer_df[Kartonierer_df['Message Text'].str.contains('Alarme|Meldu') == False]
# Kartonierer_alarms['Start'].replace('\n',' ', inplace=True)
Kartonierer_df['Start'] = Kartonierer_df['Start'].str[9:21] + ' ' + Kartonierer_df['Start'].str[0:8]
Kartonierer_df['End'] = Kartonierer_df['End'].str[9:21] + ' ' + Kartonierer_df['End'].str[0:8]

Kartonierer_df['Machine'] = folder
Kartonierer_df.head()


Unnamed: 0,Message Text,Start,End,Filename,Machine
2,Seidenader nicht bereit,30.03.2021 13:44:15,30.03.2021 13:44:44,2691_20210401_114604_AutomaticBatchFinalReport.xls,AL6_Kartonierer
3,"F4_403# Aufnahmefehler (=E+54-29SV1, =E+54-29SV3)",30.03.2021 13:47:15,30.03.2021 13:47:26,2691_20210401_114604_AutomaticBatchFinalReport.xls,AL6_Kartonierer
4,Druckluft überprüfen (Druck zu gering) (=A+05-SP48),30.03.2021 14:03:48,30.03.2021 14:03:54,2691_20210401_114604_AutomaticBatchFinalReport.xls,AL6_Kartonierer
5,Druckluft überprüfen (Druck zu gering) (=A+05-SP48),30.03.2021 14:05:33,30.03.2021 14:05:44,2691_20210401_114604_AutomaticBatchFinalReport.xls,AL6_Kartonierer
6,Druckluft überprüfen (Druck zu gering) (=A+05-SP48),30.03.2021 14:08:10,30.03.2021 14:08:16,2691_20210401_114604_AutomaticBatchFinalReport.xls,AL6_Kartonierer


In [6]:
Kartonierer_df.shape

(54928, 5)

In [12]:
Endverpacker_df.head()

Unnamed: 0,ID,State,StateID,StateText,Start,TimeUserAck,TimePLCAck,MessageID,Message Text,Occur,GroupID,GroupText,Priority,Parameter1,Parameter2,Group,Class,HistoricalID,HasNotes,SortBuffer,TimeInUTC,User,Machine,Filename,Duration
0,579,0,$2524,MGG,2021-07-31 23:57:45,30/12/1899 00:00:00,01/08/2021 00:30:15,$9051,Maschine wartet auf Produkte,1,$8879,Warnung Maschine,0,$7326,12,42,5,1,0,Alarmhistory_1-8-21--12-0,31/07/2021 21:57:45,,AL6_Endverpacker,Alarmhistory_1-8-21--12-0#VAA.csv,1950.0
1,579,4,$2522,MGK,2021-08-01 00:30:50,30/12/1899 00:00:00,30/12/1899 00:00:00,$9051,Maschine wartet auf Produkte,1,$8879,Warnung Maschine,0,$7326,12,42,5,2,0,Alarmhistory_1-8-21--12-0,31/07/2021 22:30:50,,AL6_Endverpacker,Alarmhistory_1-8-21--12-0#VAA.csv,
2,579,0,$2524,MGG,2021-08-01 00:30:50,30/12/1899 00:00:00,01/08/2021 00:31:31,$9051,Maschine wartet auf Produkte,1,$8879,Warnung Maschine,0,$7326,12,42,5,3,0,Alarmhistory_1-8-21--12-0,31/07/2021 22:30:50,,AL6_Endverpacker,Alarmhistory_1-8-21--12-0#VAA.csv,41.0
3,828,4,$2522,MGK,2021-08-01 00:32:05,30/12/1899 00:00:00,30/12/1899 00:00:00,$9300,Roboter: Palette voll,1,$8896,Warnung Roboter: Kartonhandling,0,$8323,10004,59,5,4,0,Alarmhistory_1-8-21--12-0,31/07/2021 22:32:05,,AL6_Endverpacker,Alarmhistory_1-8-21--12-0#VAA.csv,
4,828,0,$2524,MGG,2021-08-01 00:32:05,30/12/1899 00:00:00,01/08/2021 00:32:15,$9300,Roboter: Palette voll,1,$8896,Warnung Roboter: Kartonhandling,0,$8323,10004,59,5,5,0,Alarmhistory_1-8-21--12-0,31/07/2021 22:32:05,,AL6_Endverpacker,Alarmhistory_1-8-21--12-0#VAA.csv,10.0


In [8]:
from datetime import datetime

#Calculate duration in seconds and add as a column to Endverpacker. Rows without a duration are recorded as 'N/A'
a=pd.to_datetime(Endverpacker_df['TimePLCAck'], dayfirst=True)
b=pd.to_datetime(Endverpacker_df['TimeIn'], dayfirst=True)
c=a-b
d=c.dt.total_seconds()
e=d.where(d >0, 'N/A')
Endverpacker_df['Duration']=e

#Etikettierer does not include durations. A column has still been created for merging with 'N/A' values.
Etikettierer_df['Duration']='N/A'

#Calculate duration in seconds and add as a column to Montage. Rows without a duration are recorded as 'N/A'
a=pd.to_datetime(Montage_df['End'])
b=pd.to_datetime(Montage_df['Start'])
c=a-b
d=c.dt.total_seconds()
e=d.where(d >0, 'N/A')
Montage_df['Duration']=e

#Convert start time columns to datetime format

Endverpacker_df['TimeIn']=pd.to_datetime(Endverpacker_df['TimeIn'], dayfirst = True)

f=Etikettierer_df['Datum'] + "."+ Etikettierer_df['Zeit']
g=pd.to_datetime(f,format='%d.%m.%Y.%H:%M:%S')
Etikettierer_df['Start']=g

Montage_df['Start']=pd.to_datetime(Montage_df['Start'])

#Give a universal column name for Start Time (Start)

Endverpacker_df=Endverpacker_df.rename(columns={"TimeIn" : "Start"})
#Etikettierer_df=Etikettierer_df.rename(columns={"Zeit" : "Start"})

Kartonierer_df['Start'] = pd.to_datetime(Kartonierer_df['Start'], dayfirst=True)
Kartonierer_df['End'] = pd.to_datetime(Kartonierer_df['End'], dayfirst=True)
c = Kartonierer_df['End'] - Kartonierer_df['Start']
d = c.dt.total_seconds()
Kartonierer_df['Duration'] = d


KeyError: 'TimeIn'

In [13]:
#Create a universal column name for Message Text

Endverpacker_df=Endverpacker_df.rename(columns={"MessageText" : "Message Text"})
Etikettierer_df=Etikettierer_df.rename(columns={"Meldetext" : "Message Text"})


In [14]:
#Create and populate master alarms database with useful data read across

Alarms_df=pd.DataFrame(columns=['Message Text','Start','Duration','Filename','Machine'])
Alarms_df=pd.concat([Alarms_df, Etikettierer_df, Endverpacker_df, Montage_df, Kartonierer_df], join="inner")
Alarms_df=Alarms_df.reset_index()


In [15]:
Messages=Alarms_df['Message Text'].unique()

import googletrans
from googletrans import Translator

#Initialise
translator = Translator(service_urls=['translate.googleapis.com'])



In [19]:
#This theoertically works, but in practice googletranslate is unreliable and it does not translate consistently
#translations = {}
#for element in Messages:
#    # add translation to the dictionary
#    translations[element] = translator.translate(element).text
    
#Alarms_df['Message Text']=Alarms_df['Message Text'].replace(translations)
#Alarms_df.to_csv('alarms.csv')

In [16]:
#Instead a list of unique alarm messages has been extracted, translated and saved as a CSV file.
filename = 'translations.csv'
path = os.path.join(dir_ip21, filename)
df = pd.read_csv(path ,encoding='UTF-8', header=None, index_col=0)
# df = pd.read_csv(r'C:\Users\Raza-PC\Documents\McLaren\Sanofi\Sanofi\translations.csv',encoding='UTF-8',header=None,index_col=0)
translation=df.to_dict()
# Alarms_df['Message Text'] = Alarms_df['Message Text'].replace(translation[1])
Alarms_df['Message Text (English)'] = Alarms_df['Message Text'].replace(translation[1])


In [91]:
# this gets rid of (drops) the 'index' col and moves the newly created 'Message Text (English)' to the firstt col
cols = Alarms_df.columns.tolist()
cols = cols[-1:] + cols[1:-1]
Alarms_df = Alarms_df[cols]
filename = 'alarms.csv'
path = os.path.join(dir_ip21, filename)
Alarms_df.to_csv(path)

In [19]:
Alarms_df.groupby('Machine').count()

Unnamed: 0_level_0,index,Message Text,Start,Filename,Message Text (English)
Machine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AL6_Endverpacker,3975,3975,3975,3975,3975
AL6_Etikettierer,263336,263336,263336,263336,263336
AL6_Kartonierer,54928,54928,54928,54928,54928
AL6_Montage,447243,447243,447243,447243,447243
