<a href="https://colab.research.google.com/github/janinerottmann/Drillhole-Inspection/blob/master/Data_Labeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Labeling

This notebook demonstrates how sensor data logs are assigned to the individual drill holes. We use a threshold to identify the points in time at which the acceleration rises above a constant level of 12N and falls below it again. These two points in time mark the start and end of each drilling operation. This way, we automatically extract all drill holes from the recording log of the entire production process of a gearbox component, including set-up times.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive into the Colab VM at /content/drive so the files stored
# in Drive can be read and written like local files.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# change working directory
!pwd
%cd ./drive/My\ Drive

In [None]:
# Folder, relative to the working directory, that holds the Excel quality
# sheet and the per-part sensor logs.
datafolder = 'data/'

## Prepare Quality File

In [None]:
# Excel workbook with the quality measurements.
qualityFile = datafolder+'Bearbeitungszeiten Zahnträger fertig.xlsx'
# Raw read without a header row: the first three sheet rows carry the limit
# parameters (NEN / OTG / UTG in column 2) rather than column names; the
# measurement table itself only starts after those rows.
quality = pd.read_excel(qualityFile,header=None)

In [None]:
# --- Tolerance limits --------------------------------------------------------
# The first three sheet rows carry the limit parameters in column 2. They are
# read into their own frame here so that this cell does not depend on what
# `quality` currently holds and can be re-run safely after `quality` has been
# replaced by the measurement table below.
header_rows = pd.read_excel(qualityFile, header=None, nrows=3)

NEN = header_rows.loc[0, 2]  # presumably the nominal dimension -- TODO confirm
# The two tolerance cells are strings; their first character is dropped
# before parsing.
# NOTE(review): if that character is a sign, UTG loses it and Nmin ends up
# above NEN -- confirm against the sheet. Behaviour is unchanged here.
OTG = float(header_rows.loc[1, 2][1:]) * 1e5
UTG = float(header_rows.loc[2, 2][1:]) * 1e5
Nmax = NEN + OTG
Nmin = NEN + UTG

print('NEN: {}'.format(NEN))
print('OTG: {}'.format(OTG))
print('UTG: {}'.format(UTG))
print('-'*5)
print('Nmax: {}'.format(Nmax))
print('Nmin: {}'.format(Nmin))

# --- Measurement table -------------------------------------------------------
# Skip the three limit rows, drop rows without time or hole number, and give
# the measurement columns short names. The index is reset so that later
# row-wise operations cannot be misaligned by the dropped rows.
quality = (
    pd.read_excel(qualityFile, skiprows=3)
    .dropna(subset=['Zeit', 'Bohrloch-Nummer'])
    .rename(columns={'0°-180°': 'p1',
                     '90°-270°': 'p2',
                     '0°-180°.1': 'p3',
                     '90°-270°.1': 'p4',
                     '0°-180°.2': 'p5',
                     '90°-270°.2': 'p6',
                     'Bohrloch-Nummer': 'BohrlochNr',
                     'Teil Nr.': 'TeilNr'})
    .reset_index(drop=True)
)

## Fill part number
# A missing part number takes the last value seen above it; rows before the
# first filled value get 0 (same result as the former row-by-row loop, but
# without chained assignment or label lookups).
quality['TeilNr'] = quality['TeilNr'].ffill().fillna(0)

## Add classifier column
l_col = ['p'+str(k) for k in range(1, 7)]

# A row is flagged when any of the six measurements is at or beyond a limit
# (NaN measurements compare as False) ...
measurements = quality[l_col]
out_of_tolerance = (measurements.ge(Nmax) | measurements.le(Nmin)).any(axis=1)

# ... or when the 'Auffälligkeiten' column has an entry for it.
quality['classifier'] = out_of_tolerance | quality['Auffälligkeiten'].notna()

## Load Sensor Data

In [None]:
# Part numbers (TeilNr) to process; each one has a sensor log named
# 'part<nr>.feather' in `datafolder`.
bauteile = [3,4,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]

In [None]:
# Will hold the labelled rows of all parts.
data = pd.DataFrame()

# One feather log per part, in the same order as `bauteile`
# (e.g. 3 -> 'part3.feather'). A plain list of str replaces the repeated
# np.append, which re-allocated the array on every iteration.
files = ['part' + str(bauteil) + '.feather' for bauteil in bauteile]

## Labeling Function

In [None]:
def label(file, bauteil, threshold=12, min_points=38000, max_points=50000,
          holes_per_block=48):
    """Locate the drill segments in one sensor log and join them to ``quality``.

    ``rmsAccX`` is compared with ``threshold``; every row where the result of
    that comparison flips (plus the very first row of the log) is an edge.
    Each pair of consecutive edges forms a candidate segment, described by the
    index labels of its two edge rows (``start``/``end``). Only segments whose
    length ``end - start`` lies in ``[min_points, max_points]`` are kept.

    The first ``holes_per_block`` kept segments and the next
    ``holes_per_block`` kept segments are each numbered 1..n as
    ``BohrlochNr`` and joined on that number, so every hole carries two
    segments (``start_x``/``end_x`` and ``start_y``/``end_y``). Segments
    beyond these two blocks are ignored.

    NOTE(review): segments are kept regardless of whether they begin at a
    rising or a falling edge, so a stretch *below* the threshold with a length
    in the accepted range would be kept as well -- confirm this is intended.

    Parameters
    ----------
    file : str
        File name of the feather log, appended to the global ``datafolder``.
    bauteil : int
        Part number; written to ``TeilNr`` and used as a merge key.
    threshold : float, default 12
        Level ``rmsAccX`` has to exceed to count as "above".
    min_points, max_points : int, default 38000 / 50000
        Inclusive bounds on the segment length in rows.
    holes_per_block : int, default 48
        Number of segments in each of the two blocks.

    Returns
    -------
    pandas.DataFrame
        The rows of the global ``quality`` table that match this part's
        ``BohrlochNr``/``TeilNr``, extended by the segment columns. The same
        frame is also stored in the global ``qualityIndex`` for callers that
        read the result from there.
    """
    # Kept for backward compatibility: existing callers read this global.
    global qualityIndex

    df = pd.read_feather(datafolder + file)

    # +1 / -1 where the signal crosses the threshold, 0 elsewhere. The first
    # row has no predecessor and is deliberately treated as an edge
    # (fillna(1)).
    above = (df['rmsAccX'] > threshold).astype(int)
    crossing = above.diff().fillna(1).astype(int)
    edges = crossing[crossing != 0].index

    # Consecutive edges delimit a segment; the last edge has no successor,
    # so its 'end' is NaN and it is dropped by the length filter.
    segments = pd.DataFrame({'start': edges.to_numpy()})
    segments['end'] = segments['start'].shift(-1)
    n_points = segments['end'] - segments['start']
    segments = segments[n_points.between(min_points, max_points)]
    segments = segments.reset_index(drop=True)

    # Split into two equally sized blocks and number the holes in each one.
    # .copy() avoids assigning into a view of `segments`.
    first_block = segments.iloc[:holes_per_block].copy()
    second_block = segments.iloc[holes_per_block:2 * holes_per_block].copy()
    first_block['BohrlochNr'] = np.arange(1, len(first_block) + 1)
    second_block['BohrlochNr'] = np.arange(1, len(second_block) + 1)
    holes = first_block.merge(second_block, on='BohrlochNr')

    # Attach the part number and join with the quality table.
    holes['TeilNr'] = bauteil
    qualityIndex = quality.merge(holes, on=['BohrlochNr', 'TeilNr'])
    return qualityIndex

In [None]:
# Label every part and stack the results. The per-part frames are collected
# in a list and concatenated once: DataFrame.append was removed in pandas 2.0,
# and rebuilding `data` from scratch keeps this cell idempotent on re-run.
labeled_parts = []
for file, bauteil in zip(files, bauteile):
    label(file, bauteil)  # stores its result in the global `qualityIndex`
    labeled_parts.append(qualityIndex)

# pd.concat raises on an empty list, so fall back to an empty frame.
data = pd.concat(labeled_parts, ignore_index=True) if labeled_parts else pd.DataFrame()

## Export data as csv

In [None]:
# Export the labelled table as CSV into the data folder. The path is built
# from `datafolder` (resolves to the same 'data/quality.csv' as before) so the
# export follows the folder setting instead of duplicating it.
data.to_csv(datafolder + 'quality.csv', index=False, header=True)