In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import glob
import datetime
import os
from IPython.display import display, Markdown

%matplotlib inline

In [None]:
def timestamper(timestamp):
    """Convert an epoch timestamp in milliseconds to a ``datetime``.

    ``timestamp`` may be anything ``float()`` accepts (number or numeric
    string). The result is the naive local-time ``datetime`` produced by
    ``datetime.datetime.fromtimestamp``.
    """
    epoch_seconds = float(timestamp) / 1000
    return datetime.datetime.fromtimestamp(epoch_seconds)

# Label data Manually
Read each raw file and its matching FFT file, plot the ones whose label is not known yet, and wait for the user to enter the label:
- *0:* Motor is offline
- *1:* Normal behavior of the motor
- *2:* Anomaly in the behavior (the motor is vibrating too much)

In [None]:
# glob returns paths in arbitrary order; sort them so the labeling order
# (and the progress counter) is the same on every run.
files = sorted(glob.glob("data/raw/*"))

In [None]:
labels = []
datetimes = []

# Mean standard deviation of the FFT columns above which the motor is
# considered to be running and the file needs a manual label.
FFT_STD_RUNNING_THRESHOLD = 1

# Resume from processed.csv if a previous labeling session was started and
# not finished.
try:
    processed = pd.read_csv("processed.csv", index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No earlier session to resume from: start with an empty record.
    processed = pd.DataFrame(columns=["file"])

# `x in series` tests the index labels, not the values, so membership must be
# checked against the values of the "file" column explicitly.
already_processed = set(processed["file"])

count = 0
for file in files:
    if file in already_processed:
        continue

    # Reading raw and fft data.
    df = pd.read_csv(file, index_col="tiempo")
    # Convert the millisecond timestamps after loading; read_csv's
    # `date_parser` argument is deprecated in pandas 2.x.
    df.index = df.index.map(timestamper)
    df_fft = pd.read_csv(file.replace(".csv", ".fft.csv").replace("raw", "fft"), index_col=0)

    # NOTE(review): assumes the dot-separated file name carries the date in
    # field 2 and the time in field 3 -- confirm against the data naming scheme.
    filename = os.path.basename(file).split(".")
    date = filename[2]
    time = filename[3]
    date_time = pd.to_datetime(date + " " + time)

    # If the std of the fft data (first row excluded) is greater than the
    # threshold, the motor is running and the user has to decide whether the
    # behavior is normal or not.
    if df_fft.iloc[1:].std().abs().mean() > FFT_STD_RUNNING_THRESHOLD:
        print("filename: {}  {}/{}".format(file, count, len(files)))

        # Plotting time and fourier plot
        fig, ax = plt.subplots(1, 2, figsize=(20, 5))
        df.plot(ax=ax[0], title="Time")
        df_fft.plot(ax=ax[1], title="Fourier")
        ax[0].set_ylim((-3, 3))
        ax[1].set_ylim((-300, 300))

        plt.legend()
        plt.show()
        # Release the figure so a long session does not accumulate open figures.
        plt.close(fig)

        # Showing the Time and Fourier stats
        stats = {}
        stats["Time-mean"] = df.mean()
        stats["Time-std"] = df.std()
        stats["Fourier-mean"] = df_fft.iloc[1:].mean()
        stats["Fourier-std"] = df_fft.iloc[1:].std()
        stats = pd.DataFrame(stats).transpose()
        stats["Mean"] = stats.mean(axis=1)
        display(stats)

        # Asking for the label until a valid one is entered
        label = input("0: Off, 1:Normal, 2:Anomaly ").strip()
        while label not in ["0", "1", "2"]:
            display(Markdown("**Incorrect Input**"))
            label = input("0: Off, 1:Normal, 2:Anomaly ").strip()
        label = int(label)
    else:
        # The motor is off (fft std at or below the threshold)
        label = 0

    count += 1
    labels.append(label)
    datetimes.append(date_time)

    # Adding the file to processed. `processed` is updated on every iteration
    # (instead of once after the loop) so that interrupting this cell still
    # leaves it in sync with `labels` for the saving cell.
    # DataFrame.append was removed in pandas 2.0, hence pd.concat.
    already_processed.add(file)
    processed = pd.concat(
        [processed, pd.DataFrame({"file": [file]})],
        ignore_index=True,
    )

In [None]:
# If the labeling process is not finished, stop the previous cell and run
# this one to save the labels collected so far.
# NOTE: `labels` and `datetimes` are not cleared here, so running this cell
# twice in the same session appends the session's labels to the file twice.
processed.to_csv("processed.csv")

labels_df = pd.DataFrame({
    "time": datetimes,
    "label": labels,
})

# Merge with the labels saved by earlier sessions, if there are any.
try:
    # index_col=0: the file is written below with its index as first column;
    # reading it back as data would add an "Unnamed: 0" column on every save.
    labels_old = pd.read_csv("data/labels.csv", index_col=0)
except FileNotFoundError:
    # First session: there is nothing to merge with.
    pass
else:
    labels_df = pd.concat([labels_old, labels_df], ignore_index=True)

labels_df.to_csv("data/labels.csv")