In [None]:
import numpy as np 
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
import plotly.express as px

In [None]:
# Load the raw CSV tables.
# NOTE(review): Data_Dir is assigned here, but the reads below use bare relative
# file names and never reference it -- confirm which location is intended.
Data_Dir = "/workspaces/projects/Maintainance_Prediction"

telemetry_df, errors_df, maint_df, failure_df, machines_df = map(
    pd.read_csv,
    (
        "PdM_telemetry.csv",
        "PdM_errors.csv",
        "PdM_maint.csv",
        "PdM_failures.csv",
        "PdM_machines.csv",
    ),
)

# Parse the "datetime" column of each listed table and order its rows by
# (datetime, machineID), renumbering the index from 0.
# machines_df is not in this list, so it is left exactly as loaded.
tables = [telemetry_df, errors_df, maint_df, failure_df]
for df in tables:
    df["datetime"] = pd.to_datetime(df["datetime"], format="%Y-%m-%d %H:%M:%S")
    df.sort_values(by=["datetime", "machineID"], inplace=True, ignore_index=True)

In [None]:
# Preview the first five failure records.
failure_df.head(n=5)

**Telemetry Data**

For simplicity, we will work with the telemetry data, which consists of hourly averages of voltage, rotation, pressure, and vibration collected from 100 machines.



In [None]:
# Preview the first telemetry rows recorded for machine 1.
telemetry_df.loc[telemetry_df['machineID'] == 1].head()

In [None]:
# Number of distinct machine IDs present in the telemetry table.
telemetry_df['machineID'].nunique()

In [None]:
# Parse the timestamp column using the fixed "%Y-%m-%d %H:%M:%S" layout.
telemetry_df['datetime'] = pd.to_datetime(telemetry_df['datetime'], format="%Y-%m-%d %H:%M:%S")

# Report the row count, then show summary statistics as the cell output.
print("Total number of telemetry records: %d" % telemetry_df.shape[0])

telemetry_df.describe()

In [None]:
# Rows whose (datetime, machineID) pair repeats an earlier row; the first
# occurrence of each pair is not flagged.
telemetry_duplicates = telemetry_df.loc[
    telemetry_df.duplicated(subset=['datetime', 'machineID'], keep='first')
]
telemetry_duplicates

In [None]:
# Count missing values in each telemetry column.
telemetry_df.isna().sum()

In [None]:
# Focus on a single machine (machineID 10): print the full table's shape,
# then keep only that machine's rows with a fresh 0-based index.
print(f"Shape of the Telemetry Record: {telemetry_df.shape}")
df_sin = telemetry_df[telemetry_df['machineID'].eq(10)].reset_index(drop=True)
df_sin.head(5)

In [None]:
# Failure records belonging to machine 10.
sel_fail = failure_df[failure_df['machineID'].eq(10)]
pd.DataFrame(data=sel_fail)

In [None]:
# Error records belonging to machine 10 (first five shown).
sel_err = errors_df[errors_df['machineID'].eq(10)]
pd.DataFrame(data=sel_err).head()

In [None]:
# Voltage of machine 1 for timestamps strictly after 2015-01-01 00:00 and
# strictly before 2015-02-01 00:00 (both bounds are exclusive).
plot_df = telemetry_df.loc[
    telemetry_df['machineID'].eq(1)
    & telemetry_df['datetime'].gt(pd.to_datetime('2015-01-01'))
    & telemetry_df['datetime'].lt(pd.to_datetime('2015-02-01')),
    ['datetime', 'volt'],
]

fig = px.line(
    x=plot_df['datetime'].values,
    y=plot_df['volt'].values,
    title='Voltage over time',
    template='plotly_dark',
)
fig.update_layout(xaxis_title='Time', yaxis_title='Voltage')
fig.show()

In [None]:
# Parse the timestamp column and store errorID with object dtype.
errors_df['datetime'] = pd.to_datetime(errors_df['datetime'], format="%Y-%m-%d %H:%M:%S")
errors_df['errorID'] = errors_df['errorID'].astype('object')

# Report the row count, then preview the table as the cell output.
print("Total number of error records: %d" % errors_df.shape[0])
errors_df.head()

In [None]:
# Bar chart with one entry per error record, placed on the x axis and
# coloured by its errorID.
fig = px.bar(
    x=errors_df['errorID'].values,
    color=errors_df['errorID'].values,
    title='Count of Errors',
    template='plotly_dark',
)
fig.update_layout(xaxis_title='Error Type', yaxis_title='Count')
fig.show()

In [None]:
# Parse the timestamp column using the fixed "%Y-%m-%d %H:%M:%S" layout and
# store the component column with object dtype.
maint_df['datetime'] = pd.to_datetime(
    maint_df['datetime'], format="%Y-%m-%d %H:%M:%S")
maint_df['comp'] = maint_df['comp'].astype('object')

# Report the row count of maint_df. The original referenced the undefined
# name `main_df`, which raised NameError when the cell ran.
print("Total number of maintenance records: %d" % len(maint_df.index))
maint_df.head()

In [None]:
# Telemetry columns to visualise.
features = ['volt', 'rotate', 'pressure', 'vibration']

# Draw one histogram figure per listed column, over the full telemetry table.
for feature in features:
    fig = px.histogram(
        data_frame=telemetry_df,
        x=feature,
        title=f'Histogram for {feature}',
        template='plotly_dark',
    )
    fig.update_layout(xaxis_title=feature, yaxis_title='Frequency')
    fig.show()