In [7]:
import numpy as np
import scipy.io as sio # for reading matlab files
import pathlib
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import zipfile

%matplotlib inline

In [8]:
# Move the working directory one level up so that the relative paths used below
# ('./code/logging.json', 'input/data/training', ...) resolve from there.
# NOTE(review): the move is relative to the *current* directory, so re-running this
# cell climbs one more level each time — run it once per kernel session.
os.chdir('../')
root_dir = Path.cwd() # set the root directory as a Pathlib path

In [9]:
# Show the working directory that is now in effect (sanity check after the chdir above)
os.getcwd()

'c:\\Users\\Asus\\variational-auto-encoders-in-sagemaker'

# Logger configuration

In [10]:
import json
import logging
import logging.config

# Location of the JSON logging configuration, relative to the working directory.
default_path='./code/logging.json'
# Level used by the basicConfig fallback when no configuration file is found.
default_level=logging.INFO
# Environment variable that, when set to a non-empty value, overrides default_path.
env_key='LOG_CFG'

class Logger:
    """Small factory that configures the `logging` package and hands out loggers."""

    @staticmethod
    def getLogger(name):
        """Configure logging and return the logger called `name`.

        The configuration file is taken from the environment variable named by
        `env_key` when it is set and non-empty, otherwise from `default_path`.
        If that file exists it is parsed as JSON and applied with
        `logging.config.dictConfig`; otherwise `logging.basicConfig` is called
        with `default_level`.

        Args:
            name: logger name, passed straight to `logging.getLogger`.

        Returns:
            The `logging.Logger` registered under `name`.
        """
        # Import locally: the last line of this cell rebinds the global name
        # `logging` to a Logger instance, so the global can no longer be trusted
        # to be the module once this cell has run. `import logging.config` binds
        # the top-level package to the local name `logging`.
        import logging.config

        path = os.getenv(env_key) or default_path
        if os.path.exists(path):
            with open(path, 'rt') as f:
                config = json.load(f)
            logging.config.dictConfig(config)
        else:
            logging.basicConfig(level=default_level)

        return logging.getLogger(name)

# NOTE: this deliberately keeps the original binding — the following cells call
# `logging.info(...)` on this Logger instance, which shadows the `logging` module.
logging = Logger.getLogger(__name__)

In [11]:
# Folder (relative to the working directory) that holds the raw input file.
input_data_folder = 'input/data/training'
# Folder name for processed data — not used by the cells shown here; presumably
# written to later in the notebook (TODO confirm).
output_data_folder = 'input/data/processed'
# Name of the MATLAB file that is read from input_data_folder.
file_name = 'mill.mat'

In [12]:
# Read the MATLAB file into a dict of arrays and log what it contains.
logging.info('Loading Data')
mat_path = f'{input_data_folder}/{file_name}'
m_data = sio.loadmat(mat_path, struct_as_record=True)
# Lazy %-style arguments: the message text is identical to the formatted string.
logging.info('Data keys are: %s', m_data.keys())
logging.info('Data shape: %s', m_data['mill'].shape)

2021-07-20 19:23:04,301 - __main__ - INFO - Loading Data
2021-07-20 19:23:04,384 - __main__ - INFO - Data keys are: dict_keys(['__header__', '__version__', '__globals__', 'mill'])
2021-07-20 19:23:04,385 - __main__ - INFO - Data shape: (1, 167)


In [13]:
# Pull the 'mill' array out of the loaded file; the logged shape above is (1, 167).
input_data = m_data['mill']
# Field names of the structured array, in storage order.
header = input_data.dtype.names
logging.info('Column names are: %s', header)


2021-07-20 19:23:05,083 - __main__ - INFO - Column names are: ('case', 'run', 'VB', 'time', 'DOC', 'feed', 'material', 'smcAC', 'smcDC', 'vib_table', 'vib_spindle', 'AE_table', 'AE_spindle')


In [14]:
# NOTE(review): repeats the assignment already made in the previous cell.
header = input_data.dtype.names
# Empty frame that the next cell populates with one column per metadata field.
df_metadata = pd.DataFrame()

In [15]:
# Build the per-cut label table: one row per cut, one column per metadata field.
# The first 7 struct fields carry the label data; the remaining fields are the
# signal arrays handled further down.
n_metadata_fields = 7
# Number of unique cuts, taken from the data instead of being hard-coded.
n_cuts = input_data.shape[1]

# The frame is rebuilt from scratch (rather than adding columns to a frame created
# in another cell) so that re-running this cell always yields exactly
# n_metadata_fields columns named '0'..'6'.
df_metadata = pd.DataFrame({
    # [0][0] takes the first element of the 2-D value stored in each field
    str(i): np.array([input_data[0, j][i][0][0] for j in range(n_cuts)])
    for i in range(n_metadata_fields)
})

In [16]:
# Names of the metadata fields (the first 7 entries of the struct field names).
metadata_cols = list(header[0:7])

# Keep only the metadata columns before renaming: on a re-run the frame already
# carries the extra 'cut_no' column, and assigning 7 names to 8 columns would raise.
df_metadata = df_metadata.iloc[:, :len(metadata_cols)].copy()
df_metadata.columns = metadata_cols

# create a column with the unique cut number (0-based, one per row)
df_metadata['cut_no'] = list(range(len(df_metadata)))
df_metadata.head()

Unnamed: 0,case,run,VB,time,DOC,feed,material,cut_no
0,1,1,0.0,2,1.5,0.5,1,0
1,1,2,,4,1.5,0.5,1,1
2,1,3,,6,1.5,0.5,1,2
3,1,4,0.11,7,1.5,0.5,1,3
4,1,5,,11,1.5,0.5,1,4


In [17]:
# Build one long table of the signal samples for every cut.
# Fields 7..12 of the struct hold the signal arrays (everything after the metadata).
signal_names = header[7:13]

# Collect one frame per cut and concatenate once at the end: DataFrame.append
# copied the whole accumulated frame on every iteration and no longer exists in
# pandas >= 2.0.
cut_frames = []
for cut_no in range(input_data.shape[1]):
    cut_df = pd.DataFrame({
        name: input_data[0, cut_no][name].flatten() for name in signal_names
    })
    # tag every sample with the cut it belongs to
    cut_df['cut_no'] = cut_no
    cut_frames.append(cut_df)

# Each cut keeps its own 0-based sample index, exactly as append() produced it.
# pd.concat raises on an empty list, so fall back to an empty frame in that case.
df_data = pd.concat(cut_frames) if cut_frames else pd.DataFrame()


In [18]:
# Display the combined signal table (pandas truncates the rendering of long frames)
df_data

Unnamed: 0,smcAC,smcDC,vib_table,vib_spindle,AE_table,AE_spindle,cut_no
0,-0.017090,0.625000,0.078125,0.314941,0.087280,0.103760,0
1,0.263672,0.810547,0.085449,0.301514,0.098267,0.123291,0
2,0.207520,0.781250,0.078125,0.303955,0.092163,0.104980,0
3,0.302734,0.849609,0.073242,0.300293,0.095215,0.111084,0
4,0.239258,1.098633,0.083008,0.299072,0.083008,0.092163,0
...,...,...,...,...,...,...,...
8995,0.253906,1.674805,0.278320,0.270996,0.084839,0.101318,166
8996,0.478516,1.669922,0.253906,0.273438,0.075073,0.086060,166
8997,0.297852,1.665039,0.246582,0.278320,0.075684,0.084229,166
8998,0.356445,1.674805,0.239258,0.272217,0.075684,0.078125,166


In [19]:
# Reshape to long format: one row per (sample index, signal) with the columns
# 'cut_no', 'variable' and 'value'. ignore_index=False keeps the per-cut sample index.
# value_vars is left at its default (every column that is not an id column); the
# original passed df_data.columns, which also listed the id column 'cut_no' as a
# value variable.
pivotted_data = pd.melt(df_data, id_vars='cut_no', var_name='variable', ignore_index=False)

In [20]:
# Display the long-format table produced by the melt above
pivotted_data

Unnamed: 0,cut_no,variable,value
0,0,smcAC,-0.017090
1,0,smcAC,0.263672
2,0,smcAC,0.207520
3,0,smcAC,0.302734
4,0,smcAC,0.239258
...,...,...,...
8995,166,AE_spindle,0.101318
8996,166,AE_spindle,0.086060
8997,166,AE_spindle,0.084229
8998,166,AE_spindle,0.078125


In [21]:
import plotly.express as px

In [22]:
# Keep only the samples belonging to cut 166, then draw one line per signal,
# using the sample index as the x axis.
is_selected_cut = pivotted_data["cut_no"] == 166
filtered_data = pivotted_data.loc[is_selected_cut]
fig = px.line(filtered_data, color='variable', y="value")

In [23]:
# Render the line chart built in the previous cell
fig.show()

In [55]:
# Same data as the previous figure, but one facet (subplot) per signal, wrapped
# into two columns; colour still encodes the signal.
fig = px.line(filtered_data, y="value", facet_col="variable", color="variable", facet_row_spacing=0.01, facet_col_wrap=2)
# Show y tick labels on the y-axes selected by col=2 (the second facet column).
fig.update_yaxes(showticklabels=True, col=2) # assuming second facet

# Keep the legend, use a white plot background and tight 10px margins
fig.update_layout(
    showlegend=True,
    plot_bgcolor="white",
    margin=dict(t=10,l=10,b=10,r=10)
)

# Render without the plotly mode bar
fig.show(config=dict(displayModeBar=False))