Control charts

In [None]:
# Import the dataset
# TODO: replace the empty string with the path of the CSV file to analyse
data = pd.read_csv('')

# Preview the first rows to check that the file was parsed as expected
data.head()

Moving ranges I-MR

In [None]:
# Build the I-MR control chart in a single call with the qda helper
# TODO: replace the empty string with the name of the column to monitor
df = qda.ControlCharts.IMR(data, '')

In [None]:
# Moving range of span 2: absolute difference between each observation and
# the previous one (the first row has no predecessor, so its MR is NaN)
data['MR'] = data[''].diff(periods=1).abs()

In [None]:
# Control-chart constants for a moving range of span 2
d2 = qda.constants.getd2(2)
D4 = qda.constants.getD4(2)

# Work on a copy so that `data` keeps its original column names;
# the monitored column is called 'I' from here on
df = data.copy().rename(columns={'':'I'})

# Centre lines: mean of the individual values and mean moving range
Ibar = df['I'].mean()
MRbar = df['MR'].mean()

# I chart limits: Ibar +/- 3 * MRbar / d2
df['I_UCL'] = Ibar + (3*MRbar/d2)
df['I_CL'] = Ibar
df['I_LCL'] = Ibar - (3*MRbar/d2)
# MR chart limits: from 0 up to D4 * MRbar
df['MR_UCL'] = D4 * MRbar
df['MR_CL'] = MRbar
df['MR_LCL'] = 0

# Alarm rule: keep the value of every point that falls outside its limits,
# NaN everywhere else
df['I_TEST1'] = np.where((df['I_UCL'] < df['I']) |
                (df['I_LCL'] > df['I']), df['I'], np.nan)
df['MR_TEST1'] = np.where((df['MR_UCL'] < df['MR']) |
                (df['MR_LCL'] > df['MR']), df['MR'], np.nan)

df.head()

In [None]:
# Plot the I chart: individual values against their control limits
fig, ax = plt.subplots()
ax.set_title('I chart')
# the observations are drawn exactly once
ax.plot(df['I'], color='b', linestyle='--', marker='o')
ax.plot(df['I_UCL'], color='r')
ax.plot(df['I_CL'], color='g')
ax.plot(df['I_LCL'], color='r')
ax.set_ylabel('Individual Value')
ax.set_xlabel('Sample number')
# add the values of the control limits on the right side of the plot;
# only the first row of each limit column is read for the label
for limit_col, limit_label in (('I_UCL', 'UCL = {:.2f}'),
                               ('I_CL', 'CL = {:.2f}'),
                               ('I_LCL', 'LCL = {:.2f}')):
    limit_value = df[limit_col].iloc[0]
    ax.text(len(df)+.5, limit_value, limit_label.format(limit_value),
            verticalalignment='center')
# highlight the points that violate the alarm rules
ax.plot(df['I_TEST1'], linestyle='none', marker='s', color='r', markersize=10)
plt.show()

In [None]:
# Plot the MR chart: moving ranges against their control limits
plt.title('MR chart')
plt.ylabel('Moving Range')
plt.xlabel('Sample number')
plt.plot(df['MR'], color='b', linestyle='--', marker='o')
# control limits in red, centre line in green
for limit_col, limit_color in (('MR_UCL', 'r'), ('MR_CL', 'g'), ('MR_LCL', 'r')):
    plt.plot(df[limit_col], color=limit_color)
# add the values of the control limits on the right side of the plot
for limit_col, limit_label in (('MR_UCL', 'UCL = {:.2f}'),
                               ('MR_CL', 'CL = {:.2f}'),
                               ('MR_LCL', 'LCL = {:.2f}')):
    limit_value = df[limit_col].iloc[0]
    plt.text(len(df)+.5, limit_value, limit_label.format(limit_value),
             verticalalignment='center')
# highlight the points that violate the alarm rules
plt.plot(df['MR_TEST1'], linestyle='none', marker='s', color='r', markersize=10)
plt.show()

Data Model

In [None]:
# Regressor for the trend model: the sample number 1, 2, ..., n
data['t'] = np.arange(len(data)) + 1

data.head()

In [None]:
# Linear regression of the observations on the sample number t
y = data['time']
x = sm.add_constant(data['t'])  # adds the constant (intercept) column
model = sm.OLS(y, x).fit()

# Print model summary
qda.summary(model)

In [None]:
# Check the assumptions on the residuals: four-panel plot, normality and
# randomness tests, then autocorrelation functions
fig, axs = plt.subplots(2, 2)
fig.suptitle('Residual Plots')
# top row: normal probability plot | residuals versus fitted values
stats.probplot(model.resid, dist="norm", plot=axs[0,0])
axs[0,1].scatter(model.fittedvalues, model.resid)
# bottom row: histogram | residuals in observation order
axs[1,0].hist(model.resid)
axs[1,1].plot(np.arange(len(model.resid)) + 1, model.resid, 'o-')
# titles are set after plotting so they replace any default title
for panel, panel_title in ((axs[0,0], 'Normal probability plot'),
                           (axs[0,1], 'Versus Fits'),
                           (axs[1,0], 'Histogram')):
    panel.set_title(panel_title)

# Normality of the residuals
pval_SW_res = stats.shapiro(model.resid)[1]
print('Shapiro-Wilk test p-value on the residuals = %.3f' % pval_SW_res)

# Randomness of the residuals
pval_runs_res = runstest_1samp(model.resid, correction=False)[1]
print('Runs test p-value on the residuals = {:.3f}'.format(pval_runs_res))

# Autocorrelation and partial autocorrelation up to a third of the sample size
n_lags = len(data) // 3
fig, ax = plt.subplots(2, 1)
sgt.plot_acf(model.resid, lags=n_lags, zero=False, ax=ax[0])
sgt.plot_pacf(model.resid, lags=n_lags, zero=False, ax=ax[1], method='ywm')
plt.show()

Trend control chart

In [None]:
# Inputs for the chart on the residuals: 'I' holds the model residuals and
# 'MR' the absolute difference between consecutive residuals
df_res = pd.DataFrame({'I': model.resid}).assign(
    MR=lambda frame: frame['I'].diff().abs()
)

df_res.describe()

In [None]:
# FVC  (presumably "fitted values chart" - TODO confirm the acronym)

# Recalculate the control limits: the centre line follows the fitted values
# and the band half-width comes from the mean moving range of the residuals
half_width = 3 * df_res['MR'].mean() / d2
df['I_CL'] = model.fittedvalues
df['I_UCL'] = df['I_CL'] + half_width
df['I_LCL'] = df['I_CL'] - half_width
# keep the value of each point outside the band, NaN elsewhere
df['I_TEST1'] = np.where((df['I_UCL'] < df['I']) | (df['I_LCL'] > df['I']), df['I'], np.nan)

In [None]:
# Optional: if the monitored quantity is known to be non-negative, uncomment
# the next line to replace negative lower control limits with 0
#df['I_LCL'] = np.where((df['I_LCL'] < 0), 0, df['I_LCL'])

In [None]:
# Plot the I chart with the limits recalculated around the fitted values
fig, ax = plt.subplots()
ax.set_title('I chart')
# the observations are drawn exactly once
ax.plot(df['I'], color='b', linestyle='--', marker='o')
# control limits in red, centre line in green
ax.plot(df['I_UCL'], color='r')
ax.plot(df['I_CL'], color='g')
ax.plot(df['I_LCL'], color='r')
ax.set_ylabel('Individual Value')
ax.set_xlabel('Sample number')
# highlight the points that violate the alarm rules
ax.plot(df['I_TEST1'], linestyle='none', marker='s',
        color='r', markersize=10)
plt.show()

In [None]:
# SCC  (presumably the special cause control chart - TODO confirm the acronym)

# Centre lines computed from the residuals
res_bar = df_res['I'].mean()
res_MRbar = df_res['MR'].mean()

# Control limits using residuals
df_res['I_UCL'] = res_bar + (3*res_MRbar/d2)
df_res['I_CL'] = res_bar
df_res['I_LCL'] = res_bar - (3*res_MRbar/d2)
df_res['MR_UCL'] = D4 * res_MRbar
df_res['MR_CL'] = res_MRbar
df_res['MR_LCL'] = 0

# Alarm rule columns: value of each point outside its limits, NaN elsewhere
df_res['I_TEST1'] = np.where((df_res['I_UCL'] < df_res['I']) |
            (df_res['I_LCL'] > df_res['I']), df_res['I'], np.nan)
df_res['MR_TEST1'] = np.where((df_res['MR_UCL'] < df_res['MR']) |
            (df_res['MR_LCL'] > df_res['MR']), df_res['MR'], np.nan)

df_res.head()

In [None]:
# Plot the I chart of the residuals
fig, ax = plt.subplots()
ax.set_title('I chart')
# the residuals are drawn exactly once
ax.plot(df_res['I'], color='b', linestyle='--', marker='o')
ax.plot(df_res['I_UCL'], color='r')
ax.plot(df_res['I_CL'], color='g')
ax.plot(df_res['I_LCL'], color='r')
ax.set_ylabel('Individual Value')
ax.set_xlabel('Sample number')
# add the values of the control limits on the right side of the plot;
# only the first row of each limit column is read for the label
for limit_col, limit_label in (('I_UCL', 'UCL = {:.2f}'),
                               ('I_CL', 'CL = {:.2f}'),
                               ('I_LCL', 'LCL = {:.2f}')):
    limit_value = df_res[limit_col].iloc[0]
    ax.text(len(df_res)+.5, limit_value, limit_label.format(limit_value),
            verticalalignment='center')
# highlight the points that violate the alarm rules
ax.plot(df_res['I_TEST1'], linestyle='none', marker='s',
        color='r', markersize=10)
plt.show()

In [None]:
# Plot the MR chart of the residuals
plt.title('MR chart')
plt.ylabel('Moving Range')
plt.xlabel('Sample number')
plt.plot(df_res['MR'], color='b', linestyle='--', marker='o')
# control limits in red, centre line in green
for limit_col, limit_color in (('MR_UCL', 'r'), ('MR_CL', 'g'), ('MR_LCL', 'r')):
    plt.plot(df_res[limit_col], color=limit_color)
# add the values of the control limits on the right side of the plot
for limit_col, limit_label in (('MR_UCL', 'UCL = {:.2f}'),
                               ('MR_CL', 'CL = {:.2f}'),
                               ('MR_LCL', 'LCL = {:.2f}')):
    limit_value = df_res[limit_col].iloc[0]
    plt.text(len(df_res)+.5, limit_value, limit_label.format(limit_value),
             verticalalignment='center')
# highlight the points that violate the alarm rules
plt.plot(df_res['MR_TEST1'], linestyle='none', marker='s',
        color='r', markersize=10)
plt.show()

In [None]:
# If an out-of-control (OOC) point has an assignable cause, model it with a
# dummy variable

# 0-based row positions of the samples flagged on the residual I chart.
# NOTE(review): this cell used to read `data_IMR`, a name that is never
# defined in this notebook; `df_res` (the residual chart data) is assumed to
# be the intended frame - confirm.
OOC_idx = np.flatnonzero(df_res['I_TEST1'].notnull())
# Create the dummy variable X: 1 on the flagged rows, 0 elsewhere.
# Rows are matched by position, so the 1-based `t` column cannot shift the
# flag by one sample, and several flagged rows are handled as well (they all
# share this single dummy column).
data['dummy'] = np.where(np.isin(np.arange(len(data)), OOC_idx), 1, 0)
data.head()

In [None]:
# Fit the trend model again with the dummy variable as an extra regressor
y = data['time']
x = sm.add_constant(data[['t', 'dummy']])
model2 = sm.OLS(y, x).fit()
qda.summary(model2)

# Observed series (blue) against the fitted values of the new model (red)
plt.plot(data['t'], y, color='b', linestyle='-', marker='o')
plt.plot(data['t'], model2.fittedvalues, color='r')
plt.show()

In [None]:
# Residual check for the model with the dummy variable (model2)
fig, axs = plt.subplots(2, 2)
fig.suptitle('Residual Plots')
stats.probplot(model2.resid, dist="norm", plot=axs[0,0])
axs[0,0].set_title('Normal probability plot')
# residuals and fitted values must both come from model2
axs[0,1].scatter(model2.fittedvalues, model2.resid)
axs[0,1].set_title('Versus Fits')
axs[1,0].hist(model2.resid)
axs[1,0].set_title('Histogram')
# residuals in observation order
axs[1,1].plot(np.arange(1, len(model2.resid)+1), model2.resid, 'o-')

# Normality of the residuals
_, pval_SW_res = stats.shapiro(model2.resid)
print('Shapiro-Wilk test p-value on the residuals = %.3f' % pval_SW_res)

# Randomness of the residuals
_, pval_runs_res = runstest_1samp(model2.resid, correction=False)
print('Runs test p-value on the residuals = {:.3f}'.format(pval_runs_res))

# Autocorrelation and partial autocorrelation up to a third of the sample size
fig, ax = plt.subplots(2, 1)
sgt.plot_acf(model2.resid, lags = int(len(data)/3), zero=False, ax=ax[0])
sgt.plot_pacf(model2.resid, lags = int(len(data)/3), zero=False, ax=ax[1],
            method = 'ywm')
plt.show()

In [None]:
# Create the IMR chart on the residuals of the model with the dummy variable;
# the residuals are wrapped in a one-column dataframe named 'res2'
df_res2 = qda.ControlCharts.IMR(pd.DataFrame({'res2': model2.resid}), 'res2')

AR Control Chart

In [None]:
# Regress each observation on the previous one (lag 1)
# TODO: replace the empty strings with the name of the column to model
data['lag1'] = data[''].shift(periods=1)

# The first row is dropped because its lagged value is missing
y = data[''].iloc[1:]
# add_constant adds a column of ones so that the model includes an intercept
x = sm.add_constant(data['lag1'].iloc[1:])
model = sm.OLS(y, x).fit()
qda.summary(model)

# then check the residuals

In [None]:
# SCC  (presumably the special cause control chart on the residuals - TODO confirm)
# NOTE(review): open question left by the author - the approximation used for the trend data does not seem to work the same way here; to be clarified

# Put the residuals of the lag-1 model in a dataframe with a single column 'res'
df_SCC = pd.DataFrame(model.resid, columns=['res'])
# Plot the IMR control chart of the residuals and keep the resulting dataframe
df_SCC_IMR = qda.ControlCharts.IMR(df_SCC, 'res')

In [None]:
# Original data (blue) against the fitted values of the model (red)
fig, ax = plt.subplots()
ax.plot(data[''], color='b', linestyle='-', marker='o', label='temp')
ax.plot(model.fittedvalues, color='r', linestyle='--', marker='s', label='FITS')
ax.legend()
plt.show()

In [None]:
# Model based control chart: limits centred on the fitted values

d2 = qda.constants.getd2(2)
# mean moving range of the residuals, read from the centre line of the MR chart
MRbar_res = df_SCC_IMR['MR_CL'].iloc[0]
band = 3 * MRbar_res / d2
# new dataframe with original data and center line (fitted values); the first
# row is skipped because the lag-1 model has no fitted value for it
# NOTE(review): the column name 'temp' is hard-coded here while the other
# cells of this section use the '' placeholder - keep them in sync
df = pd.DataFrame({'I': data['temp'].iloc[1:], 'I_CL': model.fittedvalues}, index=data.index[1:])
df['I_UCL'] = df['I_CL'] + band
df['I_LCL'] = df['I_CL'] - band
# keep the value of each point outside the band, NaN elsewhere
df['I_TEST1'] = np.where((df['I_UCL'] < df['I']) | (df['I_LCL'] > df['I']), df['I'], np.nan)

In [None]:
# Plot the I chart with the model based control limits
fig, ax = plt.subplots()
ax.set_title('I chart')
# the observations are drawn exactly once
ax.plot(df['I'], color='b', linestyle='--', marker='o')
# control limits in red, centre line (fitted values) in green
ax.plot(df['I_UCL'], color='r')
ax.plot(df['I_CL'], color='g')
ax.plot(df['I_LCL'], color='r')
ax.set_ylabel('Individual Value')
ax.set_xlabel('Sample number')
# highlight the points that violate the alarm rules
ax.plot(df['I_TEST1'], linestyle='none', marker='s',
        color='r', markersize=10)
plt.show()

I-MR-R Chart

In [None]:
# Xbar-R control chart (between groups)
data_XR = qda.ControlCharts.XbarR(data)
# IMR chart on a new dataframe that holds only the sample means
data_Xbar = qda.ControlCharts.IMR(pd.DataFrame(data_XR['sample_mean']), 'sample_mean')

In [None]:
# I-MR-R control chart

# Build the IMR chart of the sample means. The input frame is rebuilt from
# data_XR on every run instead of feeding data_Xbar back into IMR, so the
# cell gives the same result however many times it is executed.
data_Xbar = qda.ControlCharts.IMR(pd.DataFrame(data_XR['sample_mean']), 'sample_mean')

# Plot the R chart as well
fig, ax = plt.subplots()
ax.set_title('R chart')
ax.plot(data_XR['sample_range'], color='b', linestyle='--', marker='o')
# control limits in red, centre line in green
ax.plot(data_XR['R_UCL'], color='r')
ax.plot(data_XR['R_CL'], color='g')
ax.plot(data_XR['R_LCL'], color='r')
ax.set_ylabel('Sample range')
ax.set_xlabel('Sample number')
# add the values of the control limits on the right side of the plot;
# only the first row of each limit column is read for the label
for limit_col, limit_label in (('R_UCL', 'UCL = {:.3f}'),
                               ('R_CL', 'CL = {:.3f}'),
                               ('R_LCL', 'LCL = {:.3f}')):
    limit_value = data_XR[limit_col].iloc[0]
    ax.text(len(data_XR)+.5, limit_value, limit_label.format(limit_value),
            verticalalignment='center')
# highlight the points that violate the alarm rules
ax.plot(data_XR['R_TEST1'], linestyle='none', marker='s', color='r', markersize=10)
plt.show()
