In [10]:
import pandas as pd
import numpy as np
import statsmodels.api as sm # you must have installed statsmodels -> pip install statsmodels
from statsmodels.formula.api import ols

df = pd.read_excel('./Gage_Study.xlsx')

## Validating the data

In [11]:
df

Unnamed: 0,Operator,Week,Part,Measure,Evaluated feature
0,2,3,4,1.480,Width
1,1,1,4,1.480,Width
2,3,2,9,1.481,Width
3,2,2,6,1.481,Width
4,3,2,5,1.478,Width
...,...,...,...,...,...
85,3,3,7,1.487,Width
86,2,2,9,1.482,Width
87,3,3,8,1.481,Width
88,2,1,1,1.480,Width


## Sorting data

In [12]:
# Sort by column 'Operator' and 'Week' in ascending order
df.sort_values(by=['Operator', 'Week', 'Part'], inplace=True)

# Reset indexes after sorting
df.reset_index(drop=True, inplace=True)

df.drop('Evaluated feature', axis=1, inplace=True)

output_file_path = './Gage_Study_Sorted.xlsx'
df.to_excel(output_file_path, index=False)

print(f"The sorted data has been stored in: {output_file_path}")

The sorted data has been stored in: ./Gage_Study_Sorted.xlsx


In [13]:
df

Unnamed: 0,Operator,Week,Part,Measure
0,1,1,1,1.480
1,1,1,2,1.481
2,1,1,3,1.482
3,1,1,4,1.480
4,1,1,5,1.478
...,...,...,...,...
85,3,3,6,1.481
86,3,3,7,1.487
87,3,3,8,1.481
88,3,3,9,1.482


# R&R analysis

In [22]:

# Crear un modelo ANOVA para evaluar la repetibilidad y reproducibilidad
model = ols('Measure ~ Operator + Part + Operator:Part', data=df).fit()

# Imprimir el resumen del modelo ANOVA
# print(model.summary())

# Obtener la tabla ANOVA
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)



                     sum_sq    df         F    PR(>F)
Operator       1.066667e-06   1.0  0.196903  0.658347
Part           1.031145e-05   1.0  1.903455  0.171269
Operator:Part  3.959596e-07   1.0  0.073093  0.787534
Residual       4.658815e-04  86.0       NaN       NaN


# Sample size

In [24]:
import scipy.stats as stats

# Given parameters
lower_limit = 1.38
upper_limit = 1.52
desired_error = 0.001
standard_deviation = 0.004
casetones_production = 2800

# 95% confidence level for a normal distribution (two-tailed)
z_score = stats.norm.ppf(0.975)

# Calculate the sample size
sample_size = ((z_score * standard_deviation) / desired_error)**2

# Adjust the sample size to the nearest integer
adjusted_sample_size = int(np.ceil(sample_size))

# Consider the total production and adjust if necessary
if adjusted_sample_size > casetones_production:
    adjusted_sample_size = casetones_production

# Print the result
print(f"Required sample size: {adjusted_sample_size}")

Required sample size: 62


#### Required sample size: 62