In [1]:
import ibmdata
import pandas as pd
import scipy.stats as stats

In [5]:
fev = '7HPP_ST1_CT_ALL'
lot_ids = ['A2Z7C.1','A2Z7B.1','A2Z34.1','A2YCZ.1']

def convert_list_to_SQL_input(mylist):
        out = ""
        for i,l in enumerate(mylist):
            if i != len(mylist)-1:
                out += "'" + l + "', "
            else:
                out += "'" + l + "'"
        return out
    
lot_ids = convert_list_to_SQL_input(lot_ids)

QUERY = f"""
SELECT lot_id, wafer_id, ewr_name, cell_name
FROM DMIW_SYSTEMS.EWR_EVENT_FACT eef
INNER JOIN DMIW_SYSTEMS.WAFER w ON eef.waferkey = w.waferkey
INNER JOIN DMIW_SYSTEMS.PRODUCT p ON eef.productkey = p.productkey
INNER JOIN DMIW_SYSTEMS.EWR_EVENT ee ON eef.ewr_eventkey = ee.ewr_eventkey
INNER JOIN DMIW_SYSTEMS.EWR_CELL ec ON eef.ewr_cellkey = ec.ewr_cellkey
WHERE lot_id IN ({lot_ids})
    AND EWR_NAME IN ('{fev}')
"""

df = ibmdata.isdw.query(QUERY)

2023-06-06 09:53:18,512 INFO     ibmdata   : retrieved (100, 4) (rows, cols) of data in 0:00:00.341665 from ISDW


In [8]:
df

Unnamed: 0,lot_id,wafer_id,ewr_name,cell_name
0,A2Z7C.1,A2Z7C-24,7HPP_ST1_CT_ALL,ctcd1
1,A2Z7C.1,A2Z7C-23,7HPP_ST1_CT_ALL,ctcd1
2,A2Z7C.1,A2Z7C-25,7HPP_ST1_CT_ALL,ctcd1
3,A2Z7C.1,A2Z7C-13,7HPP_ST1_CT_ALL,ct.3
4,A2Z7C.1,A2Z7C-08,7HPP_ST1_CT_ALL,ctcd1
...,...,...,...,...
95,A2YCZ.1,A2YCZ-20,7HPP_ST1_CT_ALL,ct.4_5s
96,A2YCZ.1,A2YCZ-01,7HPP_ST1_CT_ALL,ctcd1
97,A2YCZ.1,A2YCZ-09,7HPP_ST1_CT_ALL,ct.4_5s
98,A2YCZ.1,A2YCZ-21,7HPP_ST1_CT_ALL,ct.3


In [2]:
def convert_list_to_SQL_input(mylist):
    out = ""
    for i,l in enumerate(mylist):
        if i != len(mylist)-1:
            out += "'" + l + "', "
        else:
            out += "'" + l + "'"
    return out

In [3]:
def pull_data(DAYSBACK: int, fev: str, lot_ids: list):
    
    def convert_list_to_SQL_input(mylist):
        out = ""
        for i,l in enumerate(mylist):
            if i != len(mylist)-1:
                out += "'" + l + "', "
            else:
                out += "'" + l + "'"
        return out
    
    lot_ids = convert_list_to_SQL_input(lot_ids)
    SQL = f"""
    WITH ewr_cte AS(
        SELECT lot_id, wafer_id, ewr_name, cell_name
        FROM DMIW_SYSTEMS.EWR_EVENT_FACT eef
        INNER JOIN DMIW_SYSTEMS.WAFER w ON eef.waferkey = w.waferkey
        INNER JOIN DMIW_SYSTEMS.PRODUCT p ON eef.productkey = p.productkey
        INNER JOIN DMIW_SYSTEMS.EWR_EVENT ee ON eef.ewr_eventkey = ee.ewr_eventkey
        INNER JOIN DMIW_SYSTEMS.EWR_CELL ec ON eef.ewr_cellkey = ec.ewr_cellkey
        WHERE lot_id IN ({lot_ids})
            AND EWR_NAME IN ('{fev}')
    ),
    sail_cte AS(
        SELECT LEFT(wafer_id,5) AS lot_id_base, lot_id, wafer_id, family_code, 
          MIN(tw.Last_test_date) AS Sail_date, 1 AS Sail_n,
          AVG(CASE WHEN parm_Label = 'SAIL23_H2_0p50to1p05_PerfectYield' THEN yield ELSE NULL END) AS SAILALL_0p50to1p05,
          AVG(CASE WHEN parm_Label = 'SAIL23_H2_0p65to1p05_PerfectYield' THEN yield ELSE NULL END) AS SAILALL_0p65to1p05,
          AVG(CASE WHEN parm_Label = 'SAIL2_H2_0p50to1p05_PerfectYield' THEN yield ELSE NULL END) AS SAIL2_0p50to1p05,
          AVG(CASE WHEN parm_Label = 'SAIL2_H2_0p65to1p05_PerfectYield' THEN yield ELSE NULL END) AS SAIL2_0p65to1p05,
          AVG(CASE WHEN parm_Label = 'SAIL3_H2_0p50to1p05_PerfectYield' THEN yield ELSE NULL END) AS SAIL3_0p50to1p05,
          AVG(CASE WHEN parm_Label = 'SAIL3_H2_0p65to1p05_PerfectYield' THEN yield ELSE NULL END) AS SAIL3_0p65to1p05
        FROM DMIW.PTileWaferFact ptwf
        INNER JOIN DMIW_SYSTEMS.TestParm tp ON ptwf.testparmkey = tp.testparmkey
        INNER JOIN DMIW_SYSTEMS.TestedWafer tw ON ptwf.testedWaferKey = tw.testedWaferKey
        WHERE tw.Last_test_date >= (current date - {DAYSBACK} days)
          AND tw.Tech_id = '7HPP'
          AND tw.Level = 'H2'
          AND tw.Calcdefs in ('SSL11', 'SSL21', 'SSL31')
          AND weighted_Mean IS NOT null AND ABS(weighted_Mean) < 1e25
          AND UCASE(tp.parm_Label) LIKE '%_PERFECTYIELD'
          AND lot_id IN ({lot_ids})
        GROUP BY lot_id, wafer_id, family_code
        ORDER BY Sail_date desc, lot_id, wafer_id
    ),
    pele_cte AS(
        SELECT lot_Id, wafer_Id, family_code, MIN(last_Test_Date) AS Pele_date, 1 as Pele_n,
           AVG(CASE WHEN ucase(tp.parm_Label) = 'PELE1_H2_L3_HL_PERFECTYIELD' THEN yield ELSE null END) AS PELE1_L3_Vmax_PY,
           AVG(CASE WHEN ucase(tp.parm_Label) = 'PELE1_H2_L3_NL_PERFECTYIELD' THEN yield ELSE null END) AS PELE1_L3_Vnom_PY,
           AVG(CASE WHEN ucase(tp.parm_Label) = 'PELE1_H2_L3_LL_PERFECTYIELD' THEN yield ELSE null END) AS PELE1_L3_Vmin_PY,
           AVG(CASE WHEN ucase(tp.parm_Label) = 'PELE1_H2_1K_HL_PERFECTYIELD' THEN yield ELSE null END) AS PELE1_1K_Vmax_PY,
           AVG(CASE WHEN ucase(tp.parm_Label) = 'PELE1_H2_1K_NL_PERFECTYIELD' THEN yield ELSE null END) AS PELE1_1K_Vnom_PY,
           AVG(CASE WHEN ucase(tp.parm_Label) = 'PELE1_H2_1K_LL_PERFECTYIELD' THEN yield ELSE null END) AS PELE1_1K_Vmin_PY
        FROM DMIW.PTileWaferFact ptwf
        INNER JOIN DMIW_SYSTEMS.TestParm tp ON tp.testParmKey = ptwf.testParmKey 
        INNER JOIN DMIW_SYSTEMS.TestedWafer tw ON tw.testedWaferKey = ptwf.testedWaferKey
        WHERE Last_test_date >= (current date - {DAYSBACK} days) AND
            Tech_id = '7HPP' and Level = 'H2' AND tw.Calcdefs IN ('PEL11', 'PEL21', 'PEL31') AND
            weighted_Mean IS NOT null AND abs(weighted_Mean) < 1e25 AND
            (ucase(tp.parm_Label) LIKE 'PELE%_H2_%PERFECTYIELD')
            AND lot_id IN ({lot_ids})
        GROUP BY lot_Id, wafer_Id, family_code
    )
    SELECT s.lot_id_base, s.lot_id, s.family_code, ln.ewr_name, ln.cell_name, Sail_date, Sail_n, s.wafer_id, 
        SAILALL_0p50to1p05, SAILALL_0p65to1p05,
        SAIL2_0p50to1p05, SAIL2_0p65to1p05, SAIL3_0p50to1p05, SAIL3_0p65to1p05,
        Pele_date, Pele_n, s.wafer_id AS wafer_id_2,
        PELE1_L3_Vmax_PY, PELE1_L3_Vnom_PY, PELE1_L3_Vmin_PY,
        PELE1_1K_Vmax_PY, PELE1_1K_Vnom_PY, PELE1_1K_Vmin_PY
    FROM sail_cte s
    FULL OUTER JOIN pele_cte p ON s.wafer_id = p.wafer_id
    LEFT JOIN ewr_cte ln ON s.wafer_id = ln.wafer_id
    ORDER BY Sail_date desc, s.lot_id, s.wafer_id
    """
    return ibmdata.isdw.query(SQL)

In [4]:
df = pull_data(300, '7HPP_ST1_CT_ALL', ['A2Z7C.1','A2Z7B.1','A2Z34.1','A2YCZ.1'])

2023-04-19 17:45:42,882 INFO     ibmdata   : retrieved (91, 23) (rows, cols) of data in 0:00:00.756879 from ISDW


In [66]:
df.to_csv('~/Downloads/ewrdata.csv')

In [5]:
df.columns

Index(['lot_id_base', 'lot_id', 'family_code', 'ewr_name', 'cell_name',
       'sail_date', 'sail_n', 'wafer_id', 'sailall_0p50to1p05',
       'sailall_0p65to1p05', 'sail2_0p50to1p05', 'sail2_0p65to1p05',
       'sail3_0p50to1p05', 'sail3_0p65to1p05', 'pele_date', 'pele_n',
       'wafer_id_2', 'pele1_l3_vmax_py', 'pele1_l3_vnom_py',
       'pele1_l3_vmin_py', 'pele1_1k_vmax_py', 'pele1_1k_vnom_py',
       'pele1_1k_vmin_py'],
      dtype='object')

In [88]:
# Create ANOVA backbone table
data = [['Between Groups', '', '', '', '', '', ''], ['Within Groups', '', '', '', '', '', ''], ['Total', '', '', '', '', '', '']] 
anova_table = pd.DataFrame(data, columns = ['Source of Variation', 'SS', 'df', 'MS', 'F', 'P-value', 'F crit']) 
anova_table.set_index('Source of Variation', inplace = True)

parm = 'sailall_0p65to1p05'
split = 'cell_name'

# calculate SSTR and update anova table
x_bar = df[parm].mean()
SSTR = df.groupby(split).count() * (df.groupby(split).mean(numeric_only=True) - x_bar)**2
anova_table['SS']['Between Groups'] = SSTR[parm].sum()

# calculate SSE and update anova table
SSE = (df.groupby(split).count() - 1) * df.groupby(split).std(numeric_only=True)**2
anova_table['SS']['Within Groups'] = SSE[parm].sum()

# calculate SSTR and update anova table
SSTR = SSTR[parm].sum() + SSE[parm].sum()
anova_table['SS']['Total'] = SSTR

# update degree of freedom
anova_table['df']['Between Groups'] = df[split].nunique() - 1
anova_table['df']['Within Groups'] = df.shape[0] - df[split].nunique()
anova_table['df']['Total'] = df.shape[0] - 1

# calculate MS
anova_table['MS'] = anova_table['SS'] / anova_table['df']

# calculate F 
F = anova_table['MS']['Between Groups'] / anova_table['MS']['Within Groups']
anova_table['F']['Between Groups'] = F

# p-value
anova_table['P-value']['Between Groups'] = 1 - stats.f.cdf(F, anova_table['df']['Between Groups'], anova_table['df']['Within Groups'])

# F critical 
alpha = 0.05
# possible types "right-tailed, left-tailed, two-tailed"
tail_hypothesis_type = "two-tailed"
if tail_hypothesis_type == "two-tailed":
    alpha /= 2
anova_table['F crit']['Between Groups'] = stats.f.ppf(1-alpha, anova_table['df']['Between Groups'], anova_table['df']['Within Groups'])

# Final ANOVA Table
anova_table

Unnamed: 0_level_0,SS,df,MS,F,P-value,F crit
Source of Variation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Between Groups,8.490427,3,2.830142,2.176182,0.096572,3.270163
Within Groups,113.144189,87,1.300508,,,
Total,121.634615,90,1.351496,,,


In [89]:
# The p-value approach
print("Approach 1: The p-value approach to hypothesis testing in the decision rule")
conclusion = "Failed to reject the null hypothesis."
if anova_table['P-value']['Between Groups'] <= alpha:
    conclusion = "Null Hypothesis is rejected."
print("F-score is:", anova_table['F']['Between Groups'], " and p value is:", anova_table['P-value']['Between Groups'])    
print(conclusion)
    
# The critical value approach
print("\n--------------------------------------------------------------------------------------")
print("Approach 2: The critical value approach to hypothesis testing in the decision rule")
conclusion = "Failed to reject the null hypothesis."
if anova_table['F']['Between Groups'] > anova_table['F crit']['Between Groups']:
    conclusion = "Null Hypothesis is rejected."
print("F-score is:", anova_table['F']['Between Groups'], " and critical value is:", anova_table['F crit']['Between Groups'])
print(conclusion)

Approach 1: The p-value approach to hypothesis testing in the decision rule
F-score is: 2.1761822671573925  and p value is: 0.09657170239917012
Failed to reject the null hypothesis.

--------------------------------------------------------------------------------------
Approach 2: The critical value approach to hypothesis testing in the decision rule
F-score is: 2.1761822671573925  and critical value is: 3.270163438711064
Failed to reject the null hypothesis.
