In [None]:
import ibmdata
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Original Pull From QDAT

In [None]:
# Select list shared by both halves of the UNION query built below.
# Each *_18 alias adds 0.018 to its source column when PRODUCT_SCHEMA = 'p10dd2'
# and passes the source value through unchanged for every other schema.
columns = """
VERSION, LOTNUMBER, PRODUCT_SCHEMA, WAFER_ID, CHIPX, CHIPY, KERF_X, KERF_Y,
RADIUS_CENTER_5_NEW, RADIUS_CENTER_5, MOL_VMAX, RVT_PFET_VMAX,
CASE
    WHEN PRODUCT_SCHEMA = 'p10dd2' THEN RVT_PFET_VMAX + 0.018
    ELSE RVT_PFET_VMAX
END AS RVTP_VMAX_18,
ULVT_PFET_VMAX, 
CASE
    WHEN PRODUCT_SCHEMA = 'p10dd2' THEN ULVT_PFET_VMAX + 0.018
    ELSE ULVT_PFET_VMAX
END AS ULVTP_VMAX_18,
VMAXPFET, SLVT_NFET_VMAX, 
CASE
    WHEN PRODUCT_SCHEMA = 'p10dd2' THEN SLVT_NFET_VMAX + 0.018
    ELSE SLVT_NFET_VMAX
END AS SLVTN_VMAX_18,
OPDB_VMAX, MOL54, 
CASE
    WHEN PRODUCT_SCHEMA = 'p10dd2' THEN MOL54 + 0.018
    ELSE MOL54
END AS MOL54_VMAX_18, 
MOL60,
CASE
    WHEN PRODUCT_SCHEMA = 'p10dd2' THEN MOL60 + 0.018
    ELSE MOL60
END AS MOL60_VMAX_18,
FILE_DATE, TIMESTAMP, DATE(timestamp) as Test_Date, VARCHAR_FORMAT(timestamp,'YYYY-MM') AS Test_Month,
VARCHAR_FORMAT(timestamp,'YYYY-WW') AS Test_Week
"""
# Look-back window in days; compared against DATE(timestamp) in both SELECTs.
DAYSBACK = 120

# UNION of P10DD2.OPDBVMAX (version 1009) and zArtemis.OPDBVMAX (version 1004).
# The first SELECT emits NULL AS LVT_PFET_VMAX while the second selects the real
# column, so both halves return the same number of columns.
# NOTE(review): `wafer_id like 'A2F3V-01'` contains no wildcard, so it restricts the
# pull to that one wafer id -- looks like a leftover debugging filter; confirm.
QUERY = f"""
SELECT {columns}, NULL AS LVT_PFET_VMAX
FROM
    P10DD2.OPDBVMAX
WHERE
    DATE(timestamp) >= (CURRENT DATE - {DAYSBACK} days)
    AND version = 1009
    AND wafer_id like 'A2F3V-01'
UNION
SELECT {columns}, LVT_PFET_VMAX
FROM
    zArtemis.OPDBVMAX
WHERE
    DATE(timestamp) >= (CURRENT DATE - {DAYSBACK} days)
    AND version = 1004
    AND wafer_id like 'A2F3V-01'
ORDER BY
    PRODUCT_SCHEMA, wafer_id, chipx, chipy, test_date
"""

In [None]:
# Run the QDAT query and keep a single row per chip: for each
# (wafer_id, chipx, chipy) only the last row in result order survives.
df = ibmdata.qdat.query(QUERY).drop_duplicates(
    subset=['wafer_id', 'chipx', 'chipy'], keep='last'
)

# Every Vmax column is cast to float and rounded to two decimals.
vmax_columns = [
    'mol_vmax', 'rvt_pfet_vmax', 'rvtp_vmax_18',
    'ulvt_pfet_vmax', 'ulvtp_vmax_18', 'vmaxpfet',
    'slvt_nfet_vmax', 'slvtn_vmax_18', 'opdb_vmax',
    'mol54', 'mol54_vmax_18', 'mol60', 'mol60_vmax_18',
]
for column in vmax_columns:
    df[column] = df[column].astype(float).round(2)

In [None]:
# Preview the first 20 columns of the frame.
df.iloc[:, :20]

In [None]:
# Save the QDAT pull as CSV. The destination is home-relative rather than a
# user-specific absolute path so the cell runs under any account; pandas expands
# the leading "~" (the ISDW export cell in this notebook relies on the same thing).
df.to_csv('~/Downloads/tmp1.csv')

# Pull From ISDW

In [None]:
def generate_SQL_GroupBy(mydict, parm_label = 'parm_Label', val = 'parmYield', agg = 'AVG'):
    """Build the SELECT-list fragment that pivots parameter rows into columns.

    For every entry of ``mydict`` (iterated in order) one expression of the form
    ``{agg}(CASE WHEN {parm_label} = '<parm>' THEN {val} ELSE NULL END) AS <parm>``
    is produced. Expressions are separated by a comma and a newline, with no
    trailing separator. An empty ``mydict`` yields an empty string.

    Parameters
    ----------
    mydict : iterable of str
        Parameter labels; each one is used both as the compared literal and as
        the column alias.
    parm_label : str
        Name of the SQL column holding the parameter label.
    val : str
        Name of the SQL column holding the value to aggregate.
    agg : str
        SQL aggregate function name.

    Returns
    -------
    str
        The assembled SQL fragment. Values are interpolated verbatim, so callers
        must only pass trusted identifiers.
    """
    return ",\n".join(
        f"{agg}(CASE WHEN {parm_label} = '{parm}' THEN {val} ELSE NULL END) AS {parm}"
        for parm in mydict
    )

In [None]:
# Parameter labels that each become one AVG(CASE ...) column in the ISDW query.
parms = [
    'MOL_VMAX', 'RVT_PFET_VMAX', 'ULVT_PFET_VMAX', 'VmaxPFET', 'SLVT_NFET_VMAX',
    'OPDB_VMAX', 'MOL54', 'MOL60', 'LVT_PFET_VMAX',
]
cases = generate_SQL_GroupBy(parms, parm_label='parm_label', val='parmvalue')

In [None]:
# Look-back window in days for the ISDW pull. This rebinds the DAYSBACK that the
# QDAT query cell set to 120.
DAYSBACK = 300
# Returns one row per GROUP BY key (lot, family, wafer, chip position, kerf, radius,
# quadrant). `cases` contributes one AVG(CASE ...) column per parameter label.
# The *_18 columns average parmvalue + 0.018 for family_code 'Q6' and the unshifted
# parmvalue for 'X2'; rows with any other label or family contribute NULL.
# NOTE(review): `wafer_id like 'A2F3V-01'` contains no wildcard, so it restricts the
# pull to that one wafer id -- looks like a leftover debugging filter; confirm.
QUERY = f"""
SELECT lotlabel AS lotnumber, family_code, wafer_id, 
    normalized_testx AS chipx, normalized_testy AS chipy, kerf_x, kerf_y,
    radius_center_5, quadrant, MAX(last_testtimestamp) AS timestamp, 
    MAX(DATE(last_testtimestamp)) AS Test_Date, MAX(VARCHAR_FORMAT(last_testtimestamp,'YYYY-MM')) AS Test_Month,
    MAX(VARCHAR_FORMAT(last_testtimestamp,'YYYY-WW')) AS Test_Week, {cases},
    AVG(CASE
        WHEN family_code = 'Q6' AND parm_label = 'RVT_PFET_VMAX' THEN parmvalue + 0.018
        WHEN family_code = 'X2' AND parm_label = 'RVT_PFET_VMAX' THEN parmvalue
        ELSE NULL
    END) AS RVTP_VMAX_18,
    AVG(CASE
        WHEN family_code = 'Q6' AND parm_label = 'ULVT_PFET_VMAX' THEN parmvalue + 0.018
        WHEN family_code = 'X2' AND parm_label = 'ULVT_PFET_VMAX' THEN parmvalue
        ELSE NULL
    END) AS ULVTP_VMAX_18,
    AVG(CASE
        WHEN family_code = 'Q6' AND parm_label = 'SLVT_NFET_VMAX' THEN parmvalue + 0.018
        WHEN family_code = 'X2' AND parm_label = 'SLVT_NFET_VMAX' THEN parmvalue
        ELSE NULL
    END) AS SLVTN_VMAX_18,
    AVG(CASE
        WHEN family_code = 'Q6' AND parm_label = 'MOL54' THEN parmvalue + 0.018
        WHEN family_code = 'X2' AND parm_label = 'MOL54' THEN parmvalue
        ELSE NULL
    END) AS MOL54_VMAX_18,
    AVG(CASE
        WHEN family_code = 'Q6' AND parm_label = 'MOL60' THEN parmvalue + 0.018
        WHEN family_code = 'X2' AND parm_label = 'MOL60' THEN parmvalue
        ELSE NULL
    END) AS MOL60_VMAX_18
FROM
    DMIW.ChipParmFactR cpfr
    INNER JOIN DMIW_SYSPRC.testparm tp ON cpfr.testparmkey = tp.testparmkey
    INNER JOIN DMIW_SYSPRC.Geography g ON cpfr.geographyKey = g.geographyKey
    INNER JOIN DMIW_SYSPRC.testedwafer tw ON cpfr.testedwaferkey = tw.testedwaferkey
WHERE
    Tech_id = '7HPP' AND tw.Calcdefs = 'WL01R'
    AND DATE(tw.last_TestTimeStamp) >= (CURRENT DATE - {DAYSBACK} days)
    AND family_code IN ('Q6', 'X2')
    AND wafer_id like 'A2F3V-01'
GROUP BY 
    lotlabel,family_code,wafer_id,normalized_testx,normalized_testy,kerf_x, kerf_y,radius_center_5,quadrant
ORDER BY
    family_code, wafer_id, chipx, chipy, test_date
"""

In [None]:
# Run the ISDW query and keep a single row per chip: for each
# (wafer_id, chipx, chipy) only the last row in result order survives.
df = ibmdata.isdw.query(QUERY).drop_duplicates(
    subset=['wafer_id', 'chipx', 'chipy'], keep='last'
)

# Derive product_schema from the family code: 'Q6' -> 'p10dd2', anything else -> 'zadd2'.
df['product_schema'] = [
    'p10dd2' if code == 'Q6' else 'zadd2' for code in df['family_code']
]

# Every Vmax column is cast to float and rounded to two decimals.
vmax_columns = [
    'mol_vmax', 'rvt_pfet_vmax', 'rvtp_vmax_18',
    'ulvt_pfet_vmax', 'ulvtp_vmax_18', 'vmaxpfet',
    'slvt_nfet_vmax', 'slvtn_vmax_18', 'opdb_vmax',
    'mol54', 'mol54_vmax_18', 'mol60', 'mol60_vmax_18',
]
for column in vmax_columns:
    df[column] = df[column].astype(float).round(2)

In [None]:
# Display the current frame as the cell output.
df

In [None]:
# Optional: run this cell to save the current frame as a CSV file.
# Change the destination path if ~/Downloads is not where you want the file.
df.to_csv('~/Downloads/tmp2.csv')

# Generate Tables and Charts

In [None]:
# Lower OPDB Vmax limit per product; a chip below its product's limit is "bad".
OPDB_VMAX_LIMITS = {'p10dd2': 0.9, 'zadd2': 1.05}

# Vectorised flag, computed once. Products without a limit map to NaN and missing
# opdb_vmax values are NaN too; both compare False, i.e. the chip is not flagged.
is_bad_chip = df['opdb_vmax'] < df['product_schema'].map(OPDB_VMAX_LIMITS)

# 0/1 per chip, so a group 'sum' yields the number of bad chips.
df['num_bad_chips'] = is_bad_chip.astype(int)
# 0/100 per chip, so a group 'mean' yields an actual percentage (previously this
# column held 0/1 and its mean was a fraction despite the '%' in the name).
df['%_bad_chips'] = 100 * df['num_bad_chips']

In [None]:
# Monthly summary per product: wafers tested, bad-chip count and rate, and the mean
# of each Vmax column.
monthly_agg = {'wafer_id': 'nunique', 'num_bad_chips': 'sum', '%_bad_chips': 'mean'}
monthly_agg.update({
    column: 'mean'
    for column in [
        'opdb_vmax', 'rvt_pfet_vmax', 'rvtp_vmax_18', 'ulvt_pfet_vmax', 'ulvtp_vmax_18',
        'slvt_nfet_vmax', 'slvtn_vmax_18', 'mol54', 'mol54_vmax_18', 'mol60',
        'mol60_vmax_18', 'mol_vmax', 'vmaxpfet', 'pfet_lvt_vbd_scale',
    ]
})
# Neither query in this notebook selects 'pfet_lvt_vbd_scale', and .agg raises
# KeyError when the spec names a missing column, so only aggregate what df has.
monthly_agg = {column: how for column, how in monthly_agg.items() if column in df.columns}

(
    df.groupby(['product_schema', 'test_month'])
      .agg(monthly_agg)
      .reset_index()
      .rename(columns={'wafer_id': 'num_wafers'})
)

In [None]:
# Per-wafer summary: earliest test date, bad-chip count and rate, and the mean of
# each Vmax column.
wafer_agg = {'test_date': 'min', 'num_bad_chips': 'sum', '%_bad_chips': 'mean'}
wafer_agg.update({
    column: 'mean'
    for column in [
        'opdb_vmax', 'rvt_pfet_vmax', 'rvtp_vmax_18', 'ulvt_pfet_vmax', 'ulvtp_vmax_18',
        'slvt_nfet_vmax', 'slvtn_vmax_18', 'mol54', 'mol54_vmax_18', 'mol60',
        'mol60_vmax_18', 'mol_vmax', 'vmaxpfet', 'pfet_lvt_vbd_scale',
    ]
})
# Neither query in this notebook selects 'pfet_lvt_vbd_scale', and .agg raises
# KeyError when the spec names a missing column, so only aggregate what df has.
wafer_agg = {column: how for column, how in wafer_agg.items() if column in df.columns}

df.groupby(['product_schema', 'lotnumber', 'wafer_id']).agg(wafer_agg).reset_index()

In [None]:
# Percentage of chips per product whose opdb_vmax is below 1.
# The mean of the boolean flag is the share of True rows (NaN compares False and
# still counts in the denominator, exactly like len(x) did). Grouping the flag
# Series avoids DataFrameGroupBy.apply, which warns in pandas >= 2.2 when the
# applied function also receives the grouping column.
(
    df['opdb_vmax'].lt(1)
      .groupby(df['product_schema'])
      .mean()
      .mul(100)
      .round(4)
      .reset_index(name='opdb_vmax < 1 (%)')
)

In [None]:
# Mean / median / standard deviation of each shifted Vmax column, one row per column.
shifted_columns = ['rvtp_vmax_18', 'ulvtp_vmax_18', 'slvtn_vmax_18', 'mol54_vmax_18', 'mol60_vmax_18']
stat_labels = {'index': 'device', 'mean': 'mean (V)', 'median': 'median (V)', 'std': 'std (mV)'}
t = (
    df[shifted_columns]
    .agg(['mean', 'median', 'std'])
    .transpose()
    .reset_index()
    .rename(columns=stat_labels)
)
# The std column is scaled by 1000 to match its "(mV)" label.
t['std (mV)'] = t['std (mV)'] * 1000
t

In [None]:
# Mean / median / standard deviation of opdb_vmax per product.
# Selecting the single column before .agg gives flat column names, so the
# 'product_schema' header survives reset_index (the previous MultiIndex + droplevel
# approach left that column with an empty-string name).
data = (
    df.groupby('product_schema')['opdb_vmax']
      .agg(['mean', 'median', 'std'])
      .rename(columns={'mean': 'mean (V)', 'median': 'median (V)', 'std': 'std (mV)'})
      .reset_index()
)
# The std column is scaled by 1000 to match its "(mV)" label.
data['std (mV)'] = data['std (mV)'] * 1000
data.round(4)

In [None]:
# Distinct wafers per product (rows) and test month (columns), plus a per-row total.
data = df.pivot_table(
    values='wafer_id',
    index='product_schema',
    columns='test_month',
    aggfunc=lambda wafers: wafers.nunique(),
)
data.columns.name = None  # drop the 'test_month' label from the column axis
data['total'] = data.sum(axis=1)
data = data.reset_index().rename(columns={'product_schema': 'family_code'})
data

In [None]:
# Empirical CDF of the shifted Vmax columns for p10dd2 chips, x axis limited to
# 1-1.5. (Pass marginal='histogram' to px.ecdf to add a marginal histogram.)
ecdf_columns = ['rvtp_vmax_18', 'ulvtp_vmax_18', 'slvtn_vmax_18', 'mol54_vmax_18', 'mol60_vmax_18']
is_p10dd2 = df['product_schema'] == 'p10dd2'
fig = px.ecdf(df[is_p10dd2], x=ecdf_columns)
fig.update_xaxes(range=[1, 1.5])

In [None]:
# Empirical CDF of the shifted Vmax columns for zadd2 chips, x axis limited to
# 1-1.5. (Pass marginal='histogram' to px.ecdf to add a marginal histogram.)
ecdf_columns = ['rvtp_vmax_18', 'ulvtp_vmax_18', 'slvtn_vmax_18', 'mol54_vmax_18', 'mol60_vmax_18']
is_zadd2 = df['product_schema'] == 'zadd2'
fig = px.ecdf(df[is_zadd2], x=ecdf_columns)
fig.update_xaxes(range=[1, 1.5])

In [None]:
# Box plot of each shifted Vmax column for p10dd2 chips.
box_columns = ['rvtp_vmax_18', 'ulvtp_vmax_18', 'slvtn_vmax_18', 'mol54_vmax_18', 'mol60_vmax_18']
is_p10dd2 = df['product_schema'] == 'p10dd2'
fig = px.box(df[is_p10dd2], y=box_columns)
fig.show()

In [None]:
# Box plot of each shifted Vmax column for zadd2 chips.
box_columns = ['rvtp_vmax_18', 'ulvtp_vmax_18', 'slvtn_vmax_18', 'mol54_vmax_18', 'mol60_vmax_18']
is_zadd2 = df['product_schema'] == 'zadd2'
fig = px.box(df[is_zadd2], y=box_columns)
fig.show()

In [None]:
def multigroup(df, xval, yvals, xylabels, subplots=False):
    """Build box plots of several columns against one grouping column.

    Parameters
    ----------
    df : mapping of column name -> values (e.g. a DataFrame)
        Source of the plotted data; indexed as ``df[column]``.
    xval : str
        Column used for the x values of every box trace.
    yvals : sequence of str
        Columns to plot; one ``go.Box`` trace is added per entry, named after it.
    xylabels : sequence
        ``xylabels[0]`` is the x-axis title, ``xylabels[1]`` the y-axis title.
    subplots : bool, default False
        If True, lay the traces out in a single row with one subplot per entry of
        ``yvals`` (titled with the column name). If False, draw all traces on one
        set of axes with ``boxmode='group'``.

    Returns
    -------
    The assembled plotly figure; it is not shown here.
    """
    x_title = xylabels[0]
    y_title = xylabels[1]

    if not subplots:
        # Single axes: boxes of the different traces are grouped per x value.
        fig = go.Figure()
        for column in yvals:
            fig.add_trace(go.Box(y=df[column], x=df[xval], name=column))
        fig.update_layout(yaxis_title=y_title, xaxis_title=x_title, boxmode='group')
        return fig

    # One subplot per column, side by side in a single row.
    fig = make_subplots(rows=1, cols=len(yvals), subplot_titles=(yvals))
    for position, column in enumerate(yvals, start=1):
        fig.add_trace(go.Box(y=df[column], x=df[xval], name=column), row=1, col=position)
        fig.update_xaxes(title_text=x_title, row=1, col=position)
    # The y title is set through the layout-level `yaxis_title` only.
    fig.update_layout(yaxis_title=y_title)
    return fig

In [None]:
# Test months of the p10dd2 chips. Read from df directly: `plotdf` is first
# assigned in a later cell, so `plotdf.test_month` raised NameError here when the
# notebook was run top to bottom on a fresh kernel.
df.loc[df['product_schema'] == 'p10dd2', 'test_month']

In [None]:
# Grouped box plot per test month for p10dd2 chips, all traces on one set of axes.
# To plot the MOL columns instead, use yvals = ['mol54_vmax_18','mol60_vmax_18'].
plotdf = df.loc[df['product_schema'] == 'p10dd2']
xval, yvals = 'test_month', ['rvtp_vmax_18', 'ulvtp_vmax_18', 'slvtn_vmax_18']
fig = multigroup(plotdf, xval, yvals, xylabels=['Month', 'Vmax'], subplots=False)
fig.show()

In [None]:
# Box plots per test month for p10dd2 chips, one subplot per column.
# To plot the MOL columns instead, use yvals = ['mol54_vmax_18','mol60_vmax_18'].
plotdf = df.loc[df['product_schema'] == 'p10dd2']
xval, yvals = 'test_month', ['rvtp_vmax_18', 'ulvtp_vmax_18', 'slvtn_vmax_18']
fig = multigroup(plotdf, xval, yvals, xylabels=['Month', 'Vmax'], subplots=True)
fig.show()

In [None]:
# Box plot of opdb_vmax per test month for p10dd2 chips (single subplot).
plotdf = df.loc[df['product_schema'] == 'p10dd2']
xval, yvals = 'test_month', ['opdb_vmax']
fig = multigroup(plotdf, xval, yvals, xylabels=['Month', 'Vmax'], subplots=True)
fig.show()

In [None]:
# Empirical CDF of opdb_vmax, one curve per product, x axis limited to 1.05-1.10.
# (Pass marginal='histogram' to px.ecdf to add a marginal histogram.)
fig = px.ecdf(df, x='opdb_vmax', color='product_schema')
fig.update_xaxes(range=[1.05, 1.10])
fig.show()

In [None]:
# Get a table handle from ibmdata.qdat for ('P10', 'DTS_POST_RBI').
# NOTE(review): presumably (schema, table name) -- confirm against the ibmdata API.
data_table = ibmdata.qdat.table('P10','DTS_POST_RBI')

In [None]:
# One-element list holding a LIKE predicate on component_serial_number.
# The pattern 'B6669778' contains no wildcard characters.
where_predicates = [data_table.c.component_serial_number.like('B6669778')]

In [None]:
# Display the predicate list as the cell output.
where_predicates

In [None]:
# Index the table handle with the first predicate and display the result.
# NOTE(review): presumably this filters rows by the predicate -- confirm what
# ibmdata table objects return from __getitem__.
data_table[where_predicates[0]]

In [None]:
# Same LIKE predicate as where_predicates[0], bound to a scratch name.
# `a` is not referenced by any other cell visible in this notebook.
a = data_table.c.component_serial_number.like('B6669778')

In [None]:
# Display the table handle as the cell output.
data_table