In [51]:
# update_db.py
import os
import pandas as pd
import sqlalchemy as sql
from sqlalchemy.sql.schema import Column
from sqlalchemy.sql.sqltypes import String

# print("file path:", os.getcwd()) # root should in ./tdtoolkit_web
raw_root = os.path.join('.','src','app', 'raw')

path = {
    "axo": os.path.join(raw_root, "AXO"),
    "rdl": os.path.join(raw_root, "RDL"),
    "opt": os.path.join(raw_root, "OPT"),
    "rt": os.path.join(raw_root, "RT"),
    "cond": os.path.join(raw_root, "CONDITIONS"),
    "prop": os.path.join(raw_root, "PROPERTY"),
    "ref": os.path.join(raw_root, "REF"),
    "output": "output",
    "db": "database"
}

# need to separate later
def axo_load(path, cond=pd.DataFrame()):
    """
    The path and the file name need to be below:
    Could be multiple file
    ─┬─ folder[path]
     ├─ 1-1 + 1-2 + 1-3.csv
     ├─ 1-4.csv
     ...
    """
    df = pd.DataFrame()
    # may wrong, need caution
    loc = [5, 3, 1, 6, 4, 2]
    for cwd, dir_name, file_names in os.walk(path):
        for f in file_names:
            file = os.path.join(cwd, f)
            # get short-id from file name
            # Cause there may be multiple panel in one file, we need to
            # deal with it.
            short_id = f.split(".")[0]
            short_id = [s.strip() for s in short_id.split("+")]
            # multiple each point 6 times cause there are 6 point in one panel
            # maybe I should split this function to another place?
            short_id_6 = [id for id in short_id for _ in range(6)]
            location_6 = loc * len(short_id)
            # Todo: more error format handling
            
            tmp_df = pd.read_csv(file, engine="python", skiprows=27, skipfooter=92)
            tmp_df.insert(loc = 1, column = "short-id", value=short_id_6)
            tmp_df.insert(loc = 2, column = "point", value = location_6)
            # some data has different title(?), so we rename it to make concat well 
            tmp_df.columns = ["Chip No.", "Short-id",  "Point", "x", "y", "cell gap", "top rubbing direct", "twist", "top pre-tilt", "bottom pre-tilt", "rms", "iteration"]
            df = pd.concat([df, tmp_df], ignore_index=True)
    # replace short-id if you have condition table
    if cond.empty != True:
        df = df.rename(columns={"Short-id": "ID", "Chip No.": "LC"})
        df["ID"] = df["ID"].map(dict(cond[["Short-id", "ID"]].values))
        df["LC"] = df["ID"].map(dict(cond[["ID", "LC"]].values))
        df["Project"] = df["ID"].map(dict(cond[["ID", "Project"]].values))
        df["Batch"] = df["ID"].map(dict(cond[["ID", "Batch"]].values))
        # neglect the data that doesn't record
        df = df[~df["ID"].isna()]
        df.columns = ["LC", "ID", "Point", "x", "y", "cell gap", "top rubbing direct", "twist", "top pre-tilt", "bottom pre-tilt", "rms", "iteration", "Project", "Batch"]

    return df

def rdl_load(path, cond=pd.DataFrame()):
    """
    The path and the file name need to be below:
    Should be single file.
    ─┬─ folder[path]
     └─ [cell gap].xlsx
    """
    file = next(os.walk(path))[2][0]
    df = pd.read_excel(os.path.join(path, file))
    if cond.empty != True:
        df = df.rename(columns={"Short-id": "ID"})
        df["ID"] = df["ID"].map(dict(cond[["Short-id", "ID"]].values))
        df["LC"] = df["ID"].map(dict(cond[["ID", "LC"]].values))
        df["Project"] = df["ID"].map(dict(cond[["ID", "Project"]].values))
        df["Batch"] = df["ID"].map(dict(cond[["ID", "Batch"]].values))
        # neglect the data that doesn't record
        df = df[~df["ID"].isna()]
        df.columns = ['ID', 'cell gap', 'LC', "Project", "Batch"]
    return df

def opt_load(path, cond=pd.DataFrame()):
    """
    The path and the file name need to be below:
    Could be multiple file
    ─┬─ folder[path]
     ├─ xxxx.csv
     ├─ xxxx.csv
     ...
    """
    df = pd.DataFrame()
    for cwd, dir_name, file_names in os.walk(path):
        for f in file_names:
            if f[0] == ".":
                continue
            if f[0] == "~":
                continue
            file = os.path.join(cwd, f)
            tmp_df = pd.read_csv(file, encoding="ansi").iloc[:,:64]
            # some data preprocessing
            df = pd.concat([df, tmp_df], ignore_index=True)
            
    df.columns = ['Data', 'M_Time', 'ID', 'Point', 'Station', 'Operator', 'Voltage',
        'I.Time', 'AR_T%(⊥)', 'AR_T%(//)', 'LCM_X%', 'LCM_Y%', 'LCM_Z%', 'RX',
        'RY', 'RZ', 'GX', 'GY', 'GZ', 'BX', 'BY', 'BZ', 'WX', 'WY', 'WZ', 'CG%',
        'R_x', 'R_y', 'G_x', 'G_y', 'B_x', 'B_y', 'W_x', 'W_y', 'RX_max',
        'GY_max', 'BZ_max', 'V_RX_max', 'V_GY_max', 'V_BZ_max', "WX'", "WY'",
        "WZ'", "W_x'"," W_x'.1", 'LCM_X%max', 'LCM_Y%max', 'LCM_Z%max',
        'φ_(Ymax)', 'φ_(Ymax).1', 'φ_(Zmax)', 'φ_tol_X', 'φ_tol_Y', 'φ_tol_Z',
        'T0/Tmax_X', 'T0/Tmax_Y', 'T0/Tmax_Z', 'Vcri_X', 'Vcri_Y', 'Vcri_Z',
        'dφ_X', 'dφ_Y', 'dφ_Z', "LC"]
    # voltage == 1 is the wrong rows, need drop
    df = df[df["Voltage"] != 1]
    if cond.empty != True:
        df["LC"] = df["ID"].map(dict(cond[["ID", "LC"]].values))
        df["Project"] = df["ID"].map(dict(cond[["ID", "Project"]].values))
        df["Batch"] = df["ID"].map(dict(cond[["ID", "Batch"]].values))
    return df

def rt_load(path, cond=pd.DataFrame()):
    """
    The path and the file name need to be below:
    Could be multiple file
    ─┬─ folder[path]
     ├─ xxxx.txt
     ├─ xxxx.txt
     ...
    """
    df = pd.DataFrame()
    for cwd, dir_name, file_names in os.walk(path):
        for f in file_names:
            if f[0] == ".":
                continue
            if f[0] == "~":
                continue
            file = os.path.join(cwd, f)
            tmp_df = pd.read_table(file, encoding="ansi")
            # some system encoding would go wrong, so I rename here
            # there are some implicit problem, the data should just like this
            tmp_df.columns = ['Date', 'Time', 'ID', 'Point', 'Station', 'Operator', 'cell pos.', 'Target Vpk',
               'Initial Vpk', 'OD_Rise', 'OD_fall', 'Normalized_V', 'Specific_target',
               'Photo Sensor', 'TempSensor', 'Temp', 'Model', 'Rise-mean (10-90)',
               'Rise-stdev (10-90)', 'Fall-mean (10-90)', 'Fall-stdev (10-90)',
               'Rise-mean (5-95)', 'Rise-stdev (5-95)', 'Fall-mean (5-95)',
               'Fall-stdev (5-95)', 'Vcom', 'Flicker', 'Base lv-mean', 'Top lv-mean',
               'WXT (%)', 'BXT (%)', 'WXT_*', 'BXT_*', 'Overshooting or not',
               'Overshooting %', 'TailTime', 'overshooting_peak', 'overshooting_top',
               '(RisePeak-top)/top', '(FallPeak-base)/base', 'delta_peak', 'delta_v',
               'delta_m', 'c_a', 'peak', 'top', 'HLH_(Peak-Top)', 'HLH_area']
            # neglect incorrect rows
            tmp_df = tmp_df[tmp_df["Point"].isin([1, 2, 3, 4, 5, 6, '1', '2', '3', '4', '5', '6'])]
            # correct the data types
            for col in tmp_df.columns:
                try:
                    tmp_df[col] = tmp_df[col].astype('float')
                except:
                    continue
            df = pd.concat([df, tmp_df], ignore_index=True)
    
    if cond.empty != True:
        df["LC"] = df["ID"].map(dict(cond[["ID", "LC"]].values))
        df["Project"] = df["ID"].map(dict(cond[["ID", "Project"]].values))
        df["Batch"] = df["ID"].map(dict(cond[["ID", "Batch"]].values))
    df = df[df["ID"]!="NAN"]
    
    return df

def cond_load(path):
    """
    The path and the file name need to be below:
    Should be single file.
    ─┬─ folder[path]
     └─ [cond].xlsx
    """
    file = next(os.walk(path))[2][0]
    df = pd.read_excel(os.path.join(path, file))
    df = df.iloc[:,0:5]
    df.columns = ["ID", "LC", "Short-id", "Project", "Batch"]
    return df

def prop_load(path):
    """
    The path and the file name need to be below:
    Should be single file.
    ─┬─ folder[path]
     └─ [prop].xlsx
    """
    file = next(os.walk(path))[2][0]
    df = pd.read_excel(os.path.join(path, file))
    df['Scatter index'] = (df['n_e'] ** 2 - df['n_o'] ** 2) * 3 / df['K11(pN)'] + df['K22(pN)'] + df['K33(pN)']
    df['RT index'] = df['rotation viscosity (γ1)(mPa⋅s)'] / df['K22(pN)']
    return df

def ref_load(path):
    """
    The path and the file name need to be below:
    Should be single file.
    ─┬─ folder[path]
     └─ [ref].xlsx
    """
    file = next(os.walk(path))[2][0]
    df = pd.read_excel(os.path.join(path, file))
    return df

# loading data from raw
cond = cond_load(path["cond"])
axo = axo_load(path["axo"], cond)
rdl = rdl_load(path["rdl"], cond)
opt = opt_load(path["opt"], cond)
rt = rt_load(path["rt"], cond)
prop = prop_load(path["prop"])
ref = ref_load(path["ref"])


# writing to database

engine = sql.create_engine('sqlite:///database/test.db', echo=False)
# engine = sql.create_engine('sqlite://', echo=True)

meta = sql.MetaData()
# Model 是否要分出去?
sql.Table(
    "cond", meta,
    sql.Column("LC", sql.String),
    sql.Column("Short-id", sql.String),
    sql.Column("ID", sql.String, unique=True),
    sql.Column("Project", sql.String),
    sql.Column("Batch", sql.String)
)

meta.create_all(engine)

## check constrain
try:
    cond.to_sql("cond", con=engine, if_exists="append", index=False)
    axo.to_sql("axo", con=engine, if_exists="append", index=False)
    rdl.to_sql("rdl", con=engine, if_exists="append", index=False)
    opt.to_sql("opt", con=engine, if_exists="append", index=False)
    rt.to_sql("rt", con=engine, if_exists="append", index=False)
    prop.to_sql("prop", con=engine, if_exists="append", index=False)
    ref.to_sql("ref", con=engine, if_exists="append", index=False)
    print("Database update!")
except:
    print("Something wrong, maybe the ID condition is duplicate. Database keep")

Database update!


In [None]:
# calculate_summary.py

In [44]:
import sys
import os
import numpy as np
import pandas as pd
import sqlalchemy as sql
from scipy.interpolate import interp1d
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn import linear_model
# from xgboost import XGBRegressor

In [2]:
# batch = "RD11006008" # X50
batch = "RD11001105" # 1098 TR2

engine = sql.create_engine('sqlite:///database/test.db', echo=False)

cond = pd.read_sql(f"SELECT * FROM cond WHERE batch == \"{batch}\"", engine)
axo = pd.read_sql(f"SELECT * FROM axo WHERE batch == \"{batch}\"", engine)
rdl = pd.read_sql(f"SELECT * FROM rdl WHERE batch == \"{batch}\"", engine)
opt = pd.read_sql(f"SELECT * FROM opt WHERE batch == \"{batch}\"", engine)
rt = pd.read_sql(f"SELECT * FROM rt WHERE batch == \"{batch}\"", engine)
prop = pd.read_sql(f"SELECT * FROM prop", engine)
ref = pd.read_sql(f"SELECT * FROM ref WHERE batch == \"{batch}\"", engine)


In [76]:
def custom_f(X):
    features = np.empty(shape=(len(X), 5), dtype=float)
    features[:, 0] = 1
    features[:, 1] = X[:, 0]
    features[:, 2] = X[:, 1]
    features[:, 3] = X[:, 0] * X[:, 1]
    features[:, 4] = X[:, 0] ** 2
    return features
transformer = FunctionTransformer(custom_f)

In [82]:
ref_Tr = ref["Tr(ms)"][0]
ref_cell_gap = ref["cell gap(um)"][0]
ref_LC = ref["LC"][0]

# check is there axo data
if len(axo) != 0:
    rt_cell_gap = pd.merge(rt, axo[["ID", "Point", "cell gap"]], how="left", on=["ID", "Point"])
else:
    rt_cell_gap = pd.merge(rt, rdl[["ID", "cell gap"]], how="left", on="ID")
    
df = rt_cell_gap[rt_cell_gap["LC"] == ref_LC].copy()
df["Tr"] = df["Rise-mean (10-90)"]
df["Vop"] = df["Target Vpk"]

df = df.groupby(by=["ID", "Vop", "Point"], as_index=False).mean()

# sns.scatterplot(data=df, x="Vop", y="Tr")

model = {}
# Let's try some fasion ML (XD
training_set, test_set = train_test_split(
    df,
    test_size = 0.2,
    random_state = 42
)
X_train = training_set[["Tr", "cell gap"]].to_numpy()
y_train = training_set["Vop"].to_numpy()
X_test = test_set[["Tr", "cell gap"]].to_numpy()
y_test = test_set["Vop"].to_numpy()
valid_data = [[ref_Tr, ref_cell_gap]]

# # eXtreme Grandient Boostng Regression
# # -> Although it can easily get high R2_score, Hard to get physics trend.
# model["Vop_ref_XGBR"] = XGBRegressor(
#     n_estimators = 50,
#     learning_rate = 0.1,
#     max_depth = 3,
#     gamma = 0.01,
#     reg_lambda = 0.01
# )
# model["Vop_ref_XGBR"].fit(
#     X_train, y_train,
#     early_stopping_rounds = 10,
#     eval_set = [(X_test, y_test)],
#     verbose = False
# )

# Linear regression

model["Vop_ref_LR"] = Pipeline([
    ('Scalar', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2)),
    ('linear', linear_model.LinearRegression(fit_intercept=False))]
).fit(
    X_train, y_train,
)

print("R2_train:", model["Vop_ref_LR"].score(X_train, y_train))
print("R2_test:", model["Vop_ref_LR"].score(X_test, y_test))
ref_Vop = float(model["Vop_ref_LR"].predict(valid_data))
print("Vop from Ref[Tr, cell gap]:", ref_Vop)

# Calculate RT, Tf, Tr
df = rt_cell_gap.copy()
df["Vop"] = df["Target Vpk"]
df["RT"] = df["Rise-mean (10-90)"] + df["Fall-mean (10-90)"]
df["Tr"] = df["Rise-mean (10-90)"]
df["Tf"] = df["Fall-mean (10-90)"]
training_set, test_set = train_test_split(
    df,
    test_size = 0.1,
)

model["rt"] = {}

for LC in cond["LC"].unique():
    print(LC)
    model["rt"][LC] = {}
    X_train = training_set[training_set["LC"]==LC][["Vop", "cell gap"]].to_numpy()
    X_test = test_set[test_set["LC"]==LC][["Vop", "cell gap"]].to_numpy()
    valid_data = [[ref_Vop, ref_cell_gap]]
    
    for item in ["Tr", "Tf", "RT"]:
        y_train = training_set[training_set["LC"]==LC][item].to_numpy()
        y_test = test_set[test_set["LC"]==LC][item].to_numpy()

        model["rt"][LC][f"{item}_LR"] = Pipeline([
            ('Scalar', StandardScaler()),
#             ('poly', PolynomialFeatures(degree=1)),
            ('Custom_Transformer', transformer),
            ('linear', linear_model.TheilSenRegressor(fit_intercept=False))
        ]).fit(
            X_train, y_train,
        )
        print(f'R2_test {model["rt"][LC][f"{item}_LR"].score(X_test, y_test):.2f}')
        ans = float(model["rt"][LC][f"{item}_LR"].predict(valid_data))
        print(f"{LC}: {item}: {ans:.2f} ms")
        print()


R2_train: 0.982181877589628
R2_test: 0.9808685313384988
Vop from Ref[Tr, cell gap]: 3.8640432213475027
LCT-15-1098
R2_test 0.96
LCT-15-1098: Tr: 15.32 ms

R2_test 0.79
LCT-15-1098: Tf: 10.90 ms

R2_test 0.92
LCT-15-1098: RT: 26.36 ms

SLC19V33L00
R2_test 0.95
SLC19V33L00: Tr: 15.34 ms

R2_test 0.77
SLC19V33L00: Tf: 12.71 ms

R2_test 0.90
SLC19V33L00: RT: 27.97 ms

SLC20V87L00
R2_test 0.95
SLC20V87L00: Tr: 15.65 ms

R2_test 0.08
SLC20V87L00: Tf: 11.14 ms

R2_test 0.87
SLC20V87L00: RT: 26.99 ms

AV369-031-TA
R2_test 0.95
AV369-031-TA: Tr: 15.35 ms

R2_test 0.63
AV369-031-TA: Tf: 10.83 ms

R2_test 0.90
AV369-031-TA: RT: 26.19 ms

ZIX-7054XX
R2_test 0.94
ZIX-7054XX: Tr: 15.26 ms

R2_test 0.52
ZIX-7054XX: Tf: 11.06 ms

R2_test 0.86
ZIX-7054XX: RT: 26.40 ms



In [79]:
# plot for checking
# maybe plot_RT.py?
%matplotlib widget
import matplotlib.pyplot as plt
# ref Vop
# raw
xlabel = "Tr"
ylabel = "cell gap"
zlabel = "Vop"
# plt.figure(figsize=(10,8))
ax = plt.axes(projection="3d")
plot_raw = df[df["LC"] == ref_LC]
ax.scatter(plot_raw[xlabel],plot_raw[ylabel],plot_raw[zlabel], label='raw')
# fitting
x_range = np.linspace(plot_raw[xlabel].min(), plot_raw[xlabel].max(), 50)
y_range = np.linspace(plot_raw[ylabel].min(), plot_raw[ylabel].max(), 50)
x_range, y_range = np.meshgrid(x_range, y_range)
predict_region = np.array(list(zip(x_range.flatten(), y_range.flatten())))
z_predict = model['Vop_ref_LR'].predict(predict_region)
ax.scatter(x_range, y_range, z_predict, label="fitting surface", alpha=0.1)
plt.title(ref_LC)
plt.legend
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.set_zlabel(zlabel)
plt.show()

# sample Tr
xlabel = "Vop"
ylabel = "cell gap"
zlabel = "Tr"
for LC in cond["LC"].unique():
    plt.figure(figsize=(5,4))
    ax = plt.axes(projection="3d")
    plot_raw = df[df["LC"] == LC]
    ax.scatter(plot_raw[xlabel],plot_raw[ylabel],plot_raw[zlabel], label=LC)
    # fitting
    x_range = np.linspace(plot_raw[xlabel].min()-0.1, plot_raw[xlabel].max()+0.1, 50)
    y_range = np.linspace(plot_raw[ylabel].min()-0.1, plot_raw[ylabel].max()+0.1, 50)
    x_range, y_range = np.meshgrid(x_range, y_range)
    predict_region = np.array(list(zip(x_range.flatten(), y_range.flatten())))
    z_predict = model["rt"][LC][f"Tr_LR"].predict(predict_region)
    ax.scatter(x_range, y_range, z_predict, label="fitting surface", alpha=0.1)
    plt.title(LC)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_zlabel(zlabel)
    plt.show()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [143]:
from scipy.interpolate import interp1d
data = opt[(opt.ID=="T19BR001NL2X") & (opt.Point ==1)]
x = data["Voltage"]
y = data["LCM_Y%"]
f = interp1d(x, y, kind='cubic')
x_dense = np.linspace(0, 20, 1000)
y_dense = f(x_dense)

plt.figure(figsize=(5,4))
plt.plot(x_dense, y_dense, )


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x2b3ed10ab50>]

In [169]:
def opt_features_extract(X):
    features = np.empty(shape=(len(X), 6), dtype=float)
    features[:, 0] = 1
    features[:, 1] = X[:, 0]
    features[:, 2] = X[:, 1]
    features[:, 3] = X[:, 0] * X[:, 1]
    features[:, 4] = X[:, 0] ** 2
    features[:, 5] = X[:, 0] ** 3

    return features
transformer_opt = FunctionTransformer(opt_features_extract)

In [170]:
# check is there axo data
if len(axo) != 0:
    opt_cell_gap = pd.merge(opt, axo[["ID", "Point", "cell gap"]], how="left", on=["ID", "Point"])
else:
    opt_cell_gap = pd.merge(opt, rdl[["ID", "cell gap"]], how="left", on="ID")

model["opt"] = {}
df = opt_cell_gap.copy()
# some mapping and rename
df["T%"] = opt_cell_gap.groupby(by=["ID", "Point"])["LCM_Y%"].apply(lambda x: 100*x / float(x.max()))
df["Vop"] = df["Voltage"]/2.0
df["LC%"] = df["LCM_Y%"]
df["Wx"] = df["W_x"]
df["Wy"] = df["W_y"]
# the varient is large when Vop is low, so I cut-off at Vop = 2
df = df[df["Vop"] > 3]
training_set, test_set = train_test_split(
    df,
    test_size = 0.2,
#     random_state = 42
)

for LC in cond["LC"].unique():
    model["opt"][LC] = {}
    X_train = training_set[training_set["LC"]==LC][["Vop", "cell gap"]].to_numpy()
    X_test = test_set[test_set["LC"]==LC][["Vop", "cell gap"]].to_numpy()
    valid_data = [[ref_Vop, ref_cell_gap]]
    for item in ["T%", "LC%"]:
        y_train = training_set[training_set["LC"]==LC][item].to_numpy()
        y_test = test_set[test_set["LC"]==LC][item].to_numpy()
        model["opt"][LC][f'{item}_LR'] = Pipeline([
            ('Scalar', StandardScaler()),
#             ('poly', PolynomialFeatures(degree=3)),
            ('Custom_Transformer', transformer_opt),
            ('linear', linear_model.TheilSenRegressor(fit_intercept=False)),
#             ('linear', linear_model.LinearRegression(fit_intercept=False)),
#             ("GR", GaussianProcessRegressor(kernel=DotProduct()+WhiteKernel()))
        ]).fit(
            X_train, y_train,
        )
        print(f'R2_test {model["opt"][LC][f"{item}_LR"].score(X_test, y_test):.2f}')
        ans = float(model["opt"][LC][f"{item}_LR"].predict(valid_data))
        print(f"{LC}: {item}: {ans:.4f}")
        print()
    for item in ["Wx", "Wy"]:
        y_train = training_set[training_set["LC"]==LC][item].to_numpy()
        y_test = test_set[test_set["LC"]==LC][item].to_numpy()
        model["opt"][LC][f'{item}_LR'] = Pipeline([
            ('Scalar', StandardScaler()),
            ('poly', PolynomialFeatures(degree=2)),
#             ('Custom_Transformer', transformer),
            ('linear', linear_model.TheilSenRegressor(fit_intercept=False))
        ]).fit(
            X_train, y_train,
        )
        print(f'R2_test {model["opt"][LC][f"{item}_LR"].score(X_test, y_test):.2f}')
        ans = float(model["opt"][LC][f"{item}_LR"].predict(valid_data))
        print(f"{LC}: {item}: {ans:.4f}")
        print()
   

R2_test 0.97
LCT-15-1098: T%: 79.3172

R2_test 0.97
LCT-15-1098: LC%: 0.6149

R2_test 0.96
LCT-15-1098: Wx: 0.2779

R2_test 0.97
LCT-15-1098: Wy: 0.2791

R2_test 0.95
SLC19V33L00: T%: 84.8840

R2_test 0.96
SLC19V33L00: LC%: 0.6995

R2_test 0.97
SLC19V33L00: Wx: 0.3002

R2_test 0.97
SLC19V33L00: Wy: 0.3092

R2_test 0.96
SLC20V87L00: T%: 79.8688

R2_test 0.97
SLC20V87L00: LC%: 0.6484

R2_test 0.84
SLC20V87L00: Wx: 0.2860

R2_test 0.85
SLC20V87L00: Wy: 0.2907

R2_test 0.97
AV369-031-TA: T%: 78.9670

R2_test 0.97
AV369-031-TA: LC%: 0.6224

R2_test 0.92
AV369-031-TA: Wx: 0.2840

R2_test 0.92
AV369-031-TA: Wy: 0.2879

R2_test 0.95
ZIX-7054XX: T%: 80.7518

R2_test 0.96
ZIX-7054XX: LC%: 0.6464

R2_test 0.97
ZIX-7054XX: Wx: 0.2842

R2_test 0.97
ZIX-7054XX: Wy: 0.2867



In [171]:
# plot for checking
# maybe plot_RT.py?
%matplotlib widget
import matplotlib.pyplot as plt

# sample Tr
xlabel = "Vop"
ylabel = "cell gap"
for item in ["T%", "LC%", "Wx", "Wy"]:
    zlabel = item
#     for LC in cond["LC"].unique():
    for LC in ["LCT-15-1098"]:
        plt.figure(figsize=(5,4))
        ax = plt.axes(projection="3d")
        plot_raw = df[df["LC"] == LC]
        ax.scatter(plot_raw[xlabel],plot_raw[ylabel],plot_raw[zlabel], label=LC)
        # fitting
        x_range = np.linspace(plot_raw[xlabel].min()-0.1, plot_raw[xlabel].max()+0.1, 50)
        y_range = np.linspace(plot_raw[ylabel].min()-0.1, plot_raw[ylabel].max()+0.1, 50)
        x_range, y_range = np.meshgrid(x_range, y_range)
        predict_region = np.array(list(zip(x_range.flatten(), y_range.flatten())))
        z_predict = model["opt"][LC][f"{item}_LR"].predict(predict_region)
        ax.scatter(x_range, y_range, z_predict, label="fitting surface", alpha=0.1)
        plt.title(LC)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_zlabel(zlabel)
        plt.show()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [156]:
np.round(2.51, decimals=1)

2.5

In [172]:
# Generate table
summary_table = pd.DataFrame(
    columns=["LC", "V90", "V95", "V99", "Vmax", "Vop(V)", "Vop_T%", "Δnd(nm)", "Gap(um)", "LC%", "Wx", "Wx_gain", "Wy", "Wy_gain", "u'", "v'", "Ea", "Eb", "ΔEab", "CR", "ΔCR", "T%", "Scatter", "D", "W", "Tr(ms)", "Tf(ms)", "RT(ms)", "G2G(ms)"]
)
# cell gap range
# +- 0.5 um, precise to 0.1 um
center_cell_gap = np.round(ref_cell_gap, decimals=1)
cell_gap_range = np.linspace(center_cell_gap-0.5, center_cell_gap+0.5, 11)
for LC in cond["LC"].unique():
    for cell_gap in cell_gap_range:
        summary_table = summary_table.append({"LC": LC, "Gap(um)": cell_gap}, ignore_index=True)
        # rt
        X = [[ref_Vop, cell_gap]]
        Tr = model["rt"][LC]["Tr_LR"].predict(X)
        Tf = model["rt"][LC]["Tf_LR"].predict(X)
        RT = model["rt"][LC]["RT_LR"].predict(X)
        summary_table.loc[((summary_table["LC"] == LC) & (summary_table["Gap(um)"] == cell_gap)), "RT(ms)"] = RT[0]
        summary_table.loc[((summary_table["LC"] == LC) & (summary_table["Gap(um)"] == cell_gap)), "Tr(ms)"] = Tr[0]
        summary_table.loc[((summary_table["LC"] == LC) & (summary_table["Gap(um)"] == cell_gap)), "Tf(ms)"] = Tf[0]
        # opt
        Wx = model["opt"][LC]["Wx_LR"].predict(X)
        Wy = model["opt"][LC]["Wy_LR"].predict(X)
        T = model["opt"][LC]["T%_LR"].predict(X)
        LCp = model["opt"][LC]["LC%_LR"].predict(X)
        summary_table.loc[((summary_table["LC"] == LC) & (summary_table["Gap(um)"] == cell_gap)), "Wx"] = Wx[0]
        summary_table.loc[((summary_table["LC"] == LC) & (summary_table["Gap(um)"] == cell_gap)), "Wy"] = Wy[0]
        summary_table.loc[((summary_table["LC"] == LC) & (summary_table["Gap(um)"] == cell_gap)), "T%"] = T[0]
        summary_table.loc[((summary_table["LC"] == LC) & (summary_table["Gap(um)"] == cell_gap)), "LC%"] = LCp[0] * 100


In [173]:
summary_table[["LC", "Gap(um)", "Wx", "Wy", "T%", "LC%", "Tr(ms)", "Tf(ms)", "RT(ms)"]]

Unnamed: 0,LC,Gap(um),Wx,Wy,T%,LC%,Tr(ms),Tf(ms),RT(ms)
0,LCT-15-1098,2.6,0.262657,0.258501,75.684357,48.109875,12.39494,7.637916,19.826464
1,LCT-15-1098,2.7,0.265524,0.262209,76.382989,50.683444,12.957347,8.264746,21.081975
2,LCT-15-1098,2.8,0.268419,0.266038,77.081621,53.257012,13.519753,8.891575,22.337486
3,LCT-15-1098,2.9,0.271342,0.269987,77.780253,55.83058,14.08216,9.518405,23.592997
4,LCT-15-1098,3.0,0.274293,0.274058,78.478886,58.404148,14.644566,10.145235,24.848508
5,LCT-15-1098,3.1,0.277271,0.278248,79.177518,60.977717,15.206973,10.772064,26.104019
6,LCT-15-1098,3.2,0.280278,0.28256,79.87615,63.551285,15.769379,11.398894,27.359531
7,LCT-15-1098,3.3,0.283312,0.286993,80.574782,66.124853,16.331786,12.025724,28.615042
8,LCT-15-1098,3.4,0.286374,0.291546,81.273414,68.698422,16.894193,12.652553,29.870553
9,LCT-15-1098,3.5,0.289464,0.296219,81.972046,71.27199,17.456599,13.279383,31.126064


In [159]:
summary_table

Unnamed: 0,LC,platform,V90,V95,V99,Vmax,Vop(V),Vop_T%,Δnd(nm),Gap(um),...,CR,ΔCR,T%,Scatter,D,W,Tr(ms),Tf(ms),RT(ms),G2G(ms)
0,LCT-15-1098,,,,,,,,,2.6,...,,,64.422418,,,,12.39494,7.637916,19.826464,
1,LCT-15-1098,,,,,,,,,2.7,...,,,71.041515,,,,12.957347,8.264746,21.081975,
2,LCT-15-1098,,,,,,,,,2.8,...,,,75.089093,,,,13.519753,8.891575,22.337486,
3,LCT-15-1098,,,,,,,,,2.9,...,,,77.203244,,,,14.08216,9.518405,23.592997,
4,LCT-15-1098,,,,,,,,,3.0,...,,,78.022063,,,,14.644566,10.145235,24.848508,
5,LCT-15-1098,,,,,,,,,3.1,...,,,78.183642,,,,15.206973,10.772064,26.104019,
6,LCT-15-1098,,,,,,,,,3.2,...,,,78.326077,,,,15.769379,11.398894,27.359531,
7,LCT-15-1098,,,,,,,,,3.3,...,,,79.087461,,,,16.331786,12.025724,28.615042,
8,LCT-15-1098,,,,,,,,,3.4,...,,,81.105888,,,,16.894193,12.652553,29.870553,
9,LCT-15-1098,,,,,,,,,3.5,...,,,85.019451,,,,17.456599,13.279383,31.126064,


In [77]:
(summary_table["LC"] == LC) & (summary_table["Gap(um)"] == cell_gap)

0    True
dtype: bool

In [46]:
test_df1 = pd.DataFrame([["A", 2.5]], columns=["LC", "Gap(um)"])
test_df2 = pd.DataFrame([["A", 2.5]], columns=["LC2", "Gap(um)2"])

In [60]:
tmp_df = pd.DataFrame(columns=df.columns)
tmp_df = tmp_df.append({"LC": "A"}, ignore_index=True)

In [61]:
tmp_df.loc[tmp_df.LC == 'A', 'cell gap'] = 2.0
tmp_df

Unnamed: 0,Data,M_Time,ID,Point,Station,Operator,Voltage,I.Time,AR_T%(⊥),AR_T%(//),...,dφ_Y,dφ_Z,LC,project,batch,point,cell gap,T%,Vop,LC%
0,,,,,,,,,,,...,,,A,,,,2,,,


In [59]:
tmp_df

Unnamed: 0,Data,M_Time,ID,Point,Station,Operator,Voltage,I.Time,AR_T%(⊥),AR_T%(//),...,dφ_Y,dφ_Z,LC,project,batch,point,cell gap,T%,Vop,LC%


In [102]:
# reading property table test
# for now it's just messy trash...
pd.read_excel("old_ref/20200924_液晶data sheet (總整理) .xlsx",sheet_name="量產負型AAS",index_col=False)

Unnamed: 0,產品,*,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Mobile,Mobile.1,...,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,NB,Unnamed: 22,Unnamed: 23
0,Type,*,AAS,,AAS,,AAS,,AAS,AAS,...,AAS,,AAS,AAS,AAS,,,AAS,,
1,LC Name,*,LCT-13-1386,,LCT-16-1381,,LCT-15-1098,,ZYH-7013XX,LCT-16-1228,...,LCT-16-1381,,LCT-17-1336,LCT-19-580,SLC19V33L00,LCT-16-1200,ZIX-7244XX,LCT-19-1113,,
2,廠區,*,"南廠, T1",,T3,,T1,,L6,T6,...,T3,,T6/T3/T2,T6,,,,,,
3,廠商,,Merck,,Merck,,Merck,,JNC,Merck,...,Merck,,Merck,Merck,SliChem,Merck,JNC,Merck,,
4,廠商量測LC溫度(℃),,20,25.0,20,25.0,20,25.0,25,20,...,20,25.0,25,25,25,25,25,20,25.0,30.0
5,Transition Temp.,Tni(℃),84.5,,84.9,84.9,85.2,85.2,80,78.2,...,84.9,84.9,84.6,79.4,79.9,91.1,,77.8,,
6,,Tcn(℃),-30,-30.0,,,-30,-30.0,<-30,-30,...,-30,-30.0,-30,-30,-30,-20,-15,,,
7,cell Gap (um),,,3.1,2.8,2.8,3.1,3.1,3.2,3.2,...,2.8,2.8,2.8,3,2.8,3.65,3.65,3.2,3.2,3.2
8,d*△n (um),,,0.29481,0.29904,0.29372,0.30256,0.29822,0.32,0.32512,...,0.29904,0.29372,0.3234,0.3201,0.32228,0.309885,,0.30848,0.30208,0.29568
9,Optical Anisotropy (589....,ne,1.5774,1.5741,1.5905,1.5872,1.5794,1.5769,1.586,1.5832,...,1.5905,1.5872,1.6015,1.5925,1.6028,1.563,,1.5776,1.5743,1.5711


In [127]:
x = np.linspace(0, 1, 50)
y = np.exp(3*x)*np.sin(np.pi/1.15*x)
plt.figure(figsize=(5,4))
plt.plot(x, y)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x2b3d21426d0>]