In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import seaborn as sns
# Configure the seaborn theme once. A preceding sns.set(style="white") call was
# redundant: each sns.set call re-applies the whole theme, so only the last
# style ("whitegrid") was ever in effect.
sns.set(style="whitegrid", color_codes=True)

### Data loading

In [2]:
DATA_DIR = "data/welding_wave"

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the concatenated frame identical across runs and machines.
filenames = sorted(os.listdir(DATA_DIR))

df_list = []    # one raw frame per file, in the same order as `filenames`
type_list = []  # matching [hz, gap_type] labels parsed from each file name
for filename in filenames:
    # File names are parsed as "<hz>_<gap id>...": the first token is the
    # frequency label and the first four characters of the second token are
    # the gap type.
    hz, gap_id = filename.split("_")[:2]
    gap_type = gap_id[:4]

    type_list.append([hz, gap_type])
    df_list.append(
        pd.read_csv(
            os.path.join(DATA_DIR, filename),
            delimiter="\t",
            header=None,
            names=["time_order", "ampere", "volt"],
        )
    )

In [3]:
# Attach the labels parsed from each file name to every row of that file, then
# stack all files into one frame with a fresh 0..n-1 index.
# Note: the frames are tagged in place, so `df_list` and `modifed_df_list`
# reference the same (now labelled) DataFrame objects.
modifed_df_list = []
for df, (hz, gap_type) in zip(df_list, type_list):
    df["hz"] = hz
    df["gap_type"] = gap_type
    modifed_df_list.append(df)
all_df = pd.concat(modifed_df_list).reset_index(drop=True)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# Baseline feature matrix: raw current and voltage plus the frequency encoded
# as 0/1. Column order (ampere, volt, hz) is what the model sees.
hz_codes = {"2000Hz": 0, "4000Hz": 1}
X_df = all_df[["ampere", "volt"]].copy()
# map() yields an integer column directly; replace() on a string column only
# did so through pandas' deprecated silent downcasting.
X_df["hz"] = all_df["hz"].map(hz_codes)
if X_df["hz"].isna().any():
    # Fail loudly instead of feeding unencoded labels / NaN to the model.
    unknown = sorted(set(all_df["hz"].dropna()) - set(hz_codes))
    raise ValueError(f"Unexpected hz labels without an encoding: {unknown}")

# Encode the gap type strings as integer class ids; `le` is kept so that
# predictions can be mapped back with le.inverse_transform.
le = preprocessing.LabelEncoder()
y_df = all_df.gap_type

X = X_df.values
y = le.fit_transform(y_df.values)

# Fixed seed so the dev/test partition (and every score below) is reproducible
# under Restart & Run All.
X_dev, X_test, y_dev, y_test = train_test_split(X, y, random_state=42)

In [15]:
from sklearn.model_selection import KFold
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import accuracy_score


def get_welding_wave_classification_result(clf, X_dev, y_dev, n_polynomial=1, kfold_split=3,
                                           random_state=None):
    """Cross-validate ``clf`` on polynomially expanded features.

    The development set is split with a shuffled K-fold. For every fold the
    features are expanded with ``PolynomialFeatures(n_polynomial)``, ``clf`` is
    fitted on the training part and accuracy is measured on both the training
    and the validation part.

    Parameters
    ----------
    clf : estimator exposing ``fit(X, y)`` and ``predict(X)``
        Fitted once per fold on the same object, so after the call it holds
        the fit from the last fold.
    X_dev : 2-D numpy array
        Feature matrix, indexed by row with integer index arrays.
    y_dev : 1-D numpy array
        Class labels aligned with the rows of ``X_dev``.
    n_polynomial : int, default 1
        Degree passed to ``PolynomialFeatures``.
    kfold_split : int, default 3
        Number of folds.
    random_state : int or None, default None
        Seed for the fold shuffling; ``None`` keeps the previous unseeded
        behaviour, an integer makes the scores repeatable.

    Returns
    -------
    tuple of float
        ``(mean training accuracy, mean validation accuracy)`` over the folds.
    """
    kf = KFold(n_splits=kfold_split, shuffle=True, random_state=random_state)
    poly = PolynomialFeatures(n_polynomial)
    train_scores = []
    val_scores = []

    for train_idx, val_idx in kf.split(X_dev):
        y_train, y_val = y_dev[train_idx], y_dev[val_idx]

        # Fit the feature expansion on the training part only and merely apply
        # it to the validation part, like any other preprocessing step.
        X_train = poly.fit_transform(X_dev[train_idx, :])
        X_val = poly.transform(X_dev[val_idx, :])

        clf.fit(X_train, y_train)

        # accuracy_score expects (y_true, y_pred).
        val_scores.append(accuracy_score(y_val, clf.predict(X_val)))
        train_scores.append(accuracy_score(y_train, clf.predict(X_train)))

    return np.mean(train_scores), np.mean(val_scores)

In [16]:
from xgboost.sklearn import XGBClassifier
# Baseline model: gradient-boosted trees with the library's default hyper-parameters.
estimator = XGBClassifier()

In [17]:
# Cross-validated (train accuracy, validation accuracy) of the baseline model.
get_welding_wave_classification_result(estimator, X_dev, y_dev)

(0.4726641748010145, 0.4715952378505543)

### Add time-series features (lags and rolling means)

In [18]:
X_df = all_df.copy()

# Lags and rolling-mean windows of 1 .. window_size - 1 samples are generated.
window_size = 11
signal_cols = ["ampere", "volt"]

shift_big_list = []
rolling_big_list = []

# Features are built separately per (hz, gap_type) group so that a lag never
# reaches into rows of a different group.
# NOTE(review): a group may contain several source files stacked one after
# another; lags/rolling windows then still cross file boundaries - confirm
# whether a per-file grouping is needed.
for hz in ["2000Hz", "4000Hz"]:
    for gap_type in ["Gap0", "Gap2", "Gap4"]:
        group_mask = (X_df["hz"] == hz) & (X_df["gap_type"] == gap_type)
        target_df = X_df.loc[group_mask, signal_cols]

        # Value i samples earlier, e.g. "ampere_shift_3".
        shift_small_list = [
            target_df.shift(i).add_suffix("_shift_" + str(i))
            for i in range(1, window_size)
        ]
        # Mean over the last i samples, e.g. "volt_rolling_5"
        # (the window of 1 simply repeats the raw value).
        rolling_small_list = [
            target_df.rolling(i).mean().add_suffix("_rolling_" + str(i))
            for i in range(1, window_size)
        ]

        shift_big_list.append(pd.concat(shift_small_list, axis=1))
        rolling_big_list.append(pd.concat(rolling_small_list, axis=1))

# Rows at the start of each group lack a full history and contain NaN; they are
# dropped, and the inner joins on the row index remove them from X_df as well.
shift_features = pd.concat(shift_big_list).dropna()
rolling_features = pd.concat(rolling_big_list).dropna()
X_df = pd.merge(X_df, shift_features, how="inner", left_index=True, right_index=True)
X_df = pd.merge(X_df, rolling_features, how="inner", left_index=True, right_index=True)

# Only rows of the two frequencies iterated above survive the joins, so this
# mapping is total. map() gives an integer column directly, whereas replace()
# relied on pandas' deprecated silent downcasting.
X_df["hz"] = X_df["hz"].map({"2000Hz": 0, "4000Hz": 1})

# Log-scaled copies of the raw signals.
# NOTE(review): the +100 offset presumably keeps the argument positive - values
# <= -100 would produce NaN/-inf; confirm the signal range.
X_df["log_am"] = np.log(X_df["ampere"] + 100)
X_df["log_volt"] = np.log(X_df["volt"] + 100)

le = preprocessing.LabelEncoder()
y = le.fit_transform(X_df.pop("gap_type").values)
# NOTE(review): unlike the baseline feature set, X still contains the
# `time_order` column - confirm that it is meant to be a model input.
X = X_df.values

# Fixed seed for a reproducible dev/test partition.
# NOTE(review): a random row-wise split puts neighbouring samples, whose lag
# windows overlap, on both sides of the split - scores may be optimistic.
X_dev, X_test, y_dev, y_test = train_test_split(X, y, random_state=42)

In [19]:
# Fresh default XGBoost model, cross-validated on the current X_dev / y_dev.
estimator = XGBClassifier()
get_welding_wave_classification_result(estimator, X_dev, y_dev)

(0.5677327130146684, 0.566729972712078)

In [20]:
from sklearn.ensemble import RandomForestClassifier 

# Random forest with default hyper-parameters, trained on 6 parallel workers.
rf_clf = RandomForestClassifier(n_jobs=6)
get_welding_wave_classification_result(rf_clf, X_dev, y_dev, n_polynomial=1)

(0.9897279126149755, 0.5916463105935817)

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with three hidden layers of 64, 32 and 16 units.
mlp_clf = MLPClassifier(hidden_layer_sizes=(64, 32, 16))
get_welding_wave_classification_result(mlp_clf, X_dev, y_dev, n_polynomial=1)

In [None]:
from xgboost import XGBClassifier
from lightgbm.sklearn import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from stacking import MyStackingClassifier

In [None]:
# Candidate models for the stacking experiment: three shallow tree ensembles,
# a linear SVM and a neural network.
estimator1 = XGBClassifier(
    max_depth=3,
    learning_rate=0.2,
    n_estimators=50,
)
estimator2 = LGBMClassifier(
    max_depth=3,
    learning_rate=0.2,
    n_estimators=50,
)
estimator3 = RandomForestClassifier(
    n_estimators=500,
    max_depth=3,
)
estimator4 = LinearSVC()
estimator5 = MLPClassifier(
    hidden_layer_sizes=(512, 256, 32),
)

base_estimators = [
    estimator1,
    estimator2,
    estimator3,
    estimator4,
    estimator5,
]

In [None]:
# `copy` and `cross_val_score` are used below but were never imported in this
# notebook; import them here so the cell runs on a fresh kernel.
import copy

from sklearn.model_selection import cross_val_score

# Try each base model in turn as the first argument of the stacking classifier
# (a copy, so the instance inside `base_estimators` is not the same object) and
# report the mean 5-fold cross-validated accuracy.
# NOTE(review): X / y are the full data set, including the rows held out as
# X_test / y_test - confirm this is intended.
for estimator in base_estimators:
    stack_classifier = MyStackingClassifier(copy.copy(estimator), base_estimators)
    result = cross_val_score(stack_classifier, X, y, scoring="accuracy", cv=5).mean()
    print(result)