In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#T1~T8 까지
import os
import pandas as pd

def load_experiment_data(directory_path):
    # Define the names of the data folders and the label folder
    data_folders = ["Accelerometer_Data", "Force_Data", "Acoustic_Emission_Data"]
    label_folder = "Tool_wear_values"

    # Initialize a dictionary to store dataframes for each experiment
    experiment_data = {}

    # Loop through each experiment number
    for i in range(1, 13):
        experiment_name = f"Expt_{i}"
        experiment_data[experiment_name] = {}

        # Loop through each data folder and load the corresponding CSV
        for folder in data_folders:
            csv_path = os.path.join(directory_path, folder, f"{experiment_name}.csv")
            df = pd.read_csv(csv_path)
            experiment_data[experiment_name][folder] = df

    return experiment_data

# Example usage:
directory_path = '/content/drive/MyDrive/COOP/0814_은수님작업연장/Dataset/T1'  # Replace with the actual path to the T1 directory
experiment_data = load_experiment_data(directory_path)




In [None]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

class FeatureExtractor:
    def __init__(self, normal_ratio=0.5, abnormal_ratio=0.5):
        assert normal_ratio + abnormal_ratio == 1, "The sum of normal_ratio and abnormal_ratio must be 1."
        self.normal_ratio = normal_ratio
        self.abnormal_ratio = abnormal_ratio

    def calculate_features(self, data):
        Z1 = np.mean(np.abs(data))  # 평균값 (Mean Value)
        Z2 = np.sqrt(np.mean(data ** 2))  # RMS
        Z3 = np.std(data)  # 표준편차 (Standard Deviation)
        Z4 = Z2 / Z1  # 형상 계수 (Shape Factor)
        Z5 = np.mean(((np.abs(data - Z1)) / Z3) ** 3)  # 왜도 (Skewness)
        Z6 = np.mean(((np.abs(data - Z1)) / Z3) ** 4)  # 첨도 (Kurtosis)
        Z7 = np.max(np.abs(data))  # 최대값 (Peak Value)
        Z8 = Z7 / Z2  # 첨도 계수 (Crest Factor)
        Z9 = Z7 / Z1  # 충격 계수 (Impulse Factor)
        Z10 = np.sum([(f ** 2) * p for f, p in enumerate(np.abs(np.fft.fft(data)) ** 2)])  # 평균 제곱 주파수 (MSF)
        Z11 = np.mean(np.abs(np.fft.fft(data)) ** 2)  # 파워 스펙트럼의 평균 (MPS)
        Z12 = np.sum([f * p for f, p in enumerate(np.abs(np.fft.fft(data)) ** 2)]) / np.sum(np.abs(np.fft.fft(data)) ** 2)  # 주파수 중심 (FC)
        return [Z1, Z2, Z3, Z4, Z5, Z6, Z7, Z8, Z9, Z10, Z11, Z12]

    def extract_features(self, expt_data):
        normal_features = []
        abnormal_features = []

        for key, df in expt_data.items():  # 각 데이터프레임(Accelerometer, Force, Acoustic)을 순회
            n_normal = int(len(df) * self.normal_ratio)
            n_abnormal = len(df) - n_normal

            normal_data = df.iloc[:n_normal]
            abnormal_data = df.iloc[-n_abnormal:]

            for col in df.columns:
                normal_features.append(self.calculate_features(normal_data[col]))
                abnormal_features.append(self.calculate_features(abnormal_data[col]))

        normal_features = np.array(normal_features).flatten()
        abnormal_features = np.array(abnormal_features).flatten()

        return normal_features, abnormal_features

    def t_test_features(self, normal_features, abnormal_features):
        t_stat, p_value = ttest_ind(normal_features, abnormal_features, equal_var=False)
        return {'t_stat': t_stat, 'p_value': p_value}

    def process_experiment(self, experiment_data):
        feature_results = {}
        t_test_results = {}

        for expt_name, expt_data in experiment_data.items():
            normal_features, abnormal_features = self.extract_features(expt_data)
            feature_results[expt_name] = {'normal': normal_features, 'abnormal': abnormal_features}
            t_test_results[expt_name] = self.t_test_features(normal_features, abnormal_features)

        return feature_results, t_test_results




{'t_stat': -0.09519914339258417, 'p_value': 0.9242716070627832}


In [None]:
# 사용 예시:
# experiment_data는 이미 experiment_data["Expt_1"] 형태로 준비되어 있다고 가정합니다.
extractor = FeatureExtractor(normal_ratio=0.5, abnormal_ratio=0.5)

# Expt_1에 대해 특징을 추출하고 t-검정을 수행
feature_results, t_test_results = extractor.process_experiment({"Expt_1": experiment_data["Expt_1"]})

# t-검정 결과 출력
print(t_test_results["Expt_1"])


In [None]:
# experiment_data는 이미 experiment_data["Expt_1"] 형태로 준비되어 있다고 가정합니다.
extractor = FeatureExtractor(normal_ratio=0.7, abnormal_ratio=0.3)

# Expt_1에 대해 특징을 추출하고 t-검정을 수행
feature_results, t_test_results = extractor.process_experiment({"Expt_1": experiment_data["Expt_1"]})

# t-검정 결과 출력
print(t_test_results["Expt_1"])


{'t_stat': 1.4494213022979752, 'p_value': 0.15098356628041573}


In [None]:
# experiment_data는 이미 experiment_data["Expt_1"] 형태로 준비되어 있다고 가정합니다.
extractor = FeatureExtractor(normal_ratio=0.8, abnormal_ratio=0.2)

# Expt_1에 대해 특징을 추출하고 t-검정을 수행
feature_results, t_test_results = extractor.process_experiment({"Expt_1": experiment_data["Expt_1"]})

# t-검정 결과 출력
print(t_test_results["Expt_1"])


{'t_stat': 1.4770098575831436, 'p_value': 0.14345732131070804}


In [None]:
# FeatureExtractor 클래스를 정의한 후 아래 코드를 실행합니다.

# FeatureExtractor 클래스를 초기화할 때 normal_ratio와 abnormal_ratio를 설정합니다.
extractor = FeatureExtractor(normal_ratio=0.8, abnormal_ratio=0.2)

# Expt_1부터 Expt_12까지의 데이터에 대해 특징을 추출하고 t-검정을 수행합니다.
all_experiments = {f"Expt_{i}": experiment_data[f"Expt_{i}"] for i in range(1, 13)}

# 모든 실험에 대해 처리
feature_results, t_test_results = extractor.process_experiment(all_experiments)

# 각 실험의 t-검정 결과 출력
for expt_name, results in t_test_results.items():
    print(f"{expt_name}: t_stat = {results['t_stat']}, p_value = {results['p_value']}")


Expt_1: t_stat = 1.4770098575831436, p_value = 0.14345732131070804
Expt_2: t_stat = 1.6371109742765106, p_value = 0.10539333331958933
Expt_3: t_stat = 1.5480029564586022, p_value = 0.12542622274893248
Expt_4: t_stat = 1.81487997843523, p_value = 0.07315462315544669
Expt_5: t_stat = 1.7874344234270378, p_value = 0.07751733185487344
Expt_6: t_stat = 1.7788181860450059, p_value = 0.0789307439599061
Expt_7: t_stat = 1.7285732091319614, p_value = 0.0876037448940448
Expt_8: t_stat = 1.7081089006626833, p_value = 0.09135378815592006
Expt_9: t_stat = 1.5868951449351523, p_value = 0.11633898333724557
Expt_10: t_stat = 1.7818126972400694, p_value = 0.07843730954665154
Expt_11: t_stat = 1.7439608489321814, p_value = 0.08486817051242108
Expt_12: t_stat = 1.7639503248824044, p_value = 0.08141984802449657


In [None]:
import numpy as np

# 최적의 normal_ratio와 abnormal_ratio를 찾기 위한 함수
def find_optimal_ratio(experiment_data, ratios):
    optimal_ratio = None
    min_avg_p_value = float('inf')
    p_values_dict = {}

    for normal_ratio in ratios:
        abnormal_ratio = 1 - normal_ratio
        extractor = FeatureExtractor(normal_ratio=normal_ratio, abnormal_ratio=abnormal_ratio)
        feature_results, t_test_results = extractor.process_experiment(experiment_data)

        # 모든 실험에 대해 평균 p-값 계산
        avg_p_value = np.mean([result['p_value'] for result in t_test_results.values()])
        p_values_dict[normal_ratio] = avg_p_value

        # 최소 평균 p-값과 해당 비율을 저장
        if avg_p_value < min_avg_p_value:
            min_avg_p_value = avg_p_value
            optimal_ratio = normal_ratio

    return optimal_ratio, min_avg_p_value, p_values_dict

# 사용 예시:
ratios = np.arange(0.5, 1.0, 0.1)  # 0.5부터 0.9까지의 비율
all_experiments = {f"Expt_{i}": experiment_data[f"Expt_{i}"] for i in range(1, 13)}

optimal_ratio, min_avg_p_value, p_values_dict = find_optimal_ratio(all_experiments, ratios)

print(f"Optimal Normal Ratio: {optimal_ratio}")
print(f"Minimum Average p-value: {min_avg_p_value}")

# 각 비율에 대한 평균 p-value 출력
for ratio, avg_p_value in p_values_dict.items():
    print(f"Normal Ratio: {ratio}, Average p-value: {avg_p_value}")


Optimal Normal Ratio: 0.7999999999999999
Minimum Average p-value: 0.09532511840231866
Normal Ratio: 0.5, Average p-value: 0.7263697599157614
Normal Ratio: 0.6, Average p-value: 0.18721337383959302
Normal Ratio: 0.7, Average p-value: 0.10371643874201346
Normal Ratio: 0.7999999999999999, Average p-value: 0.09532511840231866
Normal Ratio: 0.8999999999999999, Average p-value: 0.09593891946678691


In [None]:
directory_path2 = '/content/drive/MyDrive/COOP/0814_은수님작업연장/Dataset/T2'  # Replace with the actual path to the T1 directory
experiment_data2 = load_experiment_data(directory_path2)

In [None]:
# 디렉토리 경로 설정
directories = [f'/content/drive/MyDrive/COOP/0814_은수님작업연장/Dataset/T{i}' for i in range(1, 9)]

# 각 디렉토리에 대해 데이터 로드 및 최적 비율 찾기
for i, directory_path in enumerate(directories, 1):
    print(f"Processing T{i}...")
    experiment_data = load_experiment_data(directory_path)

    # 비율 범위 설정
    ratios = np.arange(0.5, 1.0, 0.1)  # 0.5부터 0.9까지의 비율

    # 모든 실험 데이터셋에 대해 최적의 비율 찾기
    all_experiments = {f"Expt_{j}": experiment_data[f"Expt_{j}"] for j in range(1, 13)}
    optimal_ratio, min_avg_p_value, p_values_dict = find_optimal_ratio(all_experiments, ratios)

    # 결과 출력
    print(f"T{i} Optimal Normal Ratio: {optimal_ratio}")
    print(f"T{i} Minimum Average p-value: {min_avg_p_value}")
    print(f"T{i} Average p-values per ratio:")

    for ratio, avg_p_value in p_values_dict.items():
        print(f"Normal Ratio: {ratio}, Average p-value: {avg_p_value}")

    print("\n" + "="*50 + "\n")  # 각 T의 결과를 구분하기 위해 줄 추가


Processing T1...
T1 Optimal Normal Ratio: 0.7999999999999999
T1 Minimum Average p-value: 0.09532511840231866
T1 Average p-values per ratio:
Normal Ratio: 0.5, Average p-value: 0.7263697599157614
Normal Ratio: 0.6, Average p-value: 0.18721337383959302
Normal Ratio: 0.7, Average p-value: 0.10371643874201346
Normal Ratio: 0.7999999999999999, Average p-value: 0.09532511840231866
Normal Ratio: 0.8999999999999999, Average p-value: 0.09593891946678691


Processing T2...
T2 Optimal Normal Ratio: 0.8999999999999999
T2 Minimum Average p-value: 0.07267334580165906
T2 Average p-values per ratio:
Normal Ratio: 0.5, Average p-value: 0.5669823321157501
Normal Ratio: 0.6, Average p-value: 0.13747701637074702
Normal Ratio: 0.7, Average p-value: 0.08123321395205196
Normal Ratio: 0.7999999999999999, Average p-value: 0.0743772649415158
Normal Ratio: 0.8999999999999999, Average p-value: 0.07267334580165906


Processing T3...
T3 Optimal Normal Ratio: 0.7999999999999999
T3 Minimum Average p-value: 0.10299846

T1

In [None]:
import pandas as pd

# CSV 파일 경로
csv_file_path = "/content/drive/MyDrive/COOP/0814_은수님작업연장/Dataset/T1/Tool_Wear_Values/T1_Tool_wear_values_for_all_Experiments.csv"

# 다른 인코딩으로 CSV 파일을 읽어오기
try:
    df = pd.read_csv(csv_file_path, encoding='ISO-8859-1')
except UnicodeDecodeError:
    df = pd.read_csv(csv_file_path, encoding='latin1')

# 데이터프레임 출력 (필요한 경우)
print(df)


          #  Tool Wear in (µm)
0    Expt_1            123.349
1    Expt_2            144.226
2    Expt_3            205.581
3    Expt_4            219.458
4    Expt_5            228.812
5    Expt_6            239.913
6    Expt_7            241.843
7    Expt_8            259.033
8    Expt_9            268.299
9   Expt_10            280.364
10  Expt_11            291.479
11  Expt_12            306.450


T5

In [None]:
# Example usage:
directory_path2 = '/content/drive/MyDrive/COOP/0814_은수님작업연장/Dataset/T5'  # Replace with the actual path to the T1 directory
experiment_data2 = load_experiment_data(directory_path2)



In [None]:
experiment_data2["Expt_8"]

{'Accelerometer_Data':         Vibration in  X (g)  Vibration in Y (g)  Vibration in Z (g)
 0                 -0.001761           -0.009247           -0.003963
 1                 -0.003083           -0.009026           -0.003963
 2                 -0.002862           -0.009907           -0.005284
 3                 -0.001761           -0.010788           -0.007265
 4                 -0.002642           -0.011669           -0.007485
 ...                     ...                 ...                 ...
 337496            -0.168520           -0.116716            0.751681
 337497            -0.131729           -0.313570            1.461918
 337498            -0.130037           -0.141244           -0.212289
 337499             0.088806            0.146742           -1.173510
 337500             0.363682            0.129191           -0.323720
 
 [337501 rows x 3 columns],
 'Force_Data':        Force in X axis (N)  Force in Y axis (N)  Force in Z axis (N)
 0                -3.323361         

In [None]:
experiment_data2["Expt_1"]

{'Accelerometer_Data':         Vibration in  X (g)  Vibration in Y (g)  Vibration in Z (g)
 0                 -0.001321            0.000661           -0.003083
 1                 -0.003743            0.000000           -0.002862
 2                 -0.003743            0.000000           -0.002642
 3                 -0.001541            0.000000           -0.002202
 4                  0.000881           -0.000661           -0.002202
 ...                     ...                 ...                 ...
 337496             0.007904            0.001282            0.007263
 337497             0.008117           -0.002136            0.005341
 337498             0.004914           -0.005127            0.006195
 337499             0.001495           -0.002991            0.007690
 337500             0.000214           -0.001282            0.006408
 
 [337501 rows x 3 columns],
 'Force_Data':        Force in X axis (N)  Force in Y axis (N)  Force in Z axis (N)
 0                14.785800         

In [None]:
experiment_data["Expt_1"]

{'Accelerometer_Data':         Vibration in  X (g)  Vibration in Y (g)  Vibration in Z (g)
 0                 -0.001308            0.000654           -0.003052
 1                 -0.003706            0.000000           -0.002834
 2                 -0.003706            0.000000           -0.002616
 3                 -0.001526            0.000000           -0.002180
 4                  0.000872           -0.000654           -0.002180
 ...                     ...                 ...                 ...
 337496             0.008065            0.001308            0.007411
 337497             0.008283           -0.002180            0.005450
 337498             0.005014           -0.005232            0.006321
 337499             0.001526           -0.003052            0.007847
 337500             0.000218           -0.001308            0.006539
 
 [337501 rows x 3 columns],
 'Force_Data':        Force in X axis (N)  Force in Y axis (N)  Force in Z axis (N)
 0                  15.5640         