In [11]:
# Import required libraries
import os
import logging
import pandas as pd
import numpy as np
from sklearn import metrics
from sktime.transformations.panel.rocket import Rocket
from sklearn.linear_model import RidgeClassifierCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle

In [102]:
# Configure logging: emit INFO-and-above records and create the module-level
# logger that the classes, functions and cells below write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [198]:
# Define Rocket Transformer Classifier
class RocketTransformerClassifier:
    """Rocket feature transform -> standardisation -> ridge classifier pipeline.

    The three fitted stages are stored in ``classifiers_mapping`` under the keys
    ``"transformer"``, ``"scaler"`` and ``"classifier"``. Code elsewhere in this
    notebook pickles the whole object and reads those keys directly, so the
    attribute name and its keys are part of the public contract.
    """

    def __init__(self):
        # Filled by fit_rocket(); an empty mapping means "not fitted yet".
        self.classifiers_mapping = {}

    def fit_rocket(self, x_train, y_train, kernels=10000, random_state=None):
        """Fit the transform, the scaler and the classifier on the training data.

        Every call builds all three stages from scratch and replaces whatever
        was stored before.

        Parameters
        ----------
        x_train : array-like
            Training panel, handed unchanged to ``Rocket.fit`` / ``Rocket.transform``.
            NOTE(review): sktime documents 3-D numpy panels as
            (n_instances, n_channels, n_timepoints) -- confirm the axis order
            produced by the loaders matches what is intended.
        y_train : array-like
            One label per training sample.
        kernels : int, default 10000
            Number of random convolution kernels for Rocket.
        random_state : int or None, default None
            Seed forwarded to ``Rocket``. ``None`` (the default) keeps the
            previous behaviour of drawing different kernels on every call.
        """
        # Initialize and fit Rocket transformer
        rocket = Rocket(num_kernels=kernels, normalise=False, random_state=random_state)
        rocket.fit(x_train)
        x_training_transform = rocket.transform(x_train)

        # Normalize the transformed data
        scaler = StandardScaler()
        x_training_transform = scaler.fit_transform(x_training_transform)

        # Train RidgeClassifier with normalized transformed data;
        # the regularisation strength is picked by cross-validation.
        classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
        classifier.fit(x_training_transform, y_train)

        # Store the stages only after all of them were fitted successfully.
        self.classifiers_mapping.update(
            transformer=rocket,
            scaler=scaler,
            classifier=classifier,
        )

    def _fitted_stages(self):
        """Return ``(transformer, scaler, classifier)``; raise KeyError if unfitted."""
        try:
            return (
                self.classifiers_mapping["transformer"],
                self.classifiers_mapping["scaler"],
                self.classifiers_mapping["classifier"],
            )
        except KeyError as exc:
            # Same exception type as the plain dict lookup, but with a hint.
            raise KeyError(
                f"missing fitted stage {exc.args[0]!r}; call fit_rocket() first"
            ) from exc

    def _predict_labels(self, x):
        """Run the transform -> scale -> predict pipeline on ``x``."""
        rocket, scaler, classifier = self._fitted_stages()
        features = scaler.transform(rocket.transform(x))
        return classifier.predict(features)

    def evaluate(self, x_val, y_val):
        """Log and return the accuracy on a validation set.

        Raises
        ------
        KeyError
            If ``fit_rocket`` has not been called yet.
        """
        predictions = self._predict_labels(x_val)
        accuracy = metrics.accuracy_score(y_val, predictions)

        logger.info("-----------------------------------------------")
        logger.info(f"Accuracy: {accuracy}")

        return accuracy

    def predict_rocket(self, x_test, y_test):
        """Evaluate on a labelled test set.

        Logs and returns ``(accuracy, confusion_matrix, classification_report)``
        where the report is the text produced by
        ``sklearn.metrics.classification_report``.

        Raises
        ------
        KeyError
            If ``fit_rocket`` has not been called yet.
        """
        predictions = self._predict_labels(x_test)
        accuracy = metrics.accuracy_score(y_test, predictions)
        confusion_matrix = metrics.confusion_matrix(y_test, predictions)
        classification_report = metrics.classification_report(y_test, predictions)

        logger.info("-----------------------------------------------")
        logger.info(f"Accuracy: {accuracy}")
        logger.info("\nConfusion Matrix:\n" + str(confusion_matrix))
        logger.info("\nClassification Report:\n" + classification_report)

        return accuracy, confusion_matrix, classification_report
    
    
def pad_with_last_row(series, fixed_rows):
    """
    Force a 2-D time series to have exactly ``fixed_rows`` rows.

    Rows beyond ``fixed_rows`` are dropped from the end; missing rows are
    filled by repeating the last available row.

    Parameters
    ----------
    series : array-like, shape (rows, columns)
        The time series; converted with ``np.array``.
    fixed_rows : int
        Number of rows the result must have (non-negative).

    Returns
    -------
    np.ndarray
        Array of shape (fixed_rows, columns).

    Raises
    ------
    ValueError
        If ``series`` is not 2-D, if ``fixed_rows`` is negative, or if padding
        is required but ``series`` has no row to repeat.
    """
    series = np.array(series)
    if series.ndim != 2:
        raise ValueError(
            f"series must be 2-D (rows, columns), got {series.ndim}-D input"
        )
    if fixed_rows < 0:
        raise ValueError(f"fixed_rows must be non-negative, got {fixed_rows}")

    current_rows = series.shape[0]

    # Truncate if too many rows
    if current_rows > fixed_rows:
        return series[:fixed_rows, :]

    # Already the right length
    if current_rows == fixed_rows:
        return series

    # Too few rows: there must be at least one row to repeat
    if current_rows == 0:
        raise ValueError("cannot pad an empty series: there is no last row to repeat")

    # mode="edge" repeats the last row along axis 0; columns are left untouched
    return np.pad(series, ((0, fixed_rows - current_rows), (0, 0)), mode="edge")

# Define function to load time-series data
def load_time_series_data(input_dir, fixed_rows=10):
    """
    Load every ``.csv`` file in ``input_dir`` as one labelled time-series sample.

    For each file the label is the first value of its ``class`` column and the
    features are all columns from the fourth one onwards, resized to
    ``fixed_rows`` rows with ``pad_with_last_row``.

    Parameters
    ----------
    input_dir : str
        Directory containing the CSV files.
    fixed_rows : int, default 10
        Number of rows every sample is truncated / padded to. This must match
        the row count used when the data is later fed to a fitted model.

    Returns
    -------
    (np.ndarray, np.ndarray)
        The stacked samples and the matching labels. Both arrays are empty
        when the directory contains no CSV file.
    """
    x_data = []
    y_data = []

    # os.listdir() returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded split done afterwards) reproducible.
    for file_name in sorted(os.listdir(input_dir)):
        if not file_name.endswith('.csv'):
            continue

        df = pd.read_csv(os.path.join(input_dir, file_name))

        # Read the label from the column itself so it keeps the column's dtype;
        # going through df.iloc[0] first can upcast an integer label to float
        # when the row mixes integer and float columns.
        label = df['class'].iloc[0]

        # Exclude the first three (non-time-series) columns
        time_series = df.iloc[:, 3:].values
        time_series = pad_with_last_row(time_series, fixed_rows=fixed_rows)

        x_data.append(time_series)
        y_data.append(label)

    return np.array(x_data), np.array(y_data)

def infer_new_data(model_path, csv_file, fixed_rows=11, first_feature_col=1):
    """
    Load a saved RocketTransformerClassifier model and perform inference on a new CSV file.

    Parameters
    ----------
    model_path : str
        Path of the pickled classifier object. It must expose a
        ``classifiers_mapping`` dict with the keys ``"transformer"``,
        ``"scaler"`` and ``"classifier"``.
    csv_file : str
        CSV file holding a single time series.
    fixed_rows : int, default 11
        Row count the series is truncated / padded to; it has to match the
        value the model was trained with.
    first_feature_col : int, default 1
        Index of the first feature column; columns before it are dropped.
        NOTE(review): the training loaders in this file drop the first three
        columns while this default drops only one -- confirm that inference
        CSVs really have a different layout.

    Returns
    -------
    np.ndarray
        The classifier's prediction array for the single sample (length 1).

    Raises
    ------
    FileNotFoundError
        If ``model_path`` or ``csv_file`` does not exist.
    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file.
    # Only load model files that come from a trusted source.
    with open(model_path, "rb") as f:
        rocket_classifier = pickle.load(f)

    # Load and preprocess the new data
    df = pd.read_csv(csv_file)
    time_series = df.iloc[:, first_feature_col:].values
    time_series = pad_with_last_row(time_series, fixed_rows=fixed_rows)

    # Add a leading sample axis so the single series has the same 3-D layout
    # as a stacked training set containing one sample.
    x_new = np.expand_dims(time_series, axis=0)

    # Run the stored pipeline: Rocket transform -> scaler -> classifier
    stages = rocket_classifier.classifiers_mapping
    x_new_transformed = stages["transformer"].transform(x_new)
    x_new_transformed = stages["scaler"].transform(x_new_transformed)

    return stages["classifier"].predict(x_new_transformed)

from scipy.special import softmax

def infer_top3_classes(model_path, csv_file, fixed_rows=11, first_feature_col=1):
    """
    Load a saved RocketTransformerClassifier model and return its top-3 classes for a new CSV file.

    The "probability" values are the classifier's decision scores pushed
    through a softmax. They sum to 1 across all classes and are useful for
    ranking, but they are not calibrated probabilities.

    Parameters
    ----------
    model_path : str
        Path of the pickled classifier object. It must expose a
        ``classifiers_mapping`` dict with the keys ``"transformer"``,
        ``"scaler"`` and ``"classifier"``.
    csv_file : str
        CSV file holding a single time series.
    fixed_rows : int, default 11
        Row count the series is truncated / padded to; it has to match the
        value the model was trained with.
    first_feature_col : int, default 1
        Index of the first feature column; columns before it are dropped.
        NOTE(review): the training loaders in this file drop the first three
        columns while this default drops only one -- confirm that inference
        CSVs really have a different layout.

    Returns
    -------
    list of dict
        Up to three ``{"class": label, "probability": score}`` entries, best
        first. Fewer than three are returned when the model has fewer classes.

    Raises
    ------
    FileNotFoundError
        If ``model_path`` or ``csv_file`` does not exist.
    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file.
    # Only load model files that come from a trusted source.
    with open(model_path, "rb") as f:
        rocket_classifier = pickle.load(f)

    # Load and preprocess the new data
    df = pd.read_csv(csv_file)
    time_series = df.iloc[:, first_feature_col:].values
    time_series = pad_with_last_row(time_series, fixed_rows=fixed_rows)

    # Add a leading sample axis so the single series has the same 3-D layout
    # as a stacked training set containing one sample.
    x_new = np.expand_dims(time_series, axis=0)

    # Run the stored pipeline up to the classifier's raw scores
    stages = rocket_classifier.classifiers_mapping
    classifier = stages["classifier"]
    x_new_transformed = stages["transformer"].transform(x_new)
    x_new_transformed = stages["scaler"].transform(x_new_transformed)

    decision_scores = np.asarray(classifier.decision_function(x_new_transformed))
    if decision_scores.ndim == 1:
        # Binary model: scikit-learn returns one signed score per sample for
        # classes_[1]. Expand it to one column per class so the softmax and
        # the indexing into classes_ below work unchanged.
        decision_scores = np.column_stack((-decision_scores, decision_scores))

    # Convert decision scores to pseudo-probabilities using softmax
    probabilities = softmax(decision_scores, axis=1)[0]  # Shape: (num_classes,)

    # Get top-3 classes and their scores, best first
    top3_indices = probabilities.argsort()[-3:][::-1]
    top3_classes = classifier.classes_[top3_indices]
    top3_probabilities = probabilities[top3_indices]

    return [{"class": c, "probability": p} for c, p in zip(top3_classes, top3_probabilities)]



In [215]:
### The code in the cell above was built from this cell.

# Define Rocket Transformer Classifier
class RocketTransformerClassifier:
    """Rocket feature transform -> standardisation -> ridge classifier pipeline.

    The three fitted stages are stored in ``classifiers_mapping`` under the keys
    ``"transformer"``, ``"scaler"`` and ``"classifier"``. Code elsewhere in this
    notebook pickles the whole object and reads those keys directly, so the
    attribute name and its keys are part of the public contract.
    """

    def __init__(self):
        # Filled by fit_rocket(); an empty mapping means "not fitted yet".
        self.classifiers_mapping = {}

    def fit_rocket(self, x_train, y_train, kernels=20000, random_state=None):
        """Fit the transform, the scaler and the classifier on the training data.

        Every call builds all three stages from scratch and replaces whatever
        was stored before.

        Parameters
        ----------
        x_train : array-like
            Training panel, handed unchanged to ``Rocket.fit`` / ``Rocket.transform``.
            NOTE(review): sktime documents 3-D numpy panels as
            (n_instances, n_channels, n_timepoints) -- confirm the axis order
            produced by the loaders matches what is intended.
        y_train : array-like
            One label per training sample.
        kernels : int, default 20000
            Number of random convolution kernels for Rocket.
        random_state : int or None, default None
            Seed forwarded to ``Rocket``. ``None`` (the default) keeps the
            previous behaviour of drawing different kernels on every call.
        """
        # Initialize and fit Rocket transformer
        rocket = Rocket(num_kernels=kernels, normalise=False, random_state=random_state)
        rocket.fit(x_train)
        x_training_transform = rocket.transform(x_train)

        # Normalize the transformed data
        scaler = StandardScaler()
        x_training_transform = scaler.fit_transform(x_training_transform)

        # Train RidgeClassifier with normalized transformed data;
        # the regularisation strength is picked by cross-validation.
        classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
        classifier.fit(x_training_transform, y_train)

        # Store the stages only after all of them were fitted successfully.
        self.classifiers_mapping.update(
            transformer=rocket,
            scaler=scaler,
            classifier=classifier,
        )

    def _fitted_stages(self):
        """Return ``(transformer, scaler, classifier)``; raise KeyError if unfitted."""
        try:
            return (
                self.classifiers_mapping["transformer"],
                self.classifiers_mapping["scaler"],
                self.classifiers_mapping["classifier"],
            )
        except KeyError as exc:
            # Same exception type as the plain dict lookup, but with a hint.
            raise KeyError(
                f"missing fitted stage {exc.args[0]!r}; call fit_rocket() first"
            ) from exc

    def _predict_labels(self, x):
        """Run the transform -> scale -> predict pipeline on ``x``."""
        rocket, scaler, classifier = self._fitted_stages()
        features = scaler.transform(rocket.transform(x))
        return classifier.predict(features)

    def evaluate(self, x_val, y_val):
        """Log and return the accuracy on a validation set.

        Raises
        ------
        KeyError
            If ``fit_rocket`` has not been called yet.
        """
        predictions = self._predict_labels(x_val)
        accuracy = metrics.accuracy_score(y_val, predictions)

        logger.info("-----------------------------------------------")
        logger.info(f"Accuracy: {accuracy}")

        return accuracy

    def predict_rocket(self, x_test, y_test):
        """Evaluate on a labelled test set.

        Logs and returns ``(accuracy, confusion_matrix, classification_report)``
        where the report is the text produced by
        ``sklearn.metrics.classification_report``.

        Raises
        ------
        KeyError
            If ``fit_rocket`` has not been called yet.
        """
        predictions = self._predict_labels(x_test)
        accuracy = metrics.accuracy_score(y_test, predictions)
        confusion_matrix = metrics.confusion_matrix(y_test, predictions)
        classification_report = metrics.classification_report(y_test, predictions)

        logger.info("-----------------------------------------------")
        logger.info(f"Accuracy: {accuracy}")
        logger.info("\nConfusion Matrix:\n" + str(confusion_matrix))
        logger.info("\nClassification Report:\n" + classification_report)

        return accuracy, confusion_matrix, classification_report
    
def pad_with_last_row(series, fixed_rows):
    """
    Force a 2-D time series to have exactly ``fixed_rows`` rows.

    Rows beyond ``fixed_rows`` are dropped from the end; missing rows are
    filled by repeating the last available row.

    Parameters
    ----------
    series : array-like, shape (rows, columns)
        The time series; converted with ``np.array``.
    fixed_rows : int
        Number of rows the result must have (non-negative).

    Returns
    -------
    np.ndarray
        Array of shape (fixed_rows, columns).

    Raises
    ------
    ValueError
        If ``series`` is not 2-D, if ``fixed_rows`` is negative, or if padding
        is required but ``series`` has no row to repeat.
    """
    series = np.array(series)
    if series.ndim != 2:
        raise ValueError(
            f"series must be 2-D (rows, columns), got {series.ndim}-D input"
        )
    if fixed_rows < 0:
        raise ValueError(f"fixed_rows must be non-negative, got {fixed_rows}")

    current_rows = series.shape[0]

    # Truncate if too many rows
    if current_rows > fixed_rows:
        return series[:fixed_rows, :]

    # Already the right length
    if current_rows == fixed_rows:
        return series

    # Too few rows: there must be at least one row to repeat
    if current_rows == 0:
        raise ValueError("cannot pad an empty series: there is no last row to repeat")

    # mode="edge" repeats the last row along axis 0; columns are left untouched
    return np.pad(series, ((0, fixed_rows - current_rows), (0, 0)), mode="edge")

def load_time_series_data(input_dir, max_len=11, included_range=range(377, 383)):
    """
    Load the time-series CSV files whose numeric file-name prefix is in ``included_range``.

    File names are expected to look like ``<number>_<anything>.csv``. The text
    before the first underscore is used both to filter the files and, as a
    string, as the class label of the sample.

    Parameters:
        input_dir (str): Directory containing CSV files.
        max_len (int): Number of rows every sample is truncated / padded to
            (via ``pad_with_last_row``). Defaults to 11.
        included_range (container of int): Accepted numeric prefixes.
            Defaults to 377..382 inclusive.

    Returns:
        x_data (np.array): Stacked samples, one (max_len, n_columns) table per file,
            built from every column of the file from the fourth one onwards.
        y_data (np.array): Corresponding labels (the file-name prefixes, as strings).
        Both arrays are empty when no file matches.
    """
    x_data = []
    y_data = []

    # os.listdir() returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded split done afterwards) reproducible.
    for file_name in sorted(os.listdir(input_dir)):
        if not file_name.endswith('.csv'):
            continue

        # The part before the first underscore is the class label
        class_label = file_name.split('_')[0]

        try:
            file_number = int(class_label)
        except ValueError:
            # A stray CSV without a numeric prefix must not abort the whole load.
            logging.getLogger(__name__).warning(
                "Skipping %s: file name does not start with a number", file_name
            )
            continue

        # Skip files not in the included range
        if file_number not in included_range:
            continue

        df = pd.read_csv(os.path.join(input_dir, file_name))

        # Exclude the first three (metadata) columns
        time_series = df.iloc[:, 3:].values

        # Fix the number of rows
        time_series = pad_with_last_row(time_series, max_len)

        x_data.append(time_series)
        y_data.append(class_label)

    return np.array(x_data), np.array(y_data)


In [216]:
# Load the data
input_dir = '/root/juno/peak_detection_results/segments2'  # path to the data directory
x_data, y_data = load_time_series_data(input_dir, max_len = 11)

# Inspect the loaded data: overall array shapes plus the first sample
print(f"x_data shape: {x_data.shape}")
print(f"y_data shape: {y_data.shape}")
print(f"Sample label: {y_data[0]}")
print(f"Sample time-series data shape: {x_data[0].shape}")

x_data shape: (383, 11, 36)
y_data shape: (383,)
Sample label: 377
Sample time-series data shape: (11, 36)


In [217]:
# Hold out 20% of the samples (stratified by label, fixed seed) for validation + test; the remaining 80% is the training set.
x_train, x_temp, y_train, y_temp = train_test_split(x_data, y_data, test_size=0.2, random_state=42, stratify=y_data)

In [218]:
# Split the held-out part in half (stratified by label, fixed seed): one half for validation, the other for test.
x_val, x_test, y_val, y_test = train_test_split(x_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

In [219]:
# Initialize Rocket classifier (still unfitted here: its classifiers_mapping stays empty until fit_rocket() is called)
logger.info("Initializing Rocket classifier...")
rocket_classifier = RocketTransformerClassifier()

INFO:__main__:Initializing Rocket classifier...


In [220]:
from tqdm import tqdm
import pickle

# Model selection by repeated random restarts.
# fit_rocket() creates a brand-new Rocket transform, scaler and ridge classifier
# on every call, so each "epoch" below is an independent training attempt, not a
# continuation of the previous one. The attempt with the best validation accuracy
# is pickled, and the loop stops early after `patience` attempts in a row without
# a new best.

# Start below any reachable accuracy so the first attempt is always saved.
# Starting at 0 would leave no model file (or a stale one from an earlier run)
# if every attempt scored exactly 0, and the load after the loop would then fail
# or silently pick up the wrong model.
best_val_accuracy = float("-inf")
patience = 5
epochs_without_improvement = 0
max_epochs = 10

# File the best model is saved to
best_model_path = "lateralraise_fin.pkl"

# Show training progress with tqdm (one tick per training attempt)
with tqdm(total=max_epochs, desc="Training Progress") as pbar:
    for epoch in range(max_epochs):
        # Train the model (from scratch, see note above)
        rocket_classifier.fit_rocket(x_train, y_train)

        # Evaluate on validation set
        val_accuracy = rocket_classifier.evaluate(x_val, y_val)

        # Update tqdm bar with validation accuracy
        pbar.set_postfix({"Val Accuracy": f"{val_accuracy:.4f}"})
        pbar.update(1)  # advance the progress bar

        # Save the best model
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            epochs_without_improvement = 0

            # Pickle right away: rocket_classifier is overwritten by the next attempt
            with open(best_model_path, "wb") as f:
                pickle.dump(rocket_classifier, f)
            logger.info(f"New best model saved with val_accuracy: {best_val_accuracy:.4f}")
        else:
            epochs_without_improvement += 1

        # Early stopping logic
        if epochs_without_improvement >= patience:
            logger.info("Early stopping triggered.")
            break

# Load the best model after training
with open(best_model_path, "rb") as f:
    best_model = pickle.load(f)

logger.info(f"Best model loaded with val_accuracy: {best_val_accuracy:.4f}")


Training Progress:   0%|          | 0/10 [00:00<?, ?it/s]

INFO:__main__:-----------------------------------------------
INFO:__main__:Accuracy: 0.7368421052631579
Training Progress:  10%|█         | 1/10 [00:10<01:38, 10.95s/it, Val Accuracy=0.7368]INFO:__main__:New best model saved with val_accuracy: 0.7368
INFO:__main__:-----------------------------------------------
INFO:__main__:Accuracy: 0.7894736842105263
Training Progress:  20%|██        | 2/10 [00:21<01:27, 10.90s/it, Val Accuracy=0.7895]INFO:__main__:New best model saved with val_accuracy: 0.7895
INFO:__main__:-----------------------------------------------
INFO:__main__:Accuracy: 0.7631578947368421
Training Progress:  30%|███       | 3/10 [00:32<01:16, 10.94s/it, Val Accuracy=0.7632]INFO:__main__:-----------------------------------------------
INFO:__main__:Accuracy: 0.7894736842105263
Training Progress:  40%|████      | 4/10 [00:43<01:05, 10.92s/it, Val Accuracy=0.7895]INFO:__main__:-----------------------------------------------
INFO:__main__:Accuracy: 0.7631578947368421
Training 

In [221]:
import pickle

# Location of the pickled best-performing model
best_model_path = "lateralraise_fin.pkl"

# Restore that model from disk
logger.info("Loading the best saved model for testing...")
with open(best_model_path, mode="rb") as f:
    best_rocket_classifier = pickle.load(f)

# Score the restored model on the held-out test split
logger.info("Testing the best Rocket classifier...")
(
    accuracy,
    confusion_matrix,
    classification_report,
) = best_rocket_classifier.predict_rocket(x_test, y_test)

# Report the test metrics
logger.info(f"Test Accuracy: {accuracy:.4f}")
logger.info(f"\nConfusion Matrix:\n{confusion_matrix}")
logger.info(f"\nClassification Report:\n{classification_report}")


INFO:__main__:Loading the best saved model for testing...
INFO:__main__:Testing the best Rocket classifier...
INFO:__main__:-----------------------------------------------
INFO:__main__:Accuracy: 0.7692307692307693
INFO:__main__:
Confusion Matrix:
[[6 0 0 0 0 0]
 [0 5 1 0 0 2]
 [0 0 4 0 1 2]
 [0 0 0 5 0 0]
 [0 1 0 0 5 0]
 [1 1 0 0 0 5]]
INFO:__main__:
Classification Report:
              precision    recall  f1-score   support

         377       0.86      1.00      0.92         6
         378       0.71      0.62      0.67         8
         379       0.80      0.57      0.67         7
         380       1.00      1.00      1.00         5
         381       0.83      0.83      0.83         6
         382       0.56      0.71      0.62         7

    accuracy                           0.77        39
   macro avg       0.79      0.79      0.79        39
weighted avg       0.78      0.77      0.77        39

INFO:__main__:Test Accuracy: 0.7692
INFO:__main__:
Confusion Matrix:
[[6 0 0 0 0

In [227]:
# Define paths
model_path = "/root/juno/lateralraise_fin.pkl"  # path to the saved model
new_csv_file = "/root/test2.csv"  # path to the new-data CSV file

# Perform inference; infer_new_data returns the prediction array for the one
# sample in the file, so [0] picks that sample's label
predicted_label = infer_new_data(model_path, new_csv_file)[0]

# Print the result
print(f"Predicted label for the new data: {predicted_label}")


Predicted label for the new data: 377


In [None]:
### For testing only; not otherwise meaningful.

# Define paths
model_path = "/root/juno/lateralraise.pkl"  # path to the saved model
result = []

for i in range(2):
    new_csv_file = f"/root/wrong_{i}.csv"  # path to the new-data CSV file

    # Perform inference
    predicted_label = infer_new_data(model_path, new_csv_file)[0]
    result.append(predicted_label)

    # Print the result
    print(f"Predicted label for the new data: {predicted_label}")

# Human-readable names for the class labels. Deliberately not named `dict`:
# that would shadow the builtin for every cell executed afterwards.
label_names = {'377': '정자세','378':'무릎 반동'}

# Look labels up by their string form so both string and integer predictions
# match, and fall back to the raw label for classes that have no name here
# instead of raising KeyError.
new = [label_names.get(str(i), i) for i in result]

print(new)

In [208]:
### For testing only; not otherwise meaningful.

model_path = "/root/juno/lateralraise.pkl"  # path to the saved model
new_csv_file = "/root/test2.csv"  # path to the new-data CSV file

# Perform inference
top3_predictions = infer_top3_classes(model_path, new_csv_file)

# Print the results (the list is already ordered best-first)
for rank, result in enumerate(top3_predictions, start=1):
    print(f"Rank {rank}: Class = {result['class']}, Probability = {result['probability']:.4f}")


Rank 1: Class = 377, Probability = 0.5192
Rank 2: Class = 378, Probability = 0.3442
Rank 3: Class = 380, Probability = 0.0673


### The code above is the version that Suhyeon fixed.

### The code below is the original code, from before Suhyeon's fixes.

In [9]:
# Write the most recent evaluation metrics to a plain-text report
output_dir = '/root/juno'

results_file = os.path.join(output_dir, 'classification_results.txt')
with open(results_file, mode='w') as f:
    # Same five pieces, in the same order, as one call per piece would write
    f.write(f"Accuracy: {accuracy}\n")
    f.write("\nConfusion Matrix:\n")
    f.write(f"{confusion_matrix}")
    f.write("\nClassification Report:\n")
    f.write(classification_report)

logger.info(f"Results saved to: {results_file}")


INFO:__main__:Results saved to: /root/juno/classification_results.txt


In [10]:
# Model saving helper
def save_model(model, output_path):
    """Pickle ``model`` into the file at ``output_path`` and log the destination."""
    with open(output_path, mode='wb') as sink:
        pickle.dump(model, sink)
    logger.info(f"Model saved to {output_path}")

# Model loading helper
def load_model(input_path):
    """Unpickle and return the object stored at ``input_path``, logging the source."""
    with open(input_path, mode='rb') as source:
        restored = pickle.load(source)
    logger.info(f"Model loaded from {input_path}")
    return restored

In [27]:
# Path of the file the model will be saved to
model_save_path = os.path.join(output_dir, 'rocket_classifier.pkl')

In [28]:
# Save the model once training has finished
logger.info("Saving the trained model...")
save_model(rocket_classifier, model_save_path)

INFO:__main__:Saving the trained model...
INFO:__main__:Model saved to /root/juno/rocket_classifier.pkl


In [29]:
# The saved model can be loaded back later whenever it is needed
logger.info("Loading the model for testing...")
loaded_classifier = load_model(model_save_path)

INFO:__main__:Loading the model for testing...
INFO:__main__:Model loaded from /root/juno/rocket_classifier.pkl


In [30]:
# Run the test evaluation with the loaded model
accuracy, confusion_matrix, classification_report = loaded_classifier.predict_rocket(x_test, y_test)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
INFO:__main__:-----------------------------------------------
INFO:__main__:Accuracy: 0.7
INFO:__main__:
Confusion Matrix:
[[1 0 0 ... 0 0 0]
 [0 2 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 ...
 [0 0 0 ... 2 0 0]
 [0 0 0 ... 1 1 0]
 [0 0 0 ... 0 0 1]]
INFO:__main__:
Classification Report:
              precision    recall  f1-score   support

         377       0.50      0.50      0.50         2
         378       0.50      1.00      0.67         2
         379       1.00      0.50      0.67         2
         380       0.67      1.00      0.80         2
         381       1.00      1.00      1.00         2
         382       0.00      0.00      0.00         2
         383       0.75      1.00      0.86         3
         384       0.50      0.33      0.40         3
         385 