In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


In [2]:
from scipy.signal import butter, filtfilt, iirnotch, savgol_filter, medfilt
import numpy as np
import pandas as pd
import os
import pywt
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path
import argparse

def design_filters():
    """
    Design specialized filters for EEG signal processing
    """
    def notch_filter(signal, fs=250):
        # Power line interference (50/60 Hz)
        f0 = 50.0  # Notch frequency
        Q = 30.0  # Quality factor
        w0 = f0/(fs/2)
        b, a = iirnotch(w0, Q)
        return filtfilt(b, a, signal)
    
    def eeg_bandpass(signal, fs=250):
        # EEG-specific bandpass (0.5-45 Hz to preserve relevant brain activity)
        nyquist = fs/2
        b, a = butter(4, [0.5/nyquist, 45/nyquist], btype='band')
        return filtfilt(b, a, signal)
    
    return notch_filter, eeg_bandpass

def wavelet_denoising(signal, wavelet='db4'):
    """
    Advanced wavelet denoising specifically tuned for EEG
    """
    # Decompose signal
    coeffs = pywt.wavedec(signal, wavelet, level=4)
    
    # Keep approximation coefficients unchanged
    # and apply progressive thresholding to detail coefficients
    for i in range(1, len(coeffs)):
        # Universal threshold with level-dependent sigma estimation
        sigma = np.median(np.abs(coeffs[i])) / 0.6745
        threshold = sigma * np.sqrt(2 * np.log(len(coeffs[i])))
        # Softer thresholding for lower frequency components
        threshold *= (1 - (i / len(coeffs)))
        coeffs[i] = pywt.threshold(coeffs[i], threshold, mode='garrote')
    
    return pywt.waverec(coeffs, wavelet)

def baseline_correction(signal, window_size=1000):
    """
    Remove baseline wandering using moving average
    """
    # Ensure window size is odd
    window_size = window_size + 1 if window_size % 2 == 0 else window_size
    
    # Compute moving average for baseline estimation
    padding = np.ones(window_size//2)
    padded_signal = np.concatenate([padding * signal[0], signal, padding * signal[-1]])
    baseline = np.convolve(padded_signal, np.ones(window_size)/window_size, mode='valid')
    
    return signal - baseline

def denoise_eeg(signal):
    """
    Enhanced EEG denoising pipeline
    """
    # Get filter functions
    notch_filter, eeg_bandpass = design_filters()
    
    # Step 1: Remove baseline wandering
    signal_baselined = baseline_correction(signal)
    
    # Step 2: Apply notch filter for power line interference
    signal_notched = notch_filter(signal_baselined)
    
    # Step 3: Apply EEG-specific bandpass filter
    signal_filtered = eeg_bandpass(signal_notched)
    
    # Step 4: Apply wavelet denoising
    signal_denoised = wavelet_denoising(signal_filtered)
    
    return signal_denoised

def plot_comparison(noisy_data, denoised_data, title="Signal Comparison", save_path=None):
    """
    Plot comparison between noisy and denoised signals
    """
    plt.figure(figsize=(15, 8))
    
    # Plot first channel as example
    time = np.arange(noisy_data.shape[1]) / 250  # Assuming 250 Hz sampling rate
    
    plt.subplot(2, 1, 1)
    plt.plot(time, noisy_data[0], label='Noisy', alpha=0.7)
    plt.title(f'{title} - Noisy Signal')
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.grid(True)
    plt.legend()
    
    plt.subplot(2, 1, 2)
    plt.plot(time, denoised_data[0], label='Denoised', alpha=0.7)
    plt.title('Denoised Signal')
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.grid(True)
    plt.legend()
    
    plt.tight_layout()
    
    if save_path:
        plt.savefig(save_path)
        plt.close()
    else:
        plt.show()

def calculate_psnr(clean, denoised):
    """
    Calculate PSNR with robust normalization
    """
    # Normalize both signals to same scale
    clean = (clean - np.mean(clean)) / (np.std(clean) + 1e-10)
    denoised = (denoised - np.mean(denoised)) / (np.std(denoised) + 1e-10)
    
    mse = np.mean((clean - denoised) ** 2)
    if mse < 1e-10:
        return 100.0
    
    # Use fixed reference value for max signal after normalization
    max_signal = 3.0  # 3 sigma range
    return 20 * np.log10(max_signal / np.sqrt(mse))

def process_and_save_data(noisy_data_path, output_path, plot_dir=None):
    """
    Process and save denoised EEG data
    """
    noisy_data_path = Path(noisy_data_path)
    output_path = Path(output_path)
    output_path.mkdir(parents=True, exist_ok=True)
    
    if plot_dir:
        plot_dir = Path(plot_dir)
        plot_dir.mkdir(parents=True, exist_ok=True)
    
    class_folders = [f for f in noisy_data_path.iterdir() if f.is_dir()]
    
    results = []
    for class_folder in class_folders:
        print(f"\nProcessing {class_folder.name}")
        
        class_output_path = output_path / class_folder.name
        class_output_path.mkdir(exist_ok=True)
        
        files = list(class_folder.glob('*.npy'))
        
        for file_path in tqdm(files, desc=f"Denoising {class_folder.name}"):
            noisy_data = np.load(file_path)
            
            denoised_data = np.zeros_like(noisy_data)
            for ch in range(noisy_data.shape[0]):
                denoised_data[ch] = denoise_eeg(noisy_data[ch])
            
            output_file = class_output_path / f"denoised_{file_path.name}"
            np.save(output_file, denoised_data)
            
            if plot_dir and file_path == files[0]:
                plot_path = plot_dir / f"{class_folder.name}_comparison.png"
                plot_comparison(noisy_data, denoised_data, 
                              title=f"{class_folder.name} - {file_path.name}",
                              save_path=plot_path)
            
            results.append({
                'class': class_folder.name,
                'file': file_path.name,
                'original_path': str(file_path),
                'denoised_path': str(output_file)
            })
    
    # Save processing summary
    df = pd.DataFrame(results)
    df.to_csv(output_path / 'processing_summary.csv', index=False)
    return df

def calculate_psnr_metrics(denoised_path, clean_path):
    """
    Calculate PSNR metrics between denoised and clean data
    """
    denoised_path = Path(denoised_path)
    clean_path = Path(clean_path)
    
    results = []
    class_folders = [f for f in denoised_path.iterdir() if f.is_dir()]
    
    for class_folder in class_folders:
        print(f"\nProcessing {class_folder.name}")
        clean_class_path = clean_path / class_folder.name
        
        if not clean_class_path.exists():
            print(f"Warning: Clean data folder not found for {class_folder.name}")
            continue
        
        denoised_files = list(class_folder.glob('denoised_*.npy'))
        
        for denoised_file in tqdm(denoised_files, desc=f"Calculating PSNR for {class_folder.name}"):
            clean_file = clean_class_path / denoised_file.name.replace('denoised_', '')
            
            if not clean_file.exists():
                print(f"Warning: Clean file not found for {denoised_file.name}")
                continue
                
            denoised_data = np.load(denoised_file)
            clean_data = np.load(clean_file)
            
            channel_psnrs = []
            for ch in range(clean_data.shape[0]):
                psnr = calculate_psnr(clean_data[ch], denoised_data[ch])
                channel_psnrs.append(psnr)
            
            results.append({
                'class': class_folder.name,
                'file': denoised_file.name,
                'avg_psnr': np.mean(channel_psnrs),
                'min_psnr': np.min(channel_psnrs),
                'max_psnr': np.max(channel_psnrs),
                'std_psnr': np.std(channel_psnrs)
            })
    
    df = pd.DataFrame(results)
    print("\nPSNR Summary Statistics:")
    print(df.describe())
    
    # Save results
    results_path = denoised_path / 'psnr_results.csv'
    df.to_csv(results_path, index=False)
    print(f"\nDetailed results saved to: {results_path}")
    
    return df

def main():
    parser = argparse.ArgumentParser(description='EEG Signal Denoising Pipeline')
    parser.add_argument('--input_dir', type=str, required=True,
                        help='Directory containing noisy EEG data')
    parser.add_argument('--output_dir', type=str, required=True,
                        help='Directory to save denoised data')
    parser.add_argument('--clean_dir', type=str,
                        help='Optional: Directory containing clean data for PSNR evaluation')
    parser.add_argument('--plot_dir', type=str,
                        help='Optional: Directory to save comparison plots')
    
    args = parser.parse_args()
    
    # Step 1: Process and save denoised data
    print("Step 1: Processing and saving denoised data...")
    process_summary = process_and_save_data(args.input_dir, args.output_dir, args.plot_dir)
    
    # Step 2: Calculate PSNR if clean data is provided
    if args.clean_dir:
        print("\nStep 2: Calculating PSNR metrics...")
        psnr_results = calculate_psnr_metrics(args.output_dir, args.clean_dir)
    
    print("\nProcessing completed successfully!")

if __name__ == "__main__":
    # Example usage with hardcoded paths
    noisy_data_path = "/kaggle/input/eeg-datas/EEG_Data/noisy_train_data"
    clean_data_path = "/kaggle/input/eeg-datas/EEG_Data/train_data"
    denoised_output_path = "denoised_data"
    plot_output_path = "comparison_plots"
    
    # You can either use the command line interface:
    # main()
    
    # Or call the functions directly:
    print("Step 1: Processing and saving denoised data...")
    process_summary = process_and_save_data(noisy_data_path, denoised_output_path, plot_output_path)
    
    print("\nStep 2: Calculating PSNR metrics...")
    psnr_results = calculate_psnr_metrics(denoised_output_path, clean_data_path)

Step 1: Processing and saving denoised data...

Processing Complex_Partial_Seizures


Denoising Complex_Partial_Seizures: 100%|██████████| 2196/2196 [01:09<00:00, 31.55it/s]



Processing Normal


Denoising Normal: 100%|██████████| 2783/2783 [01:25<00:00, 32.47it/s]



Processing Video_detected_Seizures_with_no_visual_change_over_EEG


Denoising Video_detected_Seizures_with_no_visual_change_over_EEG: 100%|██████████| 84/84 [00:03<00:00, 27.74it/s]



Processing Electrographic_Seizures


Denoising Electrographic_Seizures: 100%|██████████| 545/545 [00:17<00:00, 31.99it/s]



Step 2: Calculating PSNR metrics...

Processing Video_detected_Seizures_with_no_visual_change_over_EEG


Calculating PSNR for Video_detected_Seizures_with_no_visual_change_over_EEG: 100%|██████████| 84/84 [00:00<00:00, 143.41it/s]



Processing Normal


Calculating PSNR for Normal: 100%|██████████| 2783/2783 [00:18<00:00, 146.87it/s]



Processing Electrographic_Seizures


Calculating PSNR for Electrographic_Seizures: 100%|██████████| 545/545 [00:03<00:00, 144.52it/s]



Processing Complex_Partial_Seizures


Calculating PSNR for Complex_Partial_Seizures: 100%|██████████| 2196/2196 [00:15<00:00, 143.49it/s]



PSNR Summary Statistics:
          avg_psnr     min_psnr     max_psnr     std_psnr
count  5608.000000  5608.000000  5608.000000  5608.000000
mean      8.656671     5.999589    12.228704     1.627040
std       1.411280     0.819848     3.030967     0.659654
min       6.264783     3.848330     7.409066     0.307319
25%       7.587193     5.434902    10.098515     1.145749
50%       8.377553     5.918633    11.603919     1.521642
75%       9.336047     6.445319    13.576932     1.962726
max      15.282770    10.176529    29.743308     4.838150

Detailed results saved to: denoised_data/psnr_results.csv


In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
import xgboost as xgb
from pathlib import Path
from tqdm import tqdm
import pywt
from scipy import signal
from scipy.stats import kurtosis, skew

class KaggleEEGClassifier:
    def __init__(self, sampling_rate=256):
        self.sampling_rate = sampling_rate
        self.frequency_bands = {
            'delta': (0.5, 4),
            'theta': (4, 8),
            'alpha': (8, 13),
            'beta': (13, 30),
            'gamma': (30, 80)
        }
        
    def extract_features(self, signal_data):
        """Extract key EEG features from signal"""
        features = {}
        
        # Time domain features
        features.update({
            'mean': np.mean(signal_data),
            'std': np.std(signal_data),
            'kurtosis': kurtosis(signal_data),
            'skewness': skew(signal_data),
            'peak_to_peak': np.max(signal_data) - np.min(signal_data),
            'zero_crossings': np.sum(np.diff(np.signbit(signal_data)).astype(int)),
            'line_length': np.sum(np.abs(np.diff(signal_data)))
        })
        
        # Frequency domain features
        freqs, psd = signal.welch(signal_data, self.sampling_rate, nperseg=min(256, len(signal_data)))
        total_power = np.sum(psd)
        
        for band_name, (low, high) in self.frequency_bands.items():
            mask = (freqs >= low) & (freqs <= high)
            band_power = np.sum(psd[mask])
            features[f'{band_name}_power'] = band_power
            features[f'{band_name}_ratio'] = band_power / total_power if total_power > 0 else 0
        
        # Wavelet features
        coeffs = pywt.wavedec(signal_data, 'db4', level=4)
        for i, coeff in enumerate(coeffs):
            features[f'wavelet_l{i}_energy'] = np.sum(coeff**2)
            features[f'wavelet_l{i}_mean'] = np.mean(np.abs(coeff))
        
        return features

    def prepare_data(self, denoised_data_path):
        """Process EEG data and extract features"""
        features_list = []
        labels = []
        
        denoised_path = Path(denoised_data_path)
        
        for class_folder in denoised_path.iterdir():
            if not class_folder.is_dir():
                continue
                
            print(f"Processing {class_folder.name}")
            files = list(class_folder.glob('denoised_*.npy'))
            
            for file_path in tqdm(files):
                try:
                    signal_data = np.load(file_path)
                    
                    if np.any(np.isnan(signal_data)):
                        continue
                    
                    # Process each channel
                    file_features = {}
                    for ch in range(signal_data.shape[0]):
                        channel_features = self.extract_features(signal_data[ch])
                        for key, value in channel_features.items():
                            file_features[f'ch{ch}_{key}'] = value
                    
                    # Add cross-channel features
                    for i in range(signal_data.shape[0]):
                        for j in range(i+1, signal_data.shape[0]):
                            correlation = np.corrcoef(signal_data[i], signal_data[j])[0,1]
                            file_features[f'correlation_ch{i}_ch{j}'] = correlation
                    
                    features_list.append(file_features)
                    labels.append(class_folder.name)
                    
                except Exception as e:
                    print(f"Error processing {file_path}: {str(e)}")
                    continue
        
        return pd.DataFrame(features_list), labels

def train_classifier(X, y, random_state=42):
    """Train and evaluate the XGBoost classifier"""
    # Prepare data
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    
    # Initialize model
    model = xgb.XGBClassifier(
        n_estimators=200,
        max_depth=7,
        learning_rate=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=random_state,
        tree_method='hist',  # Faster training
        objective='multi:softprob',
        eval_metric='mlogloss'
    )
    
    # Create cross-validation folds
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
    
    # Train and evaluate
    scores = []
    for fold, (train_idx, val_idx) in enumerate(cv.split(X, y_encoded), 1):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y_encoded[train_idx], y_encoded[val_idx]
        
        # Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)
        
        # Train model
        model.fit(X_train_scaled, y_train,
                 eval_set=[(X_val_scaled, y_val)],
                 early_stopping_rounds=20,
                 verbose=0)
        
        # Evaluate
        y_pred = model.predict(X_val_scaled)
        score = classification_report(y_val, y_pred, output_dict=True)
        scores.append(score['weighted avg']['f1-score'])
        
        print(f"Fold {fold} F1-Score: {scores[-1]:.4f}")
    
    print(f"\nMean F1-Score: {np.mean(scores):.4f} (+/- {np.std(scores):.4f})")
    
    # Train final model on full dataset
    X_scaled = scaler.fit_transform(X)
    model.fit(X_scaled, y_encoded)
    
    return model, scaler, le

def main():
    # Set paths
    DATA_PATH = "/kaggle/working/denoised_data"  # Adjust path as needed
    
    # Initialize classifier
    classifier = KaggleEEGClassifier()
    
    # Prepare data
    print("Preparing dataset...")
    X, y = classifier.prepare_data(DATA_PATH)
    
    print("\nDataset Summary:")
    print(f"Total samples: {len(X)}")
    print(f"Features: {X.shape[1]}")
    print("\nClass distribution:")
    for label, count in pd.Series(y).value_counts().items():
        print(f"{label}: {count}")
    
    # Train and evaluate
    print("\nTraining classifier...")
    model, scaler, label_encoder = train_classifier(X, y)
    
    # Save model and transformers
    model.save_model('eeg_model.json')
    
    return model, scaler, label_encoder

if __name__ == "__main__":
    model, scaler, label_encoder = main()

Preparing dataset...
Processing Video_detected_Seizures_with_no_visual_change_over_EEG


100%|██████████| 84/84 [00:04<00:00, 20.01it/s]


Processing Normal


100%|██████████| 2783/2783 [02:21<00:00, 19.71it/s]


Processing Electrographic_Seizures


100%|██████████| 545/545 [00:27<00:00, 19.67it/s]


Processing Complex_Partial_Seizures


100%|██████████| 2196/2196 [01:54<00:00, 19.13it/s]



Dataset Summary:
Total samples: 5608
Features: 684

Class distribution:
Normal: 2783
Complex_Partial_Seizures: 2196
Electrographic_Seizures: 545
Video_detected_Seizures_with_no_visual_change_over_EEG: 84

Training classifier...


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 1 F1-Score: 0.7738


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 2 F1-Score: 0.7467


KeyboardInterrupt: 

In [7]:
from scipy.fft import fft, fftfreq

def fourier_transform(signals, sampling_rate):
    """
    Perform Fourier Transform on the given signal.
    """
    for signal in signals:
        fft_result = np.abs(fft(signal)) # Get rid of imaginary values
        frequencies = fftfreq(len(signal), 1 / sampling_rate)  # Frequency bins
        fft_magnitudes = fft_result[:len(fft_result)//2]
        frequencies = frequencies[:len(frequencies)//2]
        
    return fft_magnitudes, frequencies

from scipy.signal import find_peaks

def extract_channel_features(i, signal, sampling_rate):
    """Extract specified features for a single channel."""
    # FFT computation
    fft_result, frequencies = fourier_transform([signal], sampling_rate)
    
    # Keep only positive frequencies
    positive_frequencies = frequencies > 0
    fft_result = fft_result[positive_frequencies]
    frequencies = frequencies[positive_frequencies]
    
    # Dominant frequency and amplitude
    dominant_idx = np.argmax(fft_result)
    dominant_frequency = frequencies[dominant_idx]
    dominant_amplitude = fft_result[dominant_idx]
    
    # Total power
    total_power = np.sum(fft_result**2)/len(fft_result)
    
    # Spectral centroid
    spectral_centroid = np.sum(frequencies * fft_result) / np.sum(fft_result)
    
    # Spectral bandwidth
    spectral_bandwidth = np.sqrt(np.sum(((frequencies - spectral_centroid)**2) * fft_result) / np.sum(fft_result))
    
    # Shannon entropy
    spectral_prob = fft_result / np.sum(fft_result)
    spectral_entropy = -np.sum(spectral_prob * np.log2(spectral_prob + 1e-12))  # Adding a small value for numerical stability
    
    # Frequency variance
    frequency_variance = np.var(fft_result)
    
    # Zero-crossing rate (ZCR) of the original signal
    zcr = np.sum(np.sign(np.diff(signal)) != 0)/len(signal)

    return {
        "Channel":i,
        "Dominant_Frequency": dominant_frequency,
        "Dominant_Amplitude": dominant_amplitude,
        "Total_Power": total_power,
        "Spectral_Centroid": spectral_centroid,
        "Spectral_Bandwidth": spectral_bandwidth,
        "Spectral_Entropy": spectral_entropy,
        "Frequency_Variance": frequency_variance,
        "Zero_Crossing_Rate": zcr
    }

def signal_features(signals, sampling_rate):
    features = []
    for i in range(len(signals)):
        signal = signals[i]
        features.append(extract_channel_features(i+1, signal, sampling_rate))
    return pd.DataFrame(features)

def flatten_features(features_df):
    flattened_features = {}
    for column in features_df.columns:
        if column != "Channel":  # Skip the Channel column as it's used for indexing
            for idx, value in enumerate(features_df[column]):
                key = f"{column}_Ch{idx + 1}"
                flattened_features[key] = value
    
    return pd.DataFrame([flattened_features])


In [8]:
# Get .npy files from a class folder and make a dataframe containing its Fourier features
def features_from_folder(folder_path, sampling_rate):

    all_features = []

    for filename in os.listdir(folder_path):
        if filename.endswith(".npy"):  
            file_path = os.path.join(folder_path, filename)
            signals = np.load(file_path) 
                
            sig = signal_features(signals, sampling_rate)
            flat = flatten_features(sig)
            all_features.append(flat)
    
    features_df = pd.concat(all_features,ignore_index=True)
    
    return features_df


In [9]:
# Validation Data

cps_df = features_from_folder("/kaggle/input/eeg-datas/EEG_Data/validation_data/Complex_Partial_Seizures", 1000)
cps_df["Label"] = 1
normal_df = features_from_folder("/kaggle/input/eeg-datas/EEG_Data/validation_data/Normal", 1000)
normal_df["Label"] = 0
es_df = features_from_folder("/kaggle/input/eeg-datas/EEG_Data/validation_data/Electrographic_Seizures", 1000)
es_df["Label"] = 2
vds_df = features_from_folder("/kaggle/input/eeg-datas/EEG_Data/validation_data/Video_detected_Seizures_with_no_visual_change_over_EEG", 1000)
vds_df["Label"] = 3

val_data = pd.concat([cps_df, normal_df, es_df, vds_df], ignore_index=True)
val_data = val_data.sample(frac=1).reset_index(drop=True)
X_val = val_data.drop("Label", axis=1)
y_val = val_data["Label"]

In [10]:
# Denoised Data

cps_den_df = features_from_folder("/kaggle/working/denoised_data/Complex_Partial_Seizures", 1000)
cps_den_df["Label"] = 1
normal_den_df = features_from_folder("/kaggle/working/denoised_data/Normal", 1000)
normal_den_df["Label"] = 0
es_den_df = features_from_folder("/kaggle/working/denoised_data/Electrographic_Seizures", 1000)
es_den_df["Label"] = 2
vds_den_df = features_from_folder("/kaggle/working/denoised_data/Video_detected_Seizures_with_no_visual_change_over_EEG", 1000)
vds_den_df["Label"] = 3

train_data = pd.concat([cps_den_df, normal_den_df, es_den_df, vds_den_df], ignore_index=True)
train_data = train_data.sample(frac=1).reset_index(drop=True)
X_denoised = train_data.drop("Label", axis=1)
y_denoised = train_data["Label"]

In [11]:
print(X_val.shape)
print(X_denoised.shape)

(1403, 152)
(5608, 152)


In [13]:
import xgboost as xgb
model_xgb = xgb.XGBClassifier(
    objective="multi:softprob",  # For multi-class classification
    eval_metric="mlogloss",     # Log loss for evaluation
    use_label_encoder=False,    # Suppress warning for label encoding
    n_estimators=1000,           # Number of trees
    max_depth=4,                # Depth of each tree
    learning_rate=0.1           # Learning rate
)

model_xgb.fit(X_denoised, y_denoised)

In [15]:
y_pred = model_xgb.predict(X_val)
print(classification_report(y_val,y_pred))

              precision    recall  f1-score   support

           0       0.60      0.00      0.01       696
           1       0.39      0.99      0.56       549
           2       0.00      0.00      0.00       137
           3       0.00      0.00      0.00        21

    accuracy                           0.39      1403
   macro avg       0.25      0.25      0.14      1403
weighted avg       0.45      0.39      0.22      1403



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
