# In [2]:
import time
import pandas as pd
import numpy as np
from scipy.signal import stft
import glob
import os
from sklearn.preprocessing import StandardScaler

# Wall-clock start time; the total run time is reported at the end of the script
start_time = time.time()

# STFT parameters
fs = 90  # sampling frequency (90 Hz)
nperseg = int(fs * 4)  # segment length: 4 seconds of samples
overlap = int(0.75 * nperseg)  # 75% overlap between consecutive segments

# Columns (sensor channels) that are transformed into spectral features
columns_to_transform = [
    'Speed',
    'Voice',
    'Acceleration X',
    'Acceleration Y',
    'Acceleration Z',
    'Gyro X',
    'Gyro Y',
    'Gyro Z',
]

# Frequency bands: 10 Hz wide, starting every 5 Hz (5 Hz overlap), keyed by the
# letters 'a' .. 'r'. Each value is a (low, high) pair in Hz.
bands = {
    chr(ord('a') + index): (5 * index, 5 * index + 10)
    for index in range(18)
}

# Class names for the multi-class problem; they are matched as substrings of
# the input file names, so the spelling must stay exactly as it is here
class_labels = [
    "notchshort",
    "scracth",
    "singlecutshort",
    "singlecutlong",
    "twocutlong",
    "twocutshort",
    "warped",
    "notchlong",
    "healthy",
]

def calculate_band_features(frequencies, magnitude_spectrum, band):
    """Summarise the part of a magnitude spectrogram that lies in one band.

    Args:
        frequencies: 1-D NumPy array of bin frequencies, one per row of
            ``magnitude_spectrum``.
        magnitude_spectrum: 2-D array indexed as ``[frequency_bin, frame]``.
        band: ``(low, high)`` pair; bins with ``low <= f < high`` are used.

    Returns:
        A 7-tuple ``(band_power, max_power, min_power, std_power,
        median_power, peak_frequency, mean_frequency)``:

        * the first five are the sum / max / min / standard deviation /
          median over every selected value;
        * ``peak_frequency`` is the frequency of the selected bin whose
          magnitudes, summed over all frames, are largest;
        * ``mean_frequency`` is the average of the selected frequencies
          weighted by each bin's summed magnitude (0 if the band sums to 0).

        Seven zeros are returned when the band selects no bins or the
        spectrum has no frames.
    """
    no_features = (0, 0, 0, 0, 0, 0, 0)

    band_indices = np.where((frequencies >= band[0]) & (frequencies < band[1]))[0]
    if len(band_indices) == 0 or np.max(band_indices) >= magnitude_spectrum.shape[0]:
        return no_features

    # Slice once instead of re-indexing the spectrum for every statistic.
    band_spectrum = magnitude_spectrum[band_indices, :]

    # np.max / np.min raise ValueError on an empty array, so a spectrum with
    # zero frames has to be rejected before any statistic is computed.
    if band_spectrum.size == 0:
        return no_features

    band_power = np.sum(band_spectrum)
    max_power = np.max(band_spectrum)
    min_power = np.min(band_spectrum)
    std_power = np.std(band_spectrum)
    median_power = np.median(band_spectrum)

    # Total magnitude of each selected frequency bin across all frames.
    power_per_bin = band_spectrum.sum(axis=1)
    peak_frequency = frequencies[band_indices[np.argmax(power_per_bin)]]

    if band_power > 0:
        mean_frequency = np.sum(frequencies[band_indices] * power_per_bin) / band_power
    else:
        # Avoid 0/0 for a band that carries no energy at all.
        mean_frequency = 0

    return band_power, max_power, min_power, std_power, median_power, peak_frequency, mean_frequency

def extract_features_from_file(file_path):
    """Extract per-band STFT features and the class label from one CSV file.

    For every column of ``columns_to_transform`` that is present in the file,
    the signal is transformed with ``scipy.signal.stft`` and each band in
    ``bands`` is summarised by ``calculate_band_features``. Bands that
    contain no STFT frequency bin are skipped, so they produce no keys.

    Args:
        file_path: Path of the CSV file to read. Its base name must contain
            one of the names in ``class_labels``.

    Returns:
        dict mapping ``'<column>_<band>_<statistic>'`` to a number, plus the
        key ``'label'`` holding the class name.

    Raises:
        ValueError: If no known class name occurs in the file name.
    """
    data = pd.read_csv(file_path)
    features = {}

    # Key suffixes, in the order calculate_band_features returns its values.
    feature_suffixes = (
        'power', 'max_power', 'min_power', 'std_power',
        'median_power', 'peak_frequency', 'mean_frequency',
    )

    for col in columns_to_transform:
        if col not in data.columns:
            continue

        signal = data[col].values
        # Use a 4-second window, shrunk to the signal length for recordings
        # shorter than that; noverlap must stay strictly below the window.
        segment_length = min(len(signal), int(4 * fs))
        noverlap = min(segment_length - 1, int(segment_length * 0.75))

        f, _, Zxx = stft(signal, fs=fs, nperseg=segment_length, noverlap=noverlap)
        magnitude_spectrum = np.abs(Zxx)

        for band_name, band_range in bands.items():
            # Skip bands without any frequency bin (e.g. above the highest
            # STFT frequency) instead of emitting all-zero features for them.
            in_band = (f >= band_range[0]) & (f < band_range[1])
            if not in_band.any():
                continue

            # Delegating to the shared helper fixes two defects of the former
            # inline code: the peak frequency used a flattened 2-D argmax as
            # a row index, and the mean frequency became NaN (0/0) for a band
            # with no energy.
            values = calculate_band_features(f, magnitude_spectrum, band_range)
            for suffix, value in zip(feature_suffixes, values):
                features[f'{col}_{band_name}_{suffix}'] = value

    # Derive the class label from the file name; "healthy" takes priority,
    # otherwise the first matching entry of class_labels wins.
    file_name = os.path.basename(file_path).lower()
    if "healthy" in file_name:
        label = "healthy"
    else:
        label = next((name for name in class_labels if name in file_name), None)

    if label is None:
        raise ValueError(f"Unknown class label in file: {file_path}")

    features['label'] = label
    return features

# Run feature extraction for every CSV file in the input directory.
# glob returns paths in arbitrary order, so sort them to make the row order of
# the output files reproducible between runs.
csv_files = sorted(glob.glob('output/*.csv'))
if not csv_files:
    # Fail early with a clear message instead of an unrelated pandas error
    # when the empty feature table is written and read back.
    raise FileNotFoundError("No CSV files found matching 'output/*.csv'")

all_features = []

for file in csv_files:
    features = extract_features_from_file(file)
    all_features.append(features)

# Combine the per-file feature dicts into one table and save it as CSV
features_df = pd.DataFrame(all_features)
output_feature_file = 'extracted_features_multi_class2.csv'
features_df.to_csv(output_feature_file, index=False)

print(f"Feature extraction completed, saved to {output_feature_file}")

# Standardization: reload the feature table that was just written
file_path = 'extracted_features_multi_class2.csv'
features_df = pd.read_csv(file_path)

# Drop every column that contains a missing value
features_cleaned = features_df.dropna(axis=1)

# Set the 'label' column aside so that only numeric features are scaled
if 'label' in features_cleaned.columns:
    labels = features_cleaned['label']
    features_cleaned = features_cleaned.drop('label', axis=1)
else:
    labels = None

# Scale each feature column to zero mean and unit variance
scaler = StandardScaler()
standardized_features = scaler.fit_transform(features_cleaned)

# Turn the standardized array back into a DataFrame with the original columns
standardized_df = pd.DataFrame(standardized_features, columns=features_cleaned.columns)

# Re-attach the 'label' column
if labels is not None:
    standardized_df['label'] = labels

# Save the result
output_file_path = 'standardized_features_multiclass2.csv'
standardized_df.to_csv(output_file_path, index=False)

print(f"Standardizasyon tamamlandı ve sonuçlar {output_file_path} dosyasına kaydedildi.")

# End time
end_time = time.time()

# Total running time
execution_time = end_time - start_time
print(f"Total running time of the code: {execution_time:.2f} saniye")


# Output:
# Feature extraction completed, saved to extracted_features_multi_class2.csv
# Standardizasyon tamamlandı ve sonuçlar standardized_features_multiclass2.csv dosyasına kaydedildi.
# Total running time of the code: 2.60 saniye
