In [1]:
import os
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis

In [11]:
import os
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis

def compute_entropy(signal, bins=50):
    hist, _ = np.histogram(signal, bins=bins, density=True)
    hist = hist[hist > 0]  # Fjern nuller for å unngå log(0)
    return -np.sum(hist * np.log2(hist))

def extract_channel_features(signal):
    feats = {}
    feats['mean'] = np.mean(signal)
    feats['std'] = np.std(signal)
    feats['skew'] = skew(signal)
    feats['kurtosis'] = kurtosis(signal)
    feats['entropy'] = compute_entropy(signal)
    feats['rms'] = np.sqrt(np.mean(signal**2))
    feats['max'] = np.max(signal)
    feats['p2p'] = np.ptp(signal)  # peak-to-peak
    return feats

def process_file(file_path):
    """
    Leser én fil (f.eks. '1st_test/1st_test/2003.10.22.12.06.24').
    For 1st_test antar vi at filen har 8 kolonner:
    B1_a, B1_b, B2_a, B2_b, B3_a, B3_b, B4_a, B4_b.
    """
    df = pd.read_csv(file_path, header=None, delim_whitespace=True)
    if df.shape[1] < 8:
        print(f"Advarsel: {file_path} har ikke 8 kolonner!")

    # Gi kolonnene navn
    col_names = ["B1_a", "B1_b", "B2_a", "B2_b", "B3_a", "B3_b", "B4_a", "B4_b"]
    df.columns = col_names

    # Lagre features i en ordbok (én rad)
    features = {}
    file_name = os.path.basename(file_path)  # F.eks. "2003.10.22.12.06.24"
    features['timestamp'] = file_name

    # Ekstraher features for hver kolonne
    for col in col_names:
        signal = df[col].values
        channel_feats = extract_channel_features(signal)
        for feat_name, val in channel_feats.items():
            features[f"{col}_{feat_name}"] = val

    return features

def process_directory(dir_path):
    """
    Leser alle filer i dir_path som starter med '2003' eller '2004',
    og returnerer en DataFrame med én rad per fil.
    """
    all_features = []
    file_list = sorted(os.listdir(dir_path))

    for file_name in file_list:
        # IMS-filer i 1st_test heter ofte "2003.10.22.12.06.24" osv.
        if file_name.startswith("2003") or file_name.startswith("2004"):
            file_path = os.path.join(dir_path, file_name)
            feats = process_file(file_path)
            all_features.append(feats)

    # Konverter liste av ordbøker til DataFrame
    return pd.DataFrame(all_features)

# ---------------------------------------------------
# EKSEMPEL: Behandle hele mappen 1st_test/1st_test
# ---------------------------------------------------

dataset_path_1st = './3rd_test/4th_test/txt'

# Kall process_directory med en mappe, ikke process_file
features_1st = process_directory(dataset_path_1st)
print(features_1st.head())

# Lagre resultatet i CSV
features_1st.to_csv("3rd_test_withfeatures.csv", index=False)
print("Ferdig! Filen '3rd_test_withfeatures.csv' er opprettet.")

Advarsel: ./3rd_test/4th_test/txt/2004.03.04.09.27.46 har ikke 8 kolonner!


ValueError: Length mismatch: Expected axis has 4 elements, new values have 8 elements