In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt

def load_data(processed_data_path="processed_data"):
    features = []
    labels = []
    
    # Load term data (label 0)
    term_path = f"{processed_data_path}/term"
    for file in os.listdir(term_path):
        if file.endswith('.csv'):
            df = pd.read_csv(os.path.join(term_path, file))
            # Create feature vector using means of signals
            feature_vector = [
                df['EHG1'].mean(),
                df['EHG2'].mean(),
                df['EHG3'].mean(),
                df['EHG1'].std(),
                df['EHG2'].std(),
                df['EHG3'].std()
            ]
            features.append(feature_vector)
            labels.append(0)  # 0 for term
            
    # Load preterm data (label 1)
    preterm_path = f"{processed_data_path}/preterm"
    for file in os.listdir(preterm_path):
        if file.endswith('.csv'):
            df = pd.read_csv(os.path.join(preterm_path, file))
            feature_vector = [
                df['EHG1'].mean(),
                df['EHG2'].mean(),
                df['EHG3'].mean(),
                df['EHG1'].std(),
                df['EHG2'].std(),
                df['EHG3'].std()
            ]
            features.append(feature_vector)
            labels.append(1)  # 1 for preterm
            
    return np.array(features), np.array(labels)

def train_model():
    # Load the data
    X, y = load_data()
    
    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Scale the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Create and train model
    model = LogisticRegression(class_weight='balanced', max_iter=1000)
    model.fit(X_train_scaled, y_train)
    
    # Make predictions
    y_pred = model.predict(X_test_scaled)
    
    # Print results
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    
    return model, scaler

# Run the training
if __name__ == "__main__":
    model, scaler = train_model()

Loading data...
Splitting data into train and test sets...


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.