In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset
data_path = r"C:\Users\Daghan\OneDrive\Masaüstü\DMP\BCD_dataset_2.csv"
df = pd.read_csv(data_path)

# Assume the target variable is 'Class' and features are the rest of the columns
X = df.drop('Class', axis=1)
y = df['Class']

# Identify categorical columns
categorical_cols = X.select_dtypes(include=['object']).columns

# Create preprocessor with one-hot encoding for categorical variables and standard scaling for numeric variables
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), X.select_dtypes(include=['float64'])),
        ('cat', OneHotEncoder(), categorical_cols)
    ])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create the Logistic Regression model pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(random_state=42))
])

# Train the model on the training data
model.fit(X_train, y_train)

# Predictions on training set
train_predictions = model.predict(X_train)

# Predictions on test set
test_predictions = model.predict(X_test)

# Calculate accuracy and F1 score on training set
train_accuracy = accuracy_score(y_train, train_predictions)
train_f1_score = f1_score(y_train, train_predictions, pos_label='M')  # Assuming 'M' is the positive class

# Calculate accuracy and F1 score on test set
test_accuracy = accuracy_score(y_test, test_predictions)
test_f1_score = f1_score(y_test, test_predictions, pos_label='M')  # Assuming 'M' is the positive class

# Print results
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Training F1 Score: {train_f1_score:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")
print(f"Testing F1 Score: {test_f1_score:.4f}")


ValueError: No valid specification of the columns. Only a scalar, list or slice of all integers or all strings, or boolean mask is allowed