In [2]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset into a pandas DataFrame
data = pd.read_csv('../datasets/insurance.csv')

# Define the feature and target columns
X = data.drop(['charges'], axis=1)
y = data['charges']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the categorical and continuous columns
categorical_cols = ['sex', 'smoker', 'region']
continuous_cols = ['age', 'bmi', 'children']

# Create the column transformer to preprocess the data
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
        ('num', StandardScaler(), continuous_cols)
    ])

# Define the logistic regression model
clf = LogisticRegression()

# Create the pipeline to preprocess and model the data
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', clf)])

# Fit the model to the training data
clf_model = clf.fit(X_train, y_train)

# Predict the charges for the test data
y_pred = pipeline.predict(X_test)

# Evaluate the model performance on the test data
score = pipeline.score(X_test, y_test)
print(f"Accuracy: {score}")

ValueError: Unknown label type: 'continuous'