In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, accuracy_score
import joblib

df = pd.read_csv('Telco-Customer-Churn.csv')

selected_features = ['tenure', 'Contract', 'MonthlyCharges', 'PaperlessBilling', 'PaymentMethod']
target = 'Churn'

df['Churn'] = df['Churn'].apply(lambda x: 1 if x == 'Yes' else 0)

X = df[selected_features]
y = df[target]

numerical_cols = ['tenure', 'MonthlyCharges']
categorical_cols = ['Contract', 'PaperlessBilling', 'PaymentMethod']

numerical_imputer = SimpleImputer(strategy='mean')
categorical_imputer = SimpleImputer(strategy='most_frequent')

for col in numerical_cols:
    X[col] = numerical_imputer.fit_transform(X[[col]]).ravel()

for col in categorical_cols:
    X[col] = categorical_imputer.fit_transform(X[[col]]).ravel()

numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ]
)

model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(random_state=42))
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

model_pipeline.fit(X_train, y_train)

y_pred = model_pipeline.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

joblib.dump(model_pipeline, 'logistic_model.pkl')
print("Model berhasil disimpan ke 'logistic_model.pkl'")

Accuracy: 0.7650816181689141
              precision    recall  f1-score   support

           0       0.81      0.89      0.85      1035
           1       0.58      0.43      0.49       374

    accuracy                           0.77      1409
   macro avg       0.69      0.66      0.67      1409
weighted avg       0.75      0.77      0.75      1409

Model berhasil disimpan ke 'logistic_model.pkl'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = numerical_imputer.fit_transform(X[[col]]).ravel()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = numerical_imputer.fit_transform(X[[col]]).ravel()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = categorical_imputer.fit_transform(X[[col]]).ravel()
A value is trying to be se