# In [3]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

def train_and_evaluate_model_with_cv(file_name, target_variable, cv_splits):
    """Return the mean cross-validated score of a scaled logistic regression.

    The CSV is loaded with pandas, a textual 'Yes'/'No' target is encoded as
    1/0, non-numeric feature columns are coerced to numbers, and a
    ``StandardScaler`` + ``LogisticRegression`` pipeline is evaluated with
    ``cross_val_score``.

    Note that non-numeric feature values that cannot be parsed as numbers are
    replaced by 0, so a purely categorical column ends up constant and carries
    no information for the model.

    Args:
        file_name: Path (or other source accepted by ``pd.read_csv``) of the
            dataset.
        target_variable: Name of the column to predict.
        cv_splits: Value forwarded as ``cv`` to ``cross_val_score``.

    Returns:
        The mean of the per-fold scores returned by ``cross_val_score``.

    Raises:
        ValueError: If a non-numeric target column contains anything other
            than 'Yes' or 'No' (including missing values).
    """
    df = pd.read_csv(file_name)

    # Encode a textual 'Yes'/'No' target as 1/0; numeric targets pass through.
    y = df[target_variable]
    if not pd.api.types.is_numeric_dtype(y):
        encoded = y.map({'Yes': 1, 'No': 0})
        unmapped = encoded.isna()
        if unmapped.any():
            # Fail early with a clear message instead of handing NaN labels
            # to the estimator.
            examples = y[unmapped].unique().tolist()[:5]
            raise ValueError(
                f"Target column {target_variable!r} must contain only "
                f"'Yes' or 'No'; found unexpected values: {examples}"
            )
        y = encoded

    feature_columns = [col for col in df.columns if col != target_variable]
    for col in feature_columns:
        if not pd.api.types.is_numeric_dtype(df[col]):
            # errors='coerce' turns unparseable entries into NaN (it does not
            # raise), and the result is assigned back explicitly because an
            # in-place fillna on a column selection does not reliably update
            # the parent frame.
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    X = df[feature_columns]

    # Keeping the scaler inside the pipeline means it is fitted on each
    # training fold only, so held-out fold statistics never leak into training.
    model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=100))

    scores = cross_val_score(model, X, y, cv=cv_splits)
    return scores.mean()

# Demo run: score the model on the Telco customer-churn CSV.
file_name = 'WA_Fn-UseC_-Telco-Customer-Churn.csv'
target_variable = 'Churn'
cv_splits = 5  # how many cross-validation folds to use
average_accuracy = train_and_evaluate_model_with_cv(
    file_name=file_name,
    target_variable=target_variable,
    cv_splits=cv_splits,
)
print("Average Accuracy:", average_accuracy)


# Output (recorded from the notebook run):
# Average Accuracy: 0.7909990160655526
