# In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import utils
import constants

# --- Data preparation ---
# Read the original data file and pass it straight through the cleaning step.
df = utils.clean_data(utils.load_data(constants.ORIGINAL_DATA_FILE_PATH))

# split_customer_id returns the updated frame plus a list of column names;
# that list is appended to the columns that get one-hot encoded below.
df, customer_id_text_column_names = utils.split_customer_id(df)

# Apply the single-argument encoders in order: yes/no columns, then contract.
for encode_step in (utils.encode_yes_no_columns, utils.encode_contract_column):
    df = encode_step(df)

# One-hot encode the configured columns together with the customer-id columns.
columns_to_one_hot = constants.COLUMNS_TO_ENCODE + customer_id_text_column_names
df, onehot_encoders = utils.create_one_hot_coded_columns(df, columns_to_one_hot)

# Cast the columns to float before scaling.
df = utils.convert_columns_to_float(df)

# Standard scaling for the BELL_CURVE_TYPE_COLUMNS, min-max scaling for the
# NOT_BELL_CURVE_TYPE_COLUMNS; the returned scalers are kept so they can be saved.
# NOTE(review): both scalers are applied to the whole frame before the
# train/test split later in this script, so they presumably see the test rows
# too -- confirm whether fitting on the training split only is wanted.
df, standard_scalers = utils.apply_standard_scaling(df, constants.BELL_CURVE_TYPE_COLUMNS)
df, minmax_scalers = utils.apply_minmax_scaling(df, constants.NOT_BELL_CURVE_TYPE_COLUMNS)

# Create the artifact folders before anything is written.
# NOTE(review): these folder names are hard-coded while the file paths below
# come from `constants` -- presumably they point into these folders; verify.
for artifact_folder in (
    "artifacts/encoders",
    "artifacts/scalers",
    "artifacts/preprocessed_data",
):
    utils.create_folder(artifact_folder)

# Pickle the encoders and scalers, each to its configured path.
for artifact, artifact_path in (
    (onehot_encoders, constants.ENCODERS_PATH),
    (standard_scalers, constants.STANDARD_SCALERS_PATH),
    (minmax_scalers, constants.MINMAX_SCALERS_PATH),
):
    utils.save_pickle(artifact, artifact_path)

# Also keep a CSV copy of the preprocessed dataframe (row index omitted).
df.to_csv(constants.PREPROCESSED_TRAIN_DATA_PATH, index=False)

# --- Model training ---
# Separate the target column from the feature columns.
y = df['Churn_encoded']
X = df.drop(columns='Churn_encoded')

# Hold out a test set; size and seed come from the project constants.
# NOTE(review): the split is not stratified -- if the target classes are
# imbalanced, consider whether `stratify=y` is wanted.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=constants.TEST_SIZE,
    random_state=constants.RANDOM_STATE,
)

# Build the logistic regression from the stored parameters and fit it
# (fit() returns the estimator itself).
lr_model = LogisticRegression(**constants.LR_BEST_PARAMS).fit(X_train, y_train)

# --- Model evaluation ---
# Accuracy on the data the model was fitted on and on the held-out split.
train_predictions = lr_model.predict(X_train)
test_predictions = lr_model.predict(X_test)
train_accuracy = accuracy_score(y_train, train_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)

print(f'Train accuracy: {train_accuracy}')
print(f'Test accuracy: {test_accuracy}')

# Pickle the fitted model so the inference pipeline can use it.
utils.save_pickle(lr_model, constants.TRAINED_MODEL_PATH)

print(f'Trained model saved at: {constants.TRAINED_MODEL_PATH}')