## 1.1. Credit Dataset

In [74]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

!pip install imbalanced-learn
from imblearn.over_sampling import SMOTE

df = pd.read_csv('credito.csv')
df.head()




Unnamed: 0,renda_mensal,idade,sexo,historico_credito,credito_aprovado
0,6670.125603,48,m,bom,0
1,5906.280922,60,M,regular,0
2,5268.201825,69,F,bom,0
3,3266.537872,65,F,ruim,0
4,4989.075022,26,M,regular,0


# 1.2. Standardizing Columns and Getting Dummies

In [75]:
df['sexo'] = df['sexo'].apply(lambda x: x.upper())
df = pd.get_dummies(df, columns=["sexo", "historico_credito"], dtype=float)

df.head()

Unnamed: 0,renda_mensal,idade,credito_aprovado,sexo_F,sexo_M,historico_credito_bom,historico_credito_regular,historico_credito_ruim
0,6670.125603,48,0,0.0,1.0,1.0,0.0,0.0
1,5906.280922,60,0,0.0,1.0,0.0,1.0,0.0
2,5268.201825,69,0,1.0,0.0,1.0,0.0,0.0
3,3266.537872,65,0,1.0,0.0,0.0,0.0,1.0
4,4989.075022,26,0,0.0,1.0,0.0,1.0,0.0


## 1.3. Setting and Separating Variables

In [76]:
x = df.drop('credito_aprovado', axis='columns')
y = df['credito_aprovado']
x_scaled = preprocessing.StandardScaler().fit(x)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25)

smote = SMOTE(random_state=21)
x_train_bal, y_train_bal = smote.fit_resample(x_train, y_train)


## 1.4. Finding Best Solver

In [77]:
solvers = ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']
scores = {}
max_iter = 15000

for solver in solvers:
  model = LogisticRegression(max_iter=max_iter, solver=solver)
  model.fit(x_train_bal, y_train_bal)
  y_pred = model.predict(x)
  acc = accuracy_score(y, y_pred)
  scores[solver] = acc
  print(f"Accuracy of ({solver}): {acc:.4f}")

best_solver = max(scores, key=scores.get)
print(f"Best solver: {best_solver} with accuracy of: {scores[best_solver]:.4f}")


Accuracy of (lbfgs): 0.5429
Accuracy of (liblinear): 0.5429
Accuracy of (newton-cg): 0.5429
Accuracy of (newton-cholesky): 0.5429
Accuracy of (sag): 0.5971
Accuracy of (saga): 0.6000
Best solver: saga with accuracy of: 0.6000


## 1.5. Fitting the Regression

In [78]:
final_model = LogisticRegression(max_iter=max_iter, solver=best_solver)
final_model.fit(x_train_bal, y_train_bal)
y_test_pred = final_model.predict(x_test)
final_acc = accuracy_score(y_test, y_test_pred)

print(f"Test accuracy using the best solver ({best_solver}): {final_acc:.4f}")


Test accuracy using the best solver (saga): 0.5857


## 2.1. Churn **Dataset**

In [79]:
df = pd.read_csv('churn.csv')
df.head()


Unnamed: 0,tempo_como_cliente,número_de_compras,satisfacao,uso_servico,cancelou
0,5.83009,11,4,39.709883,0
1,1.630554,3,1,38.445688,0
2,5.155415,5,2,55.784812,0
3,2.060499,3,4,49.450117,0
4,6.473215,7,2,46.142809,0


## 2.2. Setting and Separating Variables

In [80]:
x = df.drop('cancelou', axis='columns')
y = df['cancelou']
x_scaled = preprocessing.StandardScaler().fit(x)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25)

smote = SMOTE(random_state=21)
x_train_bal, y_train_bal = smote.fit_resample(x_train, y_train)


## 2.3. Finding Best Solver

In [81]:
solvers = ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']
scores = {}
max_iter = 1000

for solver in solvers:
  model = LogisticRegression(max_iter=max_iter, solver=solver)
  model.fit(x_train_bal, y_train_bal)
  y_pred = model.predict(x)
  acc = accuracy_score(y, y_pred)
  scores[solver] = acc
  print(f"Accuracy of ({solver}): {acc:.4f}")

best_solver = max(scores, key=scores.get)
print(f"Best solver: {best_solver} with accuracy of: {scores[best_solver]:.4f}")


Accuracy of (lbfgs): 0.5938
Accuracy of (liblinear): 0.5875
Accuracy of (newton-cg): 0.5938
Accuracy of (newton-cholesky): 0.5938
Accuracy of (sag): 0.5906
Accuracy of (saga): 0.5875
Best solver: lbfgs with accuracy of: 0.5938




## 2.4. Fitting the Regression

In [82]:
final_model = LogisticRegression(max_iter=max_iter, solver=best_solver)
final_model.fit(x_train_bal, y_train_bal)
y_test_pred = final_model.predict(x_test)
final_acc = accuracy_score(y_test, y_test_pred)

print(f"Test accuracy using the best solver ({best_solver}): {final_acc:.4f}")


Test accuracy using the best solver (lbfgs): 0.6125


# SUMMARY OF SOLVERS FOR LogisticRegression (sklearn)

solver='lbfgs'
- Supports multi_class='multinomial' (softmax)
- Fast and robust for small to medium datasets
- Method: Quasi-Newton (BFGS)

solver='liblinear'
- Does not support multi_class='multinomial' (only One-vs-Rest)
- Slow for large datasets
- Method: Coordinate Descent (good for small problems)

solver='newton-cg'
- Supports multi_class='multinomial'
- Efficient for medium to large problems
- Method: Newton-Raphson with Hessian approximation

solver='newton-cholesky'
- Does not support multi_class='multinomial'
- Very fast for small and well-conditioned problems
- Method: Cholesky Decomposition (requires less memory)

solver='sag'
- Supports multi_class='multinomial'
- Very efficient for large datasets (with continuous features)
- Method: Stochastic Average Gradient (variant of GD)

solver='saga'
- Supports multi_class='multinomial'
- Excellent for large datasets
- Method: Enhanced version of SAG (supports sparse data)

