In [5]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.model_selection import cross_val_score
from skopt import gp_minimize

In [6]:
train_data = pd.read_csv("archive/train.csv")
test_data = pd.read_csv("archive/test.csv")

In [7]:
print(test_data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  418 non-null    int64  
 1   Pclass       418 non-null    int64  
 2   Name         418 non-null    object 
 3   Sex          418 non-null    object 
 4   Age          332 non-null    float64
 5   SibSp        418 non-null    int64  
 6   Parch        418 non-null    int64  
 7   Ticket       418 non-null    object 
 8   Fare         417 non-null    float64
 9   Cabin        91 non-null     object 
 10  Embarked     418 non-null    object 
dtypes: float64(2), int64(4), object(5)
memory usage: 36.1+ KB
None


In [8]:
print(train_data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
None


In [9]:
def prepare_data(dataset: pd.DataFrame) -> pd.DataFrame:
    dataSet = dataset.copy()
    sex = {'female':1, 'male':0}
    dataSet['woman'] = dataSet['Sex'].map(sex)
    dataSet['Fare'] = dataSet['Fare'].fillna(dataSet['Fare'].mean())
    dataSet['Age'] = dataSet['Age'].fillna(dataSet['Age'].mean())
    dataSet['Embarked'] = dataSet['Embarked'].fillna(dataSet['Embarked'].mode()[0])
    port = {'S':1, 'C':2, 'Q':3}
    dataSet['Port'] = dataSet['Embarked'].map(port)
    dataSet['Child'] = np.where(dataSet['Age'] < 14, 1, 0)
    return dataSet
    
y_train = train_data['Survived']
x_train = train_data.drop(['PassengerId', 'Survived'], axis=1)
x_test = test_data.drop(['PassengerId'], axis=1)

x_train = prepare_data(x_train)
x_test = prepare_data(x_test)

features = ["Age", "woman", "Port", "Child", "Fare"]
x_train = x_train[features]
x_test = x_test[features]

In [10]:
print(x_test.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Age     418 non-null    float64
 1   woman   418 non-null    int64  
 2   Port    418 non-null    int64  
 3   Child   418 non-null    int64  
 4   Fare    418 non-null    float64
dtypes: float64(2), int64(3)
memory usage: 16.5 KB
None


In [11]:
def lr_train_model(parameters):
    model = LogisticRegression(penalty =parameters[0], C=parameters[1], max_iter=parameters[2], random_state=0)
    score = cross_val_score(model, x_train, y_train, cv=5, scoring='accuracy').mean()
    return -score


In [12]:
parameters = [
    ('l1', 'l2'), (0.0001, 100.0), (50, 500)
    ]
otimized_parameters = gp_minimize(lr_train_model, parameters, n_calls=50, random_state=0)
model_lr = LogisticRegression(penalty='l1', C=0.001, max_iter=100, solver='saga')
model_lr.fit(x_train, y_train)
y_train_pred = model_lr.predict(x_train)
mse = mean_squared_error(y_train, y_train_pred)


print("Acuracia", accuracy_score(y_train, y_train_pred))

ValueError: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\joaov\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\joaov\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\joaov\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1218, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\joaov\AppData\Local\Programs\Python\Python313\Lib\site-packages\sklearn\linear_model\_logistic.py", line 64, in _check_solver
    raise ValueError(
    ...<2 lines>...
    )
ValueError: Solver lbfgs supports only 'l2' or None penalties, got l1 penalty.
