-
Notifications
You must be signed in to change notification settings - Fork 0
/
lr-large.py
31 lines (24 loc) · 1.06 KB
/
lr-large.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from feature_engine.imputation import CategoricalImputer, MeanMedianImputer
from feature_engine.encoding import OneHotEncoder
from sklearn.metrics import accuracy_score
# Read the large train/test splits from disk (paths are relative to the
# directory the script is launched from).
train_df = pd.read_csv("data/data-train-large.csv")
test_df = pd.read_csv("data/data-test-large.csv")

# Separate the "Survived" target column from the remaining feature columns.
target_col = "Survived"
X_train = train_df.drop(columns=target_col)
y_train = train_df[target_col]
X_test = test_df.drop(columns=target_col)
y_test = test_df[target_col]
# Preprocessing stages, applied in the order listed: mean imputation,
# most-frequent-category imputation, one-hot encoding, then standard scaling.
numeric_imputer = MeanMedianImputer(imputation_method="mean")
categorical_imputer = CategoricalImputer(imputation_method="frequent")
one_hot = OneHotEncoder()
scaler = StandardScaler()
preprocess = Pipeline(
    steps=[
        ("imp_num", numeric_imputer),
        ("imp_cat", categorical_imputer),
        ("ohe", one_hot),
        ("sc", scaler),
    ]
)

# Full model: the preprocessing pipeline feeding a logistic regression whose
# random_state is fixed at 123.
classifier = LogisticRegression(random_state=123)
model_pipe = Pipeline(
    steps=[
        ("prep", preprocess),
        ("model", classifier),
    ]
)
# Fit the whole pipeline on the training split, then predict the test split.
# Pipeline.fit returns the fitted pipeline itself, so the calls can be chained.
y_pred = model_pipe.fit(X_train, y_train).predict(X_test)

# Report the accuracy of the predictions against the test-split labels.
test_accuracy = accuracy_score(y_test, y_pred)
print("El accuracy obtenido es:", test_accuracy)