In [1]:
from specialist_data import pipeline

# Build the pipeline object from `specialist_data`; the next cell looks up
# the 's10' dataset through `manager.transformed.get(...)`.
manager = pipeline()

In [2]:
from sklearn.model_selection import train_test_split

# Fetch the 's10' dataset from the pipeline manager.
s10 = manager.transformed.get('s10')

# Work on a copy: attaching the target column directly to `s10.X` would mutate
# the pipeline's own frame, so re-running this cell (or any later reader of
# `s10.X`) would silently see the extra column.
df = s10.X.copy()
df['target'] = s10.performance

features_list = s10.features

# Keep only rows whose index label is at least 10000.
selected = df.query('index >= 10000')

# Seed the split so Restart & Run All reproduces the same train/test rows
# (42 matches the seed already used for the models in this notebook).
# NOTE(review): test_size=0.6 holds out 60% of the rows for testing — confirm
# this unusually large test share is intentional.
X_train, X_test, y_train, y_test = train_test_split(
    selected[features_list],
    selected.target,
    test_size=0.6,
    random_state=42,
)

# Index labels of the held-out rows; used as the x-axis when plotting.
index = X_test.index

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

# Randomized hyperparameter search for a logistic-regression classifier.
base_model = LogisticRegression(random_state=42)

# Candidate values sampled by the search: penalty type, solver, and the
# inverse regularisation strength C in {0.6, 0.7, 0.8, 0.9, 1.0}.
# NOTE(review): not every penalty/solver pair here is accepted by
# LogisticRegression (e.g. 'l1' with 'lbfgs'); such candidates presumably fail
# to fit and are scored with the search's `error_score` — consider restricting
# the space to valid pairs.
distributons = dict(
    penalty=['l2', 'l1', 'elasticnet'],
    solver=['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    C=[i/10 for i in range(6, 11)]
)

# Seed the sampler: without `random_state` a different set of candidates is
# drawn on every run, so `best_estimator_` is not reproducible.
search_model = RandomizedSearchCV(base_model, distributons, random_state=42)
search = search_model.fit(X_train, y_train)

# Last expression of the cell: display the winning estimator.
search.best_estimator_

LogisticRegression(C=0.8, random_state=42, solver='saga')

In [4]:
# Train an L1-penalised logistic regression on the training split, then
# predict labels for the held-out rows.
# NOTE(review): these hyperparameters differ from the `best_estimator_`
# recorded in the search cell's output (C=0.8, solver='saga') — confirm which
# configuration is intended.
model = LogisticRegression(
    penalty='l1',
    C=0.9,
    solver='liblinear',
    random_state=42,
).fit(X_train, y_train)  # fit() returns the estimator itself

y_pred = model.predict(X_test)

In [5]:
import plotly.graph_objects as go

# Scatter the predicted and the actual target values of the held-out rows
# against their index labels so the two can be compared visually.
fg = go.Figure()
fg.add_trace(
    go.Scatter(
        x=index,
        y=y_pred,
        mode='markers',
        marker_size=3,
        name='predicted',  # legend label; otherwise shown as "trace 0"
    )
)
fg.add_trace(
    go.Scatter(
        x=index,
        y=y_test,
        mode='markers',
        marker_size=3,
        name='actual',  # legend label; otherwise shown as "trace 1"
    )
)

# A figure should stand alone when the notebook is skimmed: title + axis labels.
fg.update_layout(
    title='Logistic regression: predicted vs. actual target (test split)',
    xaxis_title='row index',
    yaxis_title='target',
)

# Last expression of the cell so the notebook renders the figure.
fg