In [18]:
import os
import mlflow

from sklearn.model_selection import train_test_split
import pandas as pd
import numpy

from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder, TargetEncoder

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression

from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score

In [19]:
# Load the prepared dataset from a pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
df = pd.read_pickle('../data/clean_data.pkl')

In [20]:
# Summarise the column dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1437 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   battery_power  1437 non-null   int64   
 1   blue           1437 non-null   category
 2   clock_speed    1437 non-null   float16 
 3   dual_sim       1437 non-null   category
 4   fc             1437 non-null   int8    
 5   four_g         1437 non-null   category
 6   int_memory     1437 non-null   int8    
 7   m_dep          1437 non-null   float16 
 8   mobile_wt      1437 non-null   int64   
 9   n_cores        1437 non-null   int8    
 10  pc             1437 non-null   int8    
 11  px_height      1437 non-null   int64   
 12  px_width       1437 non-null   int64   
 13  ram            1437 non-null   int64   
 14  sc_h           1437 non-null   int8    
 15  sc_w           1437 non-null   int8    
 16  talk_time      1437 non-null   int8    
 17  three_g        1437 non-null   categor

In [21]:
# Rename the label column so the rest of the notebook can refer to it as 'target'.
df = df.rename(columns={'price_range': 'target'})

In [22]:
# Separate the features from the label, then hold out 25% of the rows for testing.
features = df.drop(columns='target')
labels = df['target']
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.25, random_state=2
)

In [23]:
# Names of the feature columns stored with a category/object dtype.
categorical_frame = X_train.select_dtypes(include=['category', 'object'])
cat_features = list(categorical_frame.columns)
cat_features

['blue', 'dual_sim', 'four_g', 'three_g', 'touch_screen', 'wifi']

In [24]:
# Names of the feature columns stored with a numeric dtype.
numeric_frame = X_train.select_dtypes(include=['number'])
num_features = list(numeric_frame.columns)
num_features

['battery_power',
 'clock_speed',
 'fc',
 'int_memory',
 'm_dep',
 'mobile_wt',
 'n_cores',
 'pc',
 'px_height',
 'px_width',
 'ram',
 'sc_h',
 'sc_w',
 'talk_time']

In [25]:
# Shared building blocks for the pipelines in this notebook.
# The target encoder (its internal cross-fitting shuffles the rows) and the random
# forest are both stochastic, so they are seeded to make the logged metrics
# reproducible; the seed matches the one used for the train/test split.
s_scaler = StandardScaler()
l_encoder = TargetEncoder(random_state=2)
classifier = RandomForestClassifier(random_state=2)

In [26]:
# Route each group of columns to its own transformer.
column_steps = [
    ('num', s_scaler, num_features),   # transformation for the numeric features
    ('cat', l_encoder, cat_features),  # transformation for the categorical features
]
# Columns that are not covered by any step are dropped.
preprocessor = ColumnTransformer(transformers=column_steps, remainder='drop')

In [27]:
# Baseline model: preprocessing followed by the random forest classifier.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', classifier),
])

# Train on the training split; the fitted pipeline is shown as the cell output.
pipeline.fit(X_train, y_train)

In [28]:
# Score the baseline pipeline on the held-out split using weighted averaging.
# ROC AUC is not computed: roc_auc_score needs class probabilities (predict_proba)
# and a multi_class setting rather than hard label predictions.
predictions = pipeline.predict(X_test)

scorers = (("precision", precision_score), ("recall", recall_score), ("f1", f1_score))
metrics = {
    label: scorer(y_test, predictions, average='weighted')
    for label, scorer in scorers
}

metrics

{'precision': np.float64(0.8370973132704526),
 'recall': np.float64(0.825),
 'f1': np.float64(0.8238112400541299)}

In [29]:
# Work against an MLflow server running locally.
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000

# The tracking server and the model registry share the same local endpoint.
tracking_uri = registry_uri = f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"

mlflow.set_tracking_uri(tracking_uri)
mlflow.set_registry_uri(registry_uri)

In [30]:
# Name of the test experiment, of the run inside it, and the name under which the model would be registered.
# NOTE(review): the names mention "estate" although the columns look like mobile-phone features — confirm
# this is the intended experiment. REGISTRY_MODEL_NAME is not used in the visible part of the notebook.
EXPERIMENT_NAME = "estate_project"
RUN_NAME = "baseline model"
REGISTRY_MODEL_NAME = "estate_model_rf"

In [31]:
# The model signature and an input example are logged together with every model; prepare both here.
from mlflow.models import infer_signature

input_example = X_train.head(5)
signature = infer_signature(model_input=input_example)



In [32]:
# The requirements will be logged with the model; this text file is the source for them.
req_file = '../requirements.txt'

In [33]:
# The parameters to log can either be written out by hand or taken wholesale from the model.
#params_dict = {'n_estimators': 10, 'max_depth': 10}
params_dict = pipeline.get_params()

In [34]:
# Reuse the experiment when it already exists; otherwise create it. The lookup
# returns None for an unknown name, so creating on demand lets the notebook run
# against a fresh tracking server as well.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)
else:
    experiment_id = experiment.experiment_id

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Keep the unique run id so the run can be fetched after the context closes it.
    run_id = run.info.run_id
    mlflow.sklearn.log_model(pipeline,
                             artifact_path="models",
                             signature=signature,
                             input_example=input_example,
                             pip_requirements=req_file
                             )
    mlflow.log_metrics(metrics)
    mlflow.log_params(params_dict)

# Re-read the run from the tracking server and confirm it finished successfully.
run = mlflow.get_run(run_id)
assert run.info.status == 'FINISHED'

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7/7 [00:00<00:00, 24.37it/s]  
2024/11/02 15:21:37 INFO mlflow.tracking._tracking_service.client: üèÉ View run baseline model at: http://127.0.0.1:5000/#/experiments/1/runs/1544c6e89efe45cbb85307726f21aaa5.
2024/11/02 15:21:37 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1.


In [35]:
# Switch on scikit-learn autologging so the fit below is recorded without explicit log calls.
mlflow.sklearn.autolog()

with mlflow.start_run(run_name='auto', experiment_id=experiment_id) as run:
    # Refit inside the run so that whatever autologging records is attached to it.
    pipeline.fit(X_train, y_train)



2024/11/02 15:21:53 INFO mlflow.tracking._tracking_service.client: üèÉ View run auto at: http://127.0.0.1:5000/#/experiments/1/runs/dcdaece1a21c43a6b53c358021e95d52.
2024/11/02 15:21:53 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1.


In [36]:
# Switch autologging back off; the following runs are logged by hand.
mlflow.sklearn.autolog(disable=True)

In [37]:
# A deliberately smaller forest (10 trees, depth <= 6) to compare with the baseline;
# seeded so that the comparison is reproducible.
classifier2 = RandomForestClassifier(n_estimators=10, max_depth=6, random_state=2)

In [38]:
# Rebuild the pipeline around the smaller classifier (this rebinds `pipeline`).
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', classifier2),
])

# Train on the training split; the fitted pipeline is shown as the cell output.
pipeline.fit(X_train, y_train)

In [39]:
# Score the smaller model on the held-out split using weighted averaging.
# ROC AUC is not computed: roc_auc_score needs class probabilities rather than hard labels.
predictions = pipeline.predict(X_test)

scorers = (("precision", precision_score), ("recall", recall_score), ("f1", f1_score))
metrics = {
    label: scorer(y_test, predictions, average='weighted')
    for label, scorer in scorers
}

metrics

{'precision': np.float64(0.7178459453267619),
 'recall': np.float64(0.7138888888888889),
 'f1': np.float64(0.7119118973982015)}

In [40]:
# NOTE: double-check that the run name and every logged parameter and artifact
# belong to the second, "smaller" model.

RUN_NAME = 'smaller_model'

experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

# Everything that accompanies the model artifact.
model_log_kwargs = dict(
    artifact_path="models",
    signature=signature,
    input_example=input_example,
    pip_requirements=req_file,
)

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Keep the unique run id so the run can be fetched after the context closes it.
    run_id = run.info.run_id
    mlflow.sklearn.log_model(pipeline, **model_log_kwargs)
    mlflow.log_metrics(metrics)
    mlflow.log_params(pipeline.get_params())

# Re-read the run from the tracking server and confirm it finished successfully.
run = mlflow.get_run(run_id)
assert run.info.status == 'FINISHED'

Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7/7 [00:00<00:00, 25.83it/s]
2024/11/02 15:21:56 INFO mlflow.tracking._tracking_service.client: üèÉ View run smaller_model at: http://127.0.0.1:5000/#/experiments/1/runs/2cb5057ea6924d57881ab5a966edaf24.
2024/11/02 15:21:56 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1.


In [41]:
# Run without a model.
# A run may carry artifacts only, without any model — for example plots produced during the EDA stage.

RUN_NAME = 'no_model'
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Nothing is logged in this run; only its id is captured.
    run_id = run.info.run_id 


# Re-read the run from the tracking server and confirm it finished successfully.
run = mlflow.get_run(run_id) 
assert (run.info.status =='FINISHED')

2024/11/02 15:21:56 INFO mlflow.tracking._tracking_service.client: üèÉ View run no_model at: http://127.0.0.1:5000/#/experiments/1/runs/a2eaf0bf1c4b47b8b85c9a106da62687.
2024/11/02 15:21:56 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1.


In [42]:
from sklearn.preprocessing import QuantileTransformer, SplineTransformer, PolynomialFeatures, MinMaxScaler

In [43]:
# Work on a copy so the feature-engineering experiments leave X_train untouched.
X_train_sklearn = X_train.copy()

In [44]:
# Degree-2 polynomial feature generator.
pf = PolynomialFeatures(degree=2)

In [45]:
# Preview the copied training features.
X_train_sklearn

  has_large_values = (abs_vals > 1e6).any()


Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
1741,946,1,1.400391,0,9,0,26,0.099976,186,6,10,273,891,1637,7,3,6,1,0,1
232,1715,0,1.000000,1,4,1,31,0.500000,83,8,17,638,1615,625,17,6,13,1,0,0
1675,1630,1,2.800781,1,0,1,32,0.899902,80,6,1,712,1726,1751,12,10,20,1,0,1
470,752,0,0.500000,1,1,0,48,0.700195,87,7,13,164,728,3860,7,5,16,0,0,0
1915,936,0,1.000000,0,1,0,18,0.199951,153,3,18,1330,1686,2391,12,6,3,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
654,666,1,0.500000,1,7,1,54,0.399902,81,3,13,58,1353,1254,15,9,9,1,1,1
426,1190,1,2.199219,1,9,0,47,0.300049,186,6,10,1417,1441,624,9,3,19,1,1,1
690,1403,0,2.699219,0,2,1,26,0.099976,164,5,3,461,1251,3371,13,9,9,1,0,1
736,503,0,2.500000,0,3,0,57,0.600098,185,6,11,778,1291,305,11,8,16,0,0,1


In [46]:
# Try the polynomial expansion on two numeric columns.
pf.fit_transform(X_train_sklearn[['m_dep','battery_power']])

array([[1.00000000e+00, 9.99755859e-02, 9.46000000e+02, 9.99511778e-03,
        9.45769043e+01, 8.94916000e+05],
       [1.00000000e+00, 5.00000000e-01, 1.71500000e+03, 2.50000000e-01,
        8.57500000e+02, 2.94122500e+06],
       [1.00000000e+00, 8.99902344e-01, 1.63000000e+03, 8.09824228e-01,
        1.46684082e+03, 2.65690000e+06],
       ...,
       [1.00000000e+00, 9.99755859e-02, 1.40300000e+03, 9.99511778e-03,
        1.40265747e+02, 1.96840900e+06],
       [1.00000000e+00, 6.00097656e-01, 5.03000000e+02, 3.60117197e-01,
        3.01849121e+02, 2.53009000e+05],
       [1.00000000e+00, 1.99951172e-01, 7.70000000e+02, 3.99804711e-02,
        1.53962402e+02, 5.92900000e+05]])

In [47]:
# Cubic (degree 3) spline transformer with 3 knots.
sp = SplineTransformer(n_knots=3, degree=3)

In [48]:
# Try the spline expansion on the px_height column.
sp.fit_transform(X_train_sklearn[['px_height']])

array([[0.06690626, 0.60688358, 0.32320198, 0.00300818, 0.        ],
       [0.00772773, 0.38764177, 0.56078677, 0.04384373, 0.        ],
       [0.00375873, 0.33656692, 0.59811978, 0.06155457, 0.        ],
       ...,
       [0.02664969, 0.50540044, 0.45201833, 0.01593154, 0.        ],
       [0.00163569, 0.2917294 , 0.6257347 , 0.08090021, 0.        ],
       [0.01031397, 0.41172227, 0.5411553 , 0.03680847, 0.        ]])

In [49]:
# Quantile transformer with default settings.
qt = QuantileTransformer()

In [50]:
# Try the quantile transform on the px_height column.
qt.fit_transform(X_train_sklearn[['px_height']])

array([[0.22872873],
       [0.54554555],
       [0.60047141],
       ...,
       [0.4034034 ],
       [0.64314314],
       [0.51588829]])

In [51]:
# Fresh, unfitted transformer instances to plug into the combined preprocessor.
pf = PolynomialFeatures(degree=2)
qt = QuantileTransformer()
sp = SplineTransformer(n_knots=3, degree=3)

In [52]:
# Polynomial expansion followed by standardisation of the generated terms.
# NOTE(review): PolynomialFeatures also emits a constant bias column, which becomes
# all zeros after scaling (see the poly__1 column in the output) — consider
# include_bias=False on the polynomial step.
pf_pipeline = Pipeline([
    ('poly', pf),
    ('scale', StandardScaler()),
])

In [53]:
# Extended preprocessing: the baseline steps plus three engineered feature groups.
feature_steps = [
    ('num', s_scaler, num_features),                    # transformation for the numeric features
    ('cat', l_encoder, cat_features),                   # transformation for the categorical features
    ('quantile', qt, num_features),                     # quantile-transformed numeric features
    ('poly', pf_pipeline, ['m_dep', 'battery_power']),  # reuse the polynomial pipeline created earlier
    ('spline', sp, ['px_height']),                      # spline expansion of px_height
]
# Columns that are not covered by any step are dropped.
preprocessor_sklearn = ColumnTransformer(transformers=feature_steps, remainder='drop')

In [54]:
# Cast the two columns fed to the polynomial features to float64
# (df.info() shows m_dep stored as float16 and battery_power as int64).
X_train_sklearn = X_train_sklearn.astype({'m_dep': 'float64', 'battery_power': 'float64'})

In [55]:
# Preview the frame after the dtype cast.
X_train_sklearn

  has_large_values = (abs_vals > 1e6).any()


Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
1741,946.0,1,1.400391,0,9,0,26,0.099976,186,6,10,273,891,1637,7,3,6,1,0,1
232,1715.0,0,1.000000,1,4,1,31,0.500000,83,8,17,638,1615,625,17,6,13,1,0,0
1675,1630.0,1,2.800781,1,0,1,32,0.899902,80,6,1,712,1726,1751,12,10,20,1,0,1
470,752.0,0,0.500000,1,1,0,48,0.700195,87,7,13,164,728,3860,7,5,16,0,0,0
1915,936.0,0,1.000000,0,1,0,18,0.199951,153,3,18,1330,1686,2391,12,6,3,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
654,666.0,1,0.500000,1,7,1,54,0.399902,81,3,13,58,1353,1254,15,9,9,1,1,1
426,1190.0,1,2.199219,1,9,0,47,0.300049,186,6,10,1417,1441,624,9,3,19,1,1,1
690,1403.0,0,2.699219,0,2,1,26,0.099976,164,5,3,461,1251,3371,13,9,9,1,0,1
736,503.0,0,2.500000,0,3,0,57,0.600098,185,6,11,778,1291,305,11,8,16,0,0,1


In [56]:
# Fit the extended preprocessor on the training copy and transform it;
# y_train is passed because the target encoder is a supervised transformer.
X_train_sklearn_raw = preprocessor_sklearn.fit_transform(X_train_sklearn, y_train)


In [57]:
# Wrap the transformed array in a DataFrame labelled with the generated feature names.
# NOTE(review): this rebinds X_train_sklearn from the raw copy to the transformed features,
# and the new frame gets a default 0..n-1 index instead of the original row labels.
X_train_sklearn = pd.DataFrame(X_train_sklearn_raw, columns=preprocessor_sklearn.get_feature_names_out())

In [58]:
# Show every column but only a handful of rows of the engineered feature frame.
with pd.option_context('display.max_rows', 5, 'display.max_columns', None):
    display (X_train_sklearn)

Unnamed: 0,num__battery_power,num__clock_speed,num__fc,num__int_memory,num__m_dep,num__mobile_wt,num__n_cores,num__pc,num__px_height,num__px_width,num__ram,num__sc_h,num__sc_w,num__talk_time,cat__blue_0,cat__blue_1,cat__blue_2,cat__blue_3,cat__dual_sim_0,cat__dual_sim_1,cat__dual_sim_2,cat__dual_sim_3,cat__four_g_0,cat__four_g_1,cat__four_g_2,cat__four_g_3,cat__three_g_0,cat__three_g_1,cat__three_g_2,cat__three_g_3,cat__touch_screen_0,cat__touch_screen_1,cat__touch_screen_2,cat__touch_screen_3,cat__wifi_0,cat__wifi_1,cat__wifi_2,cat__wifi_3,quantile__battery_power,quantile__clock_speed,quantile__fc,quantile__int_memory,quantile__m_dep,quantile__mobile_wt,quantile__n_cores,quantile__pc,quantile__px_height,quantile__px_width,quantile__ram,quantile__sc_h,quantile__sc_w,quantile__talk_time,poly__1,poly__m_dep,poly__battery_power,poly__m_dep^2,poly__m_dep battery_power,poly__battery_power^2,spline__px_height_sp_0,spline__px_height_sp_1,spline__px_height_sp_2,spline__px_height_sp_3,spline__px_height_sp_4
0,-0.656113,-0.138190,1.093728,-0.372868,-1.375675,1.246637,0.620719,0.046466,-0.870069,-0.861281,-0.454690,-1.458894,-1.201724,-0.912773,0.261259,0.237522,0.242325,0.258890,0.260750,0.226774,0.272086,0.240388,0.246354,0.236580,0.282874,0.234187,0.250775,0.227564,0.269339,0.252321,0.252980,0.236272,0.264910,0.245837,0.247736,0.245429,0.250027,0.256807,0.318318,0.462462,0.839840,0.389890,0.000000,0.868869,0.674174,0.526026,0.228729,0.258258,0.362454,0.094595,0.000000,0.234735,0.0,-1.375675,-0.656113,-1.052531,-1.184632,-0.742832,0.066906,0.606884,0.323202,0.003008,0.0
1,1.107839,-0.631921,-0.062934,-0.099164,0.000693,-1.615754,1.496885,1.218069,-0.051504,0.806629,-1.381706,1.020950,-0.392308,0.359910,0.239400,0.237130,0.286296,0.237168,0.244094,0.239325,0.255939,0.260642,0.248330,0.230434,0.252817,0.268417,0.247698,0.227702,0.273828,0.250771,0.247124,0.240171,0.260973,0.251732,0.245953,0.222764,0.289955,0.241323,0.824324,0.359359,0.580080,0.473974,0.507007,0.031031,1.000000,0.856857,0.545546,0.724725,0.110595,0.804805,0.432432,0.600601,0.0,0.000693,1.107839,-0.273520,0.539330,1.126755,0.007728,0.387642,0.560787,0.043844,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075,-1.672278,1.217764,-0.294267,1.324097,0.345101,1.218847,0.620719,0.213838,0.262466,0.060216,-1.674833,-0.466957,0.147303,0.905345,0.249417,0.244681,0.270571,0.235329,0.252925,0.238865,0.271643,0.236566,0.250611,0.228724,0.274910,0.245754,0.234528,0.263010,0.229811,0.272616,0.231526,0.245344,0.275434,0.247692,0.260348,0.244216,0.239683,0.255748,0.002002,0.847347,0.507007,0.880380,0.601101,0.855856,0.674174,0.579580,0.643143,0.516016,0.011191,0.340841,0.607608,0.756757,0.0,0.345101,-1.672278,0.083899,-0.716263,-1.329303,0.001636,0.291729,0.625735,0.080900,0.0
1076,-1.059827,-0.015360,0.399731,1.543060,-1.031688,1.524539,-0.255447,-0.120906,-0.129996,0.502535,1.227130,-1.458894,-1.201724,-1.276396,0.251176,0.227713,0.274619,0.246492,0.244772,0.240077,0.258750,0.256400,0.253551,0.220409,0.281942,0.244094,0.257256,0.242672,0.237975,0.262086,0.260320,0.223867,0.274908,0.240902,0.258913,0.241054,0.247799,0.252232,0.187732,0.498999,0.700200,0.943944,0.215716,0.948448,0.408909,0.464965,0.515888,0.648854,0.850203,0.094595,0.000000,0.123624,0.0,-1.031688,-1.059827,-0.955204,-1.050439,-1.018765,0.010314,0.411722,0.541155,0.036808,0.0


In [59]:
# Same classifier as the baseline, now fed by the extended preprocessor.
pipeline_sklearn = Pipeline([
    ('transform', preprocessor_sklearn),
    ('model', classifier),
])

# Fit on the raw training split — the pipeline applies the feature engineering itself.
# fit() hands back the fitted pipeline, which is kept under the name model_sklearn.
model_sklearn = pipeline_sklearn.fit(X_train, y_train)

In [60]:
# Display the fitted pipeline.
model_sklearn


In [61]:
# Score the feature-engineered model on the held-out split using weighted averaging.
# ROC AUC is not computed: roc_auc_score needs class probabilities rather than hard labels.
predictions = model_sklearn.predict(X_test)

scorers = (("precision", precision_score), ("recall", recall_score), ("f1", f1_score))
metrics = {
    label: scorer(y_test, predictions, average='weighted')
    for label, scorer in scorers
}

metrics

{'precision': np.float64(0.8605753343472641),
 'recall': np.float64(0.8555555555555555),
 'f1': np.float64(0.8568484175419909)}

In [63]:
# Log the feature-engineered model as its own run in the same experiment.
experiment_id = mlflow.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
RUN_NAME = 'fe_sklearn'

# Everything that accompanies the model artifact.
model_log_kwargs = dict(
    artifact_path="models",
    signature=signature,
    input_example=input_example,
    pip_requirements=req_file,
)

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Keep the unique run id so the run can be fetched after the context closes it.
    run_id = run.info.run_id
    mlflow.sklearn.log_model(model_sklearn, **model_log_kwargs)
    mlflow.log_metrics(metrics)
    mlflow.log_params(model_sklearn.get_params())

# Re-read the run from the tracking server and confirm it finished successfully.
run = mlflow.get_run(run_id)
assert run.info.status == 'FINISHED'

Downloading artifacts: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7/7 [00:00<00:00, 15.21it/s]
2024/11/02 15:42:29 INFO mlflow.tracking._tracking_service.client: üèÉ View run fe_sklearn at: http://127.0.0.1:5000/#/experiments/1/runs/3502ec4aa6fa4a45bb2c8a28d446f570.
2024/11/02 15:42:29 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/1.


In [68]:
import numpy as np

In [69]:
# Debugging aid: list the names of all modules currently loaded in the kernel.
import sys
sys.modules.keys()



In [70]:
# Debugging aid: print the installed version of the `construct` package.
import construct
print(construct.__version__)

2.10.70


In [72]:
import numpy==1.26.4

SyntaxError: invalid syntax (2075665624.py, line 1)

In [71]:
# AutoFeat-based feature generation.
# NOTE(review): per the recorded ImportError this import fails while NumPy 2.1 is installed,
# because Numba requires NumPy 2.0 or less.
from autofeat import AutoFeatRegressor
# Names of candidate transformations; presumably handed to AutoFeat later — not used in the visible code.
transformations = ["1/", "exp", "log", "abs", "sqrt", "^2", "^3", "1+", "1-", "sin", "cos", "exp-", "2^"]

ImportError: Numba needs NumPy 2.0 or less. Got NumPy 2.1.