In [1]:
from ucimlrepo import fetch_ucirepo 
import mlflow
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.preprocessing import StandardScaler
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)
import logging
logging.basicConfig(level=logging.INFO)
from sklearn.linear_model import Lasso
from mlflow.tracking import MlflowClient
import pandas as pd
from sklearn.metrics import accuracy_score
import os 

### EDA

https://archive.ics.uci.edu/dataset/602/dry+bean+dataset

In [2]:
# fetch dataset 
dry_bean = fetch_ucirepo(id=602) 
  
# data (as pandas dataframes) 
X = dry_bean.data.features 
y = dry_bean.data.targets 

In [3]:
X

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRatio,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,Roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4
0,28395,610.291,208.178117,173.888747,1.197191,0.549812,28715,190.141097,0.763923,0.988856,0.958027,0.913358,0.007332,0.003147,0.834222,0.998724
1,28734,638.018,200.524796,182.734419,1.097356,0.411785,29172,191.272751,0.783968,0.984986,0.887034,0.953861,0.006979,0.003564,0.909851,0.998430
2,29380,624.110,212.826130,175.931143,1.209713,0.562727,29690,193.410904,0.778113,0.989559,0.947849,0.908774,0.007244,0.003048,0.825871,0.999066
3,30008,645.884,210.557999,182.516516,1.153638,0.498616,30724,195.467062,0.782681,0.976696,0.903936,0.928329,0.007017,0.003215,0.861794,0.994199
4,30140,620.134,201.847882,190.279279,1.060798,0.333680,30417,195.896503,0.773098,0.990893,0.984877,0.970516,0.006697,0.003665,0.941900,0.999166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13606,42097,759.696,288.721612,185.944705,1.552728,0.765002,42508,231.515799,0.714574,0.990331,0.916603,0.801865,0.006858,0.001749,0.642988,0.998385
13607,42101,757.499,281.576392,190.713136,1.476439,0.735702,42494,231.526798,0.799943,0.990752,0.922015,0.822252,0.006688,0.001886,0.676099,0.998219
13608,42139,759.321,281.539928,191.187979,1.472582,0.734065,42569,231.631261,0.729932,0.989899,0.918424,0.822730,0.006681,0.001888,0.676884,0.996767
13609,42147,763.779,283.382636,190.275731,1.489326,0.741055,42667,231.653247,0.705389,0.987813,0.907906,0.817457,0.006724,0.001852,0.668237,0.995222


In [4]:
y.value_counts()

Class   
DERMASON    3546
SIRA        2636
SEKER       2027
HOROZ       1928
CALI        1630
BARBUNYA    1322
BOMBAY       522
Name: count, dtype: int64

In [5]:
X.isna().any()

Area               False
Perimeter          False
MajorAxisLength    False
MinorAxisLength    False
AspectRatio        False
Eccentricity       False
ConvexArea         False
EquivDiameter      False
Extent             False
Solidity           False
Roundness          False
Compactness        False
ShapeFactor1       False
ShapeFactor2       False
ShapeFactor3       False
ShapeFactor4       False
dtype: bool

In [6]:
y.isna().sum()

Class    0
dtype: int64

This dataset is already pretty clean with no missing values and zero categorical variables except the target variable.

In [7]:
le = LabelEncoder()
y = y.to_numpy().ravel()
y_encoded = le.fit_transform(y)
y_encoded

array([5, 5, 5, ..., 3, 3, 3])

In [8]:
mlflow.set_tracking_uri('sqlite:///mlflow.db')
mlflow.set_experiment('exp1')

2025/03/21 13:03:41 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/03/21 13:03:41 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

<Experiment: artifact_location='/Users/anyxling/Documents/usf/spring/mlops/mlruns/1', creation_time=1742587421818, experiment_id='1', last_update_time=1742587421818, lifecycle_stage='active', name='exp1', tags={}>

### Preparation and data log before training

In [9]:
os.makedirs('save_data', exist_ok = True)
X.to_csv('save_data/x_original.csv')
mlflow.log_artifact('save_data/x_original.csv')

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, shuffle=True)

In [11]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
X_train_scaled_df = pd.DataFrame(X_train_scaled)
X_test_scaled_df = pd.DataFrame(X_test_scaled)

X_train_scaled_df.to_csv('save_data/x_train_scaled.csv')
mlflow.log_artifact('save_data/x_train_scaled.csv')

X_test_scaled_df.to_csv('save_data/x_test_scaled.csv')
mlflow.log_artifact('save_data/x_test_scaled.csv')

### Experiments

1. Try at least 3 different ML algorithms (e.g. linear regression, decision tree, random forest, etc.).
2. Do hyperparameter tuning for **each** algorithm.

In [None]:
def objective(params, X, y):
    if mlflow.active_run():
        mlflow.end_run()
    with mlflow.start_run():
        classifier_type = params['type']
        del params['type']
        if classifier_type == 'xgb':
            clf = XGBClassifier(**params)
        elif classifier_type == 'rf':
            clf = RandomForestClassifier(**params)  
        elif classifier_type == 'lr':
            clf = LogisticRegression(solver='liblinear', max_iter=200, **params)      
        else:
            return 0
        print(f"Running {classifier_type} with: {params}")
        acc = cross_val_score(clf, X, y).mean()

        mlflow.set_tag("Model", classifier_type)
        mlflow.log_params(params)
        mlflow.log_metric("accuracy", acc)

        clf.fit(X, y)
        mlflow.sklearn.log_model(clf, artifact_path = 'better_models')
        # mlflow.end_run()

        return {'loss': -acc, 'status': STATUS_OK}
    
search_space = hp.choice('classifier_type', [
    {
        'type': 'xgb',
        'n_estimators': hp.randint('xgb_n_estimators', 20, 250),  
        'max_depth': hp.choice('xgb_max_depth', [3, 4, 5]),
        'subsample': hp.uniform('xgb_subsample', 0.7, 1.0)
    },
    {
        'type': 'rf',
        'n_estimators': hp.randint('rf_n_estimators', 20, 250),
        'max_features': hp.randint('rf_max_features', 2, 9),
        'criterion': hp.choice('rf_criterion', ['gini', 'entropy']),
        'max_depth': hp.choice('rf_max_depth', [None, 5, 10])
    },
    {
        'type': 'lr',
        'penalty': hp.choice('lr_penalty', ['l1', 'l2']),
        'C': hp.loguniform('lr_C', -2, 1)
        # 'solver': hp.choice('lr_solver', ['liblinear']) 
    }
])

# Perform search on X_train_scaled
mlflow.set_experiment('exp1')
trials1 = Trials()
best_result1 = fmin(
    fn=lambda params: objective(params, X_train_scaled, y_train),
    space=search_space,
    algo=tpe.suggest,
    max_evals=20,
    trials=trials1
)

Running rf with: {'criterion': 'entropy', 'max_depth': 5, 'max_features': 3, 'n_estimators': 151}
  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]




Running lr with: {'C': 1.3965492677186064, 'penalty': 'l2'}                      
  5%|▌         | 1/20 [00:32<10:18, 32.56s/trial, best loss: -0.8907049785447916]




Running lr with: {'C': 0.373786238443866, 'penalty': 'l2'}                       
 10%|█         | 2/20 [00:38<05:08, 17.13s/trial, best loss: -0.9157783202214654]




Running xgb with: {'max_depth': 3, 'n_estimators': 118, 'subsample': 0.91658819808951}
 15%|█▌        | 3/20 [00:44<03:24, 12.05s/trial, best loss: -0.9157783202214654]




Running lr with: {'C': 2.4501925858654023, 'penalty': 'l2'}                      
 20%|██        | 4/20 [00:53<02:49, 10.61s/trial, best loss: -0.9219320612480507]




Running lr with: {'C': 1.2863558499032253, 'penalty': 'l2'}                      
 25%|██▌       | 5/20 [00:59<02:12,  8.83s/trial, best loss: -0.9219320612480507]




Running xgb with: {'max_depth': 5, 'n_estimators': 223, 'subsample': 0.8585132985089197}
 30%|███       | 6/20 [01:04<01:48,  7.73s/trial, best loss: -0.9219320612480507]




Running lr with: {'C': 2.487374706033776, 'penalty': 'l2'}                       
 35%|███▌      | 7/20 [01:19<02:12, 10.17s/trial, best loss: -0.924412117162775]




Running rf with: {'criterion': 'entropy', 'max_depth': 5, 'max_features': 4, 'n_estimators': 174}
 40%|████      | 8/20 [01:25<01:44,  8.72s/trial, best loss: -0.924412117162775]




Running rf with: {'criterion': 'entropy', 'max_depth': 10, 'max_features': 3, 'n_estimators': 152}
 45%|████▌     | 9/20 [02:10<03:42, 20.21s/trial, best loss: -0.924412117162775]




Running lr with: {'C': 0.2076682951929812, 'penalty': 'l1'}                      
 50%|█████     | 10/20 [02:55<04:37, 27.70s/trial, best loss: -0.924412117162775]




Running rf with: {'criterion': 'gini', 'max_depth': None, 'max_features': 8, 'n_estimators': 241}
 55%|█████▌    | 11/20 [03:24<04:13, 28.14s/trial, best loss: -0.924412117162775]




Running rf with: {'criterion': 'entropy', 'max_depth': None, 'max_features': 4, 'n_estimators': 78}
 60%|██████    | 12/20 [06:00<08:55, 67.00s/trial, best loss: -0.924412117162775]




Running lr with: {'C': 1.3440230694095243, 'penalty': 'l1'}                      
 65%|██████▌   | 13/20 [06:33<06:36, 56.66s/trial, best loss: -0.924412117162775]




Running rf with: {'criterion': 'gini', 'max_depth': None, 'max_features': 5, 'n_estimators': 131}
 70%|███████   | 14/20 [07:30<05:40, 56.82s/trial, best loss: -0.924412117162775]




Running rf with: {'criterion': 'entropy', 'max_depth': 5, 'max_features': 5, 'n_estimators': 92}
 75%|███████▌  | 15/20 [08:25<04:41, 56.31s/trial, best loss: -0.924412117162775]




Running lr with: {'C': 1.2418825016914319, 'penalty': 'l2'}                      
 80%|████████  | 16/20 [08:56<03:14, 48.66s/trial, best loss: -0.924412117162775]




Running rf with: {'criterion': 'gini', 'max_depth': 5, 'max_features': 2, 'n_estimators': 150}
 85%|████████▌ | 17/20 [09:01<01:47, 35.69s/trial, best loss: -0.924412117162775]




Running rf with: {'criterion': 'gini', 'max_depth': 10, 'max_features': 2, 'n_estimators': 165}
 90%|█████████ | 18/20 [09:19<01:00, 30.26s/trial, best loss: -0.924412117162775]




Running xgb with: {'max_depth': 5, 'n_estimators': 23, 'subsample': 0.7952060241996658}
 95%|█████████▌| 19/20 [09:47<00:29, 29.58s/trial, best loss: -0.924412117162775]




100%|██████████| 20/20 [09:52<00:00, 29.65s/trial, best loss: -0.9264321926409036]


3. Do some very basic feature selection, and repeat the above steps with these reduced sets of features.

In [14]:
model = Lasso(alpha=0.1)
model.fit(X_train_scaled, y_train)
selected_indices = np.where(model.coef_ != 0)[0]
selected_indices

array([ 3,  6,  8,  9, 10, 15])

In [15]:
X_train_selected = X_train_scaled[:, selected_indices]
X_train_selected.shape

(10888, 6)

In [16]:
X_train_selected_df = pd.DataFrame(X_train_selected)

X_train_selected_df.to_csv('save_data/x_train_selected.csv')
mlflow.log_artifact('save_data/x_train_selected.csv')

In [None]:
# Perform search on data with selected features
mlflow.set_experiment('exp2')
trials2 = Trials()
best_result2 = fmin(
    fn=lambda params: objective(params, X_train_selected, y_train),
    space=search_space,
    algo=tpe.suggest,
    max_evals=32,
    trials=trials2
)

2025/03/21 13:13:56 INFO mlflow.tracking.fluent: Experiment with name 'exp2' does not exist. Creating a new experiment.


Running xgb with: {'max_depth': 4, 'n_estimators': 214, 'subsample': 0.8321866245486714}
  0%|          | 0/32 [00:00<?, ?trial/s, best loss=?]




Running lr with: {'C': 0.5950623953667654, 'penalty': 'l1'}                      
  3%|▎         | 1/32 [00:13<06:54, 13.38s/trial, best loss: -0.9198199896826029]




Running lr with: {'C': 0.19770670294024464, 'penalty': 'l1'}                     
  6%|▋         | 2/32 [00:20<04:49,  9.66s/trial, best loss: -0.9198199896826029]




Running xgb with: {'max_depth': 3, 'n_estimators': 221, 'subsample': 0.9486943685168057}
  9%|▉         | 3/32 [00:26<03:56,  8.14s/trial, best loss: -0.9198199896826029]




Running lr with: {'C': 0.6582200047536073, 'penalty': 'l2'}                      
 12%|█▎        | 4/32 [00:40<04:45, 10.20s/trial, best loss: -0.9202793374088317]




Running xgb with: {'max_depth': 5, 'n_estimators': 222, 'subsample': 0.7257037976803318}
 16%|█▌        | 5/32 [00:44<03:42,  8.24s/trial, best loss: -0.9202793374088317]




Running lr with: {'C': 0.22068787433737994, 'penalty': 'l1'}                     
 19%|█▉        | 6/32 [00:57<04:11,  9.67s/trial, best loss: -0.9207384742316049]




Running rf with: {'criterion': 'entropy', 'max_depth': 5, 'max_features': 8, 'n_estimators': 85}
 22%|██▏       | 7/32 [01:03<03:34,  8.60s/trial, best loss: -0.9207384742316049]




Running lr with: {'C': 0.3091993426477711, 'penalty': 'l1'}                      
 25%|██▌       | 8/32 [01:36<06:28, 16.18s/trial, best loss: -0.9207384742316049]




Running xgb with: {'max_depth': 4, 'n_estimators': 40, 'subsample': 0.8422913602565749}
 28%|██▊       | 9/32 [01:42<05:01, 13.12s/trial, best loss: -0.9207384742316049]




Running rf with: {'criterion': 'gini', 'max_depth': 10, 'max_features': 2, 'n_estimators': 101}
 31%|███▏      | 10/32 [01:49<04:08, 11.28s/trial, best loss: -0.9211053197022212]




Running lr with: {'C': 0.3377401242088719, 'penalty': 'l1'}                       
 34%|███▍      | 11/32 [02:07<04:38, 13.26s/trial, best loss: -0.9211053197022212]




Running lr with: {'C': 2.4230035611434633, 'penalty': 'l2'}                       
 38%|███▊      | 12/32 [02:14<03:44, 11.22s/trial, best loss: -0.9211053197022212]




Running rf with: {'criterion': 'gini', 'max_depth': 10, 'max_features': 8, 'n_estimators': 205}
 41%|████      | 13/32 [02:18<02:54,  9.17s/trial, best loss: -0.9211053197022212]




Running lr with: {'C': 2.1015258952821187, 'penalty': 'l2'}                       
 44%|████▍     | 14/32 [03:37<09:02, 30.17s/trial, best loss: -0.9211053197022212]




Running rf with: {'criterion': 'entropy', 'max_depth': 10, 'max_features': 2, 'n_estimators': 133}
 47%|████▋     | 15/32 [03:41<06:20, 22.39s/trial, best loss: -0.9211053197022212]




Running rf with: {'criterion': 'entropy', 'max_depth': 10, 'max_features': 5, 'n_estimators': 228}
 50%|█████     | 16/32 [04:11<06:34, 24.66s/trial, best loss: -0.9211053197022212]




Running xgb with: {'max_depth': 3, 'n_estimators': 29, 'subsample': 0.7356376701152232}
 53%|█████▎    | 17/32 [05:52<11:52, 47.49s/trial, best loss: -0.9211053197022212]




Running xgb with: {'max_depth': 5, 'n_estimators': 164, 'subsample': 0.8646151418462389}
 56%|█████▋    | 18/32 [05:56<08:05, 34.65s/trial, best loss: -0.9211053197022212]




Running rf with: {'criterion': 'gini', 'max_depth': None, 'max_features': 6, 'n_estimators': 244}
 59%|█████▉    | 19/32 [06:06<05:53, 27.22s/trial, best loss: -0.9211053197022212]




Running xgb with: {'max_depth': 5, 'n_estimators': 36, 'subsample': 0.7030528084689414}
 62%|██████▎   | 20/32 [07:56<10:25, 52.14s/trial, best loss: -0.9211053197022212]




Running xgb with: {'max_depth': 4, 'n_estimators': 40, 'subsample': 0.8207142752181832}
 66%|██████▌   | 21/32 [08:02<07:00, 38.24s/trial, best loss: -0.9230343270682354]




Running xgb with: {'max_depth': 5, 'n_estimators': 188, 'subsample': 0.9501159720088528}
 69%|██████▉   | 22/32 [08:08<04:43, 28.36s/trial, best loss: -0.9230343270682354]




Running xgb with: {'max_depth': 4, 'n_estimators': 212, 'subsample': 0.7729896724977271}
 72%|███████▏  | 23/32 [08:19<03:30, 23.34s/trial, best loss: -0.9230343270682354]




Running xgb with: {'max_depth': 5, 'n_estimators': 73, 'subsample': 0.9048024153033557}
 75%|███████▌  | 24/32 [08:30<02:37, 19.63s/trial, best loss: -0.9230343270682354]




Running xgb with: {'max_depth': 5, 'n_estimators': 145, 'subsample': 0.9088800637094727}
 78%|███████▊  | 25/32 [08:38<01:52, 16.08s/trial, best loss: -0.9230343270682354]




Running xgb with: {'max_depth': 5, 'n_estimators': 73, 'subsample': 0.9941325471784264}
 81%|████████▏ | 26/32 [08:47<01:24, 14.07s/trial, best loss: -0.9230343270682354]




Running xgb with: {'max_depth': 5, 'n_estimators': 240, 'subsample': 0.9007962163919578}
 84%|████████▍ | 27/32 [08:56<01:02, 12.41s/trial, best loss: -0.9230343270682354]




Running xgb with: {'max_depth': 5, 'n_estimators': 64, 'subsample': 0.7856623487246495}
 88%|████████▊ | 28/32 [09:23<01:07, 16.93s/trial, best loss: -0.9230343270682354]




Running xgb with: {'max_depth': 5, 'n_estimators': 34, 'subsample': 0.8893982371114516}
 91%|█████████ | 29/32 [09:33<00:44, 14.68s/trial, best loss: -0.9230343270682354]




Running xgb with: {'max_depth': 5, 'n_estimators': 174, 'subsample': 0.9502123160478314}
 94%|█████████▍| 30/32 [09:40<00:24, 12.32s/trial, best loss: -0.9230343270682354]




Running xgb with: {'max_depth': 3, 'n_estimators': 100, 'subsample': 0.9889651858392954}
 97%|█████████▋| 31/32 [09:51<00:11, 11.98s/trial, best loss: -0.9230343270682354]




100%|██████████| 32/32 [10:00<00:00, 18.75s/trial, best loss: -0.9230343270682354]


4. Identify the top 3 best models and note these down for later.

In [18]:
# Analyze top 3 runs across all experiments
client = MlflowClient()
experiment = client.get_experiment_by_name('exp2')
experiment_id = experiment.experiment_id

runs = client.search_runs(
    experiment_ids=[experiment_id],
    filter_string="attributes.status = 'FINISHED'",
    order_by=["metrics.accuracy DESC"],
    max_results=3
)

top_runs = [{
    "experiment_name": experiment.name,
    "run_id": run.info.run_id,
    "accuracy": run.data.metrics.get("accuracy"),
    "params": run.data.params,
    "model_type": run.data.tags.get("Model")
} for run in runs]

top_runs

[{'experiment_name': 'exp2',
  'run_id': '47d9a18653ad4b8fa0899d5789401df3',
  'accuracy': 0.9230343270682354,
  'params': {'max_depth': '5',
   'n_estimators': '36',
   'subsample': '0.7030528084689414'},
  'model_type': 'xgb'},
 {'experiment_name': 'exp2',
  'run_id': '688cc0aecbe54447a4e887dc632e3afe',
  'accuracy': 0.9221159268806156,
  'params': {'max_depth': '5',
   'n_estimators': '73',
   'subsample': '0.9048024153033557'},
  'model_type': 'xgb'},
 {'experiment_name': 'exp2',
  'run_id': '50b27c8bca37423e9670267b8c98f1a3',
  'accuracy': 0.9216565791543868,
  'params': {'max_depth': '3',
   'n_estimators': '100',
   'subsample': '0.9889651858392954'},
  'model_type': 'xgb'}]

5. Choose the **final** "best" model that you would deploy or use on future data, stage it (in MLFlow), and run it on the test set to get a final measure of performance. Don't forget to log the test set metric.

In [None]:
logged_model = 'runs:/47d9a18653ad4b8fa0899d5789401df3/better_models' 

In [20]:
sklearn_model = mlflow.sklearn.load_model(logged_model)
sklearn_model

In [21]:
X_test_selected = X_test_scaled[:, selected_indices]
X_test_selected.shape

(2723, 6)

In [22]:
X_test_selected_df = pd.DataFrame(X_test_selected)
X_test_selected_df.to_csv('save_data/x_test_selected.csv')
mlflow.log_artifact('save_data/x_test_selected.csv')

In [None]:
# run the best model on test set
sklearn_model.fit(X_train_selected, y_train)
preds = sklearn_model.predict(X_test_selected)
acc = accuracy_score(y_test, preds)
print(f"Accuracy: {acc:.4f}")

Accuracy: 0.9295


In [None]:
# Log test metric
mlflow.set_experiment("final_eval")
with mlflow.start_run(run_name="Final test evaluation"):
    mlflow.log_metric("test_accuracy", acc)
    mlflow.set_tag("Stage", "Staging")
    mlflow.set_tag("Model_Name", "xgb")

In [None]:
# Register the model
runid = '47d9a18653ad4b8fa0899d5789401df3'
mod_path = f'runs:/{runid}/artifacts/better_models'
mlflow.register_model(model_uri = mod_path, name = 'dry_bean_best_xgb')

Successfully registered model 'dry_bean_best_xgb'.
Created version '1' of model 'dry_bean_best_xgb'.


<ModelVersion: aliases=[], creation_timestamp=1742688835541, current_stage='None', description=None, last_updated_timestamp=1742688835541, name='dry_bean_best_xgb', run_id='47d9a18653ad4b8fa0899d5789401df3', run_link=None, source='/Users/anyxling/Documents/usf/spring/mlops/mlruns/2/47d9a18653ad4b8fa0899d5789401df3/artifacts/artifacts/better_models', status='READY', status_message=None, tags={}, user_id=None, version=1>