In [1]:
"""Sample training script"""

import pickle
from collections import namedtuple
from pathlib import Path
from typing import List

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.svm import SVC

In [2]:
ls

 Volume in drive D is Data
 Volume Serial Number is 64D3-FFEA

 Directory of D:\GAISSA\deploy-GAISSA\scripts

26/04/2023  17:16    <DIR>          .
26/04/2023  17:13    <DIR>          ..
26/04/2023  17:16    <DIR>          .ipynb_checkpoints
26/04/2023  17:14                 0 train.py
26/04/2023  17:16                72 Untitled.ipynb
               2 File(s)             72 bytes
               3 Dir(s)  263.200.305.152 bytes free


In [3]:
MODELS_DIR = Path("models/")
model_wrappers_list: List[dict] = []


# ================ #
# Data preparation #
# ================ #

# Load data
Iris_data = load_iris()

# Split data
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    Iris_data.data, Iris_data.target, test_size=0.3, random_state=4
)

In [4]:
# ================== #
# LogisticRegression #
# ================== #

print("Creating logistic regression model...")

# Define the Logistic Regression model
LR_parameters = {
    "C": 0.1,
    "max_iter": 20,
    "fit_intercept": True,
    "solver": "liblinear",
    "random_state": 0,
}
LR_model = LogisticRegression(**LR_parameters)

# Train the Model
LR_model.fit(Xtrain, Ytrain)

# Evaluate model accuracy by cross-validation
LR_accuracies = cross_val_score(estimator=LR_model, X=Xtrain, y=Ytrain, cv=10)
LR_metrics = {"accuracy": LR_accuracies.mean()}

# Wrap the trained model in a Model namedtuple and append it to the model list
LR_model_dict = {
    "type": "LogisticRegression",
    "params": LR_parameters,
    "model": LR_model,
    "metrics": LR_metrics,
}
model_wrappers_list.append(LR_model_dict)

print("Logistic regression model created.")
print(LR_model_dict, end="\n\n\n")


Creating logistic regression model...
Logistic regression model created.
{'type': 'LogisticRegression', 'params': {'C': 0.1, 'max_iter': 20, 'fit_intercept': True, 'solver': 'liblinear', 'random_state': 0}, 'model': LogisticRegression(C=0.1, max_iter=20, random_state=0, solver='liblinear'), 'metrics': {'accuracy': 0.9145454545454547}}




In [5]:
# =============================== #
# C-Support Vector Classification #
# =============================== #

print("Creating SVC model...")

# Define the SVC model
SVC_parameters = {"kernel": "linear", "random_state": 0}

SVC_model = SVC(**SVC_parameters)

# Train the Model
SVC_model.fit(Xtrain, Ytrain)

# Evaluate model accuracy by cross-validation
SVC_accuracies = cross_val_score(estimator=SVC_model, X=Xtrain, y=Ytrain, cv=10)
SVC_metrics = {"accuracy": SVC_accuracies.mean()}

# Wrap the trained model in a Model namedtuple and append it to the model list
SVC_model_dict = {
    "type": "SVC",
    "params": SVC_parameters,
    "model": SVC_model,
    "metrics": SVC_metrics,
}

model_wrappers_list.append(SVC_model_dict)

print("SVC model created.")
print(SVC_model_dict, end="\n\n\n")

Creating SVC model...
SVC model created.
{'type': 'SVC', 'params': {'kernel': 'linear', 'random_state': 0}, 'model': SVC(kernel='linear', random_state=0), 'metrics': {'accuracy': 0.9818181818181818}}




In [7]:
type(SVC_model)

sklearn.svm._classes.SVC

In [6]:
model_wrappers_list

[{'type': 'LogisticRegression',
  'params': {'C': 0.1,
   'max_iter': 20,
   'fit_intercept': True,
   'solver': 'liblinear',
   'random_state': 0},
  'model': LogisticRegression(C=0.1, max_iter=20, random_state=0, solver='liblinear'),
  'metrics': {'accuracy': 0.9145454545454547}},
 {'type': 'SVC',
  'params': {'kernel': 'linear', 'random_state': 0},
  'model': SVC(kernel='linear', random_state=0),
  'metrics': {'accuracy': 0.9818181818181818}}]

In [8]:
# ============= #
# Serialization #
# ============= #

print("Serializing model wrappers...")

for wrapped_model in model_wrappers_list:

    pkl_filename = f"{wrapped_model['type']}_model_2.pkl"
    pkl_path = MODELS_DIR / pkl_filename

    with open(pkl_path, "wb") as file:
        pickle.dump(wrapped_model, file)

print("Serializing completed.")

Serializing model wrappers...


FileNotFoundError: [Errno 2] No such file or directory: 'models\\LogisticRegression_model_2.pkl'

In [1]:
import h5py

def save_coefficients(classifier, filename):
    """Save the coefficients of a linear model into a .h5 file."""
    with h5py.File(filename, 'w') as hf:
        hf.create_dataset("coef",  data=classifier.coef_)
        hf.create_dataset("intercept",  data=classifier.intercept_)
        hf.create_dataset("classes", data=classifier.classes_)

def load_coefficients(classifier, filename):
    """Attach the saved coefficients to a linear model."""
    with h5py.File(filename, 'r') as hf:
        coef = hf['coef'][:]
        intercept = hf['intercept'][:]
        classes = hf['classes'][:]
    classifier.coef_ = coef
    classifier.intercept_ = intercept
    classifier.classes_ = classes

In [12]:
save_coefficients(SVC_model,"SVC_model.h5")

 Volume in drive D is Data
 Volume Serial Number is 64D3-FFEA

 Directory of D:\GAISSA\deploy-GAISSA\scripts

26/04/2023  17:26    <DIR>          .
26/04/2023  17:13    <DIR>          ..
26/04/2023  17:19    <DIR>          .ipynb_checkpoints
26/04/2023  17:24            10.777 model_serialization.ipynb
26/04/2023  17:26             2.180 SVC_model.h5
26/04/2023  17:14                 0 train.py
               3 File(s)         12.957 bytes
               3 Dir(s)  263.200.280.576 bytes free


In [14]:
SVC_model_loaded = SVC()
load_coefficients(SVC_model_loaded, "SVC_model.h5")

AttributeError: can't set attribute

In [15]:
f = h5py.File('SVC_model.h5', 'r')

In [18]:
print(f.keys())

<KeysViewHDF5 ['classes', 'coef', 'intercept']>


In [21]:
print(f['classes'])

<HDF5 dataset "classes": shape (3,), type "<i4">


In [22]:
SVC_accuracies = cross_val_score(estimator=f, X=Xtrain, y=Ytrain, cv=10)
SVC_metrics = {"accuracy": SVC_accuracies.mean()}

TypeError: estimator should be an estimator implementing 'fit' method, <HDF5 file "SVC_model.h5" (mode r)> was passed

In [3]:
from transformers import BertTokenizer, BertModel
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained("bert-base-uncased")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

# To use it
from transformers import pipeline
unmasker = pipeline('fill-mask', model='bert-base-uncased')
unmasker("Hello I'm a [MASK] model.")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


RuntimeError: Failed to import transformers.pipelines because of the following error (look up to see its traceback):
Descriptors cannot not be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates