In [None]:
## import model dependencies
import os
import numpy as np
from qwak.feature_store.offline import OfflineClient

import qwak
from qwak.model.base import QwakModel
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier, Pool
import pandas as pd
from qwak import qwak_timer
from qwak.model.adapters import JsonOutputAdapter
import matplotlib.pyplot as plt


### Define Hyperparameters

In [None]:
# CatBoost training configuration; unpacked into CatBoostClassifier(**params).
params = dict(
    iterations=50,
    learning_rate=0.2,
    eval_metric='Accuracy',
    logging_level='Silent',
    use_best_model=True,
)


In [None]:
### Define Model
# Instantiate the classifier from the hyperparameters held in `params`.
cb = CatBoostClassifier(**params)

### Load Training Data

In [None]:
# Read the training set from a path relative to the working directory;
# index_col=0 uses the first CSV column as the frame's index.
df = pd.read_csv("main/training_data.csv", index_col=0)

In [None]:
# Preview the first rows only instead of rendering the whole frame.
df.head()

### Build Model

In [None]:
# Model inputs: the four feature columns used for training.
train_df = df[["job", "credit_amount", "duration", "purpose"]]

# Binary target: 'good' -> 1, 'bad' -> 0.
y = df["risk"].map({'good': 1, 'bad': 0})

# Positions of every column whose dtype is not float64; these are passed to
# CatBoost as categorical features.
# NOTE(review): integer-typed numeric columns would also match this test and be
# treated as categorical -- confirm the dtypes in the CSV.
categorical_features_indices = np.where(
    train_df.dtypes != np.float64
)[0]

# Fixed seed so the train/validation partition is reproducible.
X_train, X_validation, y_train, y_validation = train_test_split(
    train_df,
    y,
    test_size=0.25,
    random_state=42,
)

train_pool = Pool(
    data=X_train,
    label=y_train,
    cat_features=categorical_features_indices,
)
validate_pool = Pool(
    data=X_validation,
    label=y_validation,
    cat_features=categorical_features_indices,
)

print("Fitting catboost model")
cb.fit(train_pool, eval_set=validate_pool)

### Evaluate Model

In [None]:
# Run the fitted classifier on the validation features.
y_predicted = cb.predict(X_validation)

In [None]:
# Score the validation predictions and store the result next to the hyperparameters.
params['f1_score'] = f1 = f1_score(y_true=y_validation, y_pred=y_predicted)

In [None]:
# Show the F1 score computed on the validation split.
print(f1)

In [None]:
# Cast every value to str (keys unchanged); the resulting dict is later passed
# as `properties` when the model is logged.
# NOTE(review): this rebinds `params`, so re-running the model-definition cell
# afterwards would see string values and the extra 'f1_score' key.
params = {key: str(value) for key, value in params.items()}

In [None]:
# Display the stringified parameters.
params

### Persist Model to Artifactory

In [None]:
import frogml

# Where the model version is stored and how it is identified.
repository = "test-webinar-ml-local"
name = "credit_risk_catboost"
version = "0.4.2"
# NOTE(review): `namespace` is assigned here but not passed to log_model below.
namespace = "jfrogmldemo"

# Files attached to the logged model.
code_dir = "main"
dependencies = ["main/conda.yml"]

# Log the fitted classifier together with the stringified params as properties.
frogml.catboost.log_model(
    model=cb,
    model_name=name,
    repository=repository,
    version=version,
    properties=params,
    dependencies=dependencies,
    code_dir=code_dir,
)

## Execute Structured Build

In [None]:
import os
import numpy as np
from qwak.feature_store.offline import OfflineClient

import qwak
import frogml
from frogml.catboost import log_model
from qwak.model.base import QwakModel
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier, Pool
import pandas as pd
from qwak import qwak_timer
from qwak.model.adapters import JsonOutputAdapter
import matplotlib.pyplot as plt

# Location of the training CSV.
# NOTE(review): despite the name this is a relative path, and nothing in the
# visible code references this constant -- confirm whether it is still needed.
RUNNING_FILE_ABSOLUTE_PATH = "main/training_data.csv"


class RiskModel(QwakModel):
    """CatBoost credit-risk classifier packaged as a Qwak model.

    `build` trains and evaluates the classifier on ./main/training_data.csv,
    `schema` declares the feature-store inputs and the output column, and
    `predict` scores a frame of extracted features.
    """

    # Feature columns, in the order used for both training and scoring.
    _FEATURE_COLUMNS = ["job", "credit_amount", "duration", "purpose"]
    # Prefix of the feature names declared in `schema` and stripped in `predict`.
    _FEATURE_SET = "qwak-snowflake-webinar"

    def __init__(self):
        """Create the untrained classifier and log its hyperparameters."""
        # Hyperparameters handed to CatBoostClassifier and logged via qwak.log_param.
        self.params = {
            'iterations': 50,
            'learning_rate': 0.2,
            'eval_metric': 'Accuracy',
            'logging_level': 'Silent',
            'use_best_model': True
        }
        self.catboost = CatBoostClassifier(**self.params)
        # Values logged by build(); 'test_size' and 'random_state' also drive the
        # train/validation split so the logged values match what was executed.
        self.metrics = {
            # NOTE(review): hard-coded value, not computed from the trained model.
            'accuracy': 95,
            'random_state': 42,
            'test_size': .25
        }
        qwak.log_param(self.params)

    def build(self):
        """
        Build the Qwak model:
            1. Load the training data from ./main/training_data.csv
            2. Train a CatBoost classifier, using the validation split as eval set
            3. Log the F1 score and run settings, and save the loss plot
        """
        df = pd.read_csv("./main/training_data.csv", index_col=0)

        train_df = df[self._FEATURE_COLUMNS]

        # Binary target: 'good' -> 1, 'bad' -> 0.
        y = df["risk"].map({'good':1,'bad':0})

        # Every column whose dtype is not float64 is passed to CatBoost as categorical.
        # NOTE(review): integer-typed numeric columns would also match -- confirm dtypes.
        categorical_features_indices = np.where(train_df.dtypes != np.float64)[0]
        X_train, X_validation, y_train, y_validation = train_test_split(
            train_df,
            y,
            test_size=self.metrics['test_size'],
            random_state=self.metrics['random_state'],
        )

        train_pool = Pool(X_train, y_train, cat_features=categorical_features_indices)
        validate_pool = Pool(X_validation, y_validation, cat_features=categorical_features_indices)

        print("Fitting catboost model")
        self.catboost.fit(train_pool, eval_set=validate_pool)

        y_predicted = self.catboost.predict(X_validation)
        f1 = f1_score(y_validation, y_predicted)

        # One log_metric call per entry, in the same order as before.
        logged_metrics = {
            'f1_score': f1,
            'iterations': self.params['iterations'],
            'learning_rate': self.params['learning_rate'],
            'accuracy': self.metrics['accuracy'],
            'random_state': self.metrics['random_state'],
            'test_size': self.metrics['test_size'],
        }
        for metric_name, metric_value in logged_metrics.items():
            qwak.log_metric({metric_name: metric_value})

        self.visualize(self.catboost)
        # qwak.log_file("loss_plot.png", tag="credit_risk_graph")

    def visualize(self, model):
        """Plot the training and validation Logloss curves and save them to loss_plot.png.

        Args:
            model: fitted CatBoost model whose `evals_result_` holds 'learn' and
                'validation' entries with a 'Logloss' series.
        """
        loss = model.evals_result_["learn"]['Logloss']
        validation_loss = model.evals_result_["validation"]['Logloss']
        plt.figure(figsize=(10, 7))
        # The plotted series are Logloss values, so label them as loss.
        plt.plot(loss, label="Training Loss")
        plt.plot(validation_loss, label="Validation Loss")
        plt.xlabel("Number of trees")
        plt.ylabel("Loss Value")
        plt.title("CatBoost Training Progress - Loss Values")
        plt.legend()
        plt.grid()
        plt.savefig("loss_plot.png")

    def schema(self):
        """Return the ModelSchema: one `user_id` entity, four feature-store inputs, one float output."""
        from qwak.model.schema import ModelSchema, InferenceOutput, FeatureStoreInput, Entity
        user_id = Entity(name="user_id", type=str)
        model_schema = ModelSchema(
            entities=[user_id],
            inputs=[
                FeatureStoreInput(entity=user_id, name=f"{self._FEATURE_SET}.{column}")
                for column in self._FEATURE_COLUMNS
            ],
            outputs=[
                InferenceOutput(name="Risk", type=float)
            ])
        return model_schema

    # @qwak.api(feature_extraction=True)
    # NOTE(review): `extracted_df` is presumably supplied by the decorator above
    # when it is enabled -- confirm how predict is invoked while it is commented out.
    def predict(self, df,extracted_df):
        """Score extracted features and return a one-column 'Risk' DataFrame.

        Args:
            df: request frame; not read by this method.
            extracted_df: frame whose columns are named
                'qwak-snowflake-webinar.<feature>' for each feature column.

        Returns:
            pandas.DataFrame with a single 'Risk' column holding the classifier output.
        """
        print(extracted_df)
        #### {"user_id": "xxxx-xxx-xxx-xxxx"}
        # analytics_logger.log(column='test',value='value')
        # Throwaway work, present only so the timer has something to measure.
        with qwak_timer("test timer"):
            [i for i in range(1000000)]
        # Strip the feature-set prefix so the columns match the training names.
        rename_map = {
            f"{self._FEATURE_SET}.{column}": column
            for column in self._FEATURE_COLUMNS
        }
        renamed = extracted_df.rename(columns=rename_map)
        prediction = pd.DataFrame(self.catboost.predict(renamed[self._FEATURE_COLUMNS]),
                            columns=['Risk'])
        return prediction



### Execute Build

In [None]:
from qwak import QwakClient
from qwak.model.tools import run_local

# Create the Qwak client; it is used in the next cell to submit the build.
client = QwakClient()

In [None]:
# Train the model in this kernel, then publish the already-built instance to JFrog ML.
model = RiskModel()
model.build()

# `prebuilt_qwak_model` hands over the instance that build() was just run on.
client.build_model(
  model_id='credit_risk',
  prebuilt_qwak_model=model
)