# Save buckets to files

After the bucketing process has been completed, it's useful to save the buckets to a file.<br>
`skorecard` supports saving bucketers to yaml and json files.

In [1]:
from skorecard.datasets import load_uci_credit_card

# Load the demo dataset as a (features, target) pair and preview the first rows.
X, y = load_uci_credit_card(return_X_y=True)
X.head(n=4)

Unnamed: 0,EDUCATION,MARRIAGE,LIMIT_BAL,BILL_AMT1
0,1,2,400000.0,201800.0
1,2,2,80000.0,80610.0
2,1,2,500000.0,499452.0
3,1,1,140000.0,450.0


## Saving bucketers

In [2]:
from skorecard.bucketers import DecisionTreeBucketer

# Construct the bucketer and keep whatever `fit` returns, in a single expression.
bucketer = DecisionTreeBucketer(max_n_bins=10).fit(X, y)

Save the bucketer to a yaml file

In [3]:
# Use a context manager so the file handle is flushed and closed as soon as
# the bucketer has been written, instead of staying open in the kernel.
with open("bucketer.yml", "w") as yml_file:
    bucketer.save_yml(yml_file)

## Saving bucketing process output

We define the bucketing process and save the process to a yaml file

In [4]:
from skorecard.pipeline import BucketingProcess
from skorecard.bucketers import EqualFrequencyBucketer, OptimalBucketer, AsIsCategoricalBucketer
from sklearn.pipeline import make_pipeline

# Column groups: each group is handled by its own bucketers below.
num_cols = ["LIMIT_BAL", "BILL_AMT1"]
cat_cols = ["EDUCATION", "MARRIAGE"]

# Pre-bucketing stage: decision-tree bucketer on the numerical columns,
# as-is bucketer on the categorical columns.
prebucketing_steps = make_pipeline(
    DecisionTreeBucketer(variables=num_cols, max_n_bins=100, min_bin_size=0.05),
    AsIsCategoricalBucketer(variables=cat_cols),
)

# Bucketing stage: one OptimalBucketer per column group.
bucketing_steps = make_pipeline(
    OptimalBucketer(variables=num_cols, max_n_bins=10, min_bin_size=0.05),
    OptimalBucketer(
        variables=cat_cols,
        variables_type='categorical',
        max_n_bins=10,
        min_bin_size=0.05,
    ),
)

bucketing_process = BucketingProcess(
    prebucketing_pipeline=prebucketing_steps,
    bucketing_pipeline=bucketing_steps,
)

# Last expression of the cell: fit and display the resulting object.
bucketing_process.fit(X, y)

BucketingProcess(bucketing_pipeline=SkorecardPipeline(steps=[('optimalbucketer-1',
                                                              OptimalBucketer(variables=['LIMIT_BAL',
                                                                                         'BILL_AMT1'])),
                                                             ('optimalbucketer-2',
                                                              OptimalBucketer(variables=['EDUCATION',
                                                                                         'MARRIAGE'],
                                                                              variables_type='categorical'))]),
                 prebucketing_pipeline=SkorecardPipeline(steps=[('decisiontreebucketer',
                                                                 DecisionTreeBucketer(variables=['LIMIT_BAL',
                                                                                                 'BILL_AMT1'])),

Save the bucketing process to a yaml file

In [5]:
# Use a context manager so the file handle is flushed and closed as soon as
# the bucketing process has been written.
with open("bucket_process.yml", "w") as yml_file:
    bucketing_process.save_yml(yml_file)

## Saving pipelines

`scikit-learn` pipelines can be saved by first converting them to a `skorecard` pipeline.

In [6]:
from sklearn.pipeline import make_pipeline
from skorecard.bucketers import EqualFrequencyBucketer

# Assemble a plain scikit-learn pipeline from two bucketers, one per column.
bill_amount_bucketer = EqualFrequencyBucketer(n_bins=10, variables=["BILL_AMT1"])
limit_balance_bucketer = DecisionTreeBucketer(max_n_bins=5, variables=["LIMIT_BAL"])
pipe = make_pipeline(bill_amount_bucketer, limit_balance_bucketer)

# Fit and transform in one call, then preview the first rows of the result.
bucketed = pipe.fit_transform(X, y)
bucketed.head(5)

Unnamed: 0,EDUCATION,MARRIAGE,LIMIT_BAL,BILL_AMT1
0,1,2,4,9
1,2,2,2,7
2,1,2,4,9
3,1,1,3,1
4,2,1,4,7


To save `sklearn` pipelines, `skorecard` provides helper functions

In [7]:
from skorecard.pipeline.pipeline import to_skorecard_pipeline

# Convert the scikit-learn pipeline into a skorecard pipeline, which exposes `save_yml`.
sk_pipe = to_skorecard_pipeline(pipe)

# Use a context manager so the file handle is flushed and closed after writing.
with open("pipe2.yml", "w") as yml_file:
    sk_pipe.save_yml(yml_file)