In [44]:
import re
import math
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

class FullLoanPreprocessor(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.income_mode = None
        self.credit_mode = None
        self.loan_annuity_mode = None
        self.registration_mode = None
        self.id_days_mode = None

    @staticmethod
    def clean_and_floor(value):
        if pd.isna(value):
            return value
        cleaned_value = re.sub(r'[^\d.]', '', str(value))
        if cleaned_value:
            return math.floor(float(cleaned_value))
        return None

    @staticmethod
    def family_categorize(x):
        if x <= 1: return 'upto_1_member'
        elif x == 2: return '2_members'
        elif x == 3: return '3_members'
        else: return 'more_than_3_members'

    @staticmethod
    def annuity_categorize(x):
        if x <= 2: return 'upto_2_percent'
        elif x <= 4: return 'upto_4_percent'
        elif x <= 6: return 'upto_6_percent'
        elif x <= 8: return 'upto_8_percent'
        else: return 'more_than_8_percent'

    @staticmethod
    def income_category(x):
        if x <= 6000: return 'income_band1'
        elif x <= 10000: return 'income_band2'
        elif x <= 15000: return 'income_band3'
        elif x <= 20000: return 'income_band4'
        elif x <= 50000: return 'income_band5'
        else: return 'income_band6'

    @staticmethod
    def credit_category(x):
        if x <= 20000: return 'credit_band1'
        elif x <= 30000: return 'credit_band2'
        elif x <= 40000: return 'credit_band3'
        elif x <= 50000: return 'credit_band4'
        elif x <= 60000: return 'credit_band5'
        else: return 'credit_band6'

    @staticmethod
    def credit_income_category(x):
        if x <= 2: return 'Upto_2_times'
        elif x <= 3: return 'Upto_3_times'
        elif x <= 4: return 'Upto_4_times'
        elif x <= 5: return 'Upto_5_times'
        elif x <= 10: return 'Upto_10_times'
        else: return 'more_than_10_times'

    @staticmethod
    def registration_category(x):
        if x <= 5: return 'upto_5_years'
        elif x <= 10: return 'upto_10_years'
        elif x <= 15: return 'upto_15_years'
        elif x <= 20: return 'upto_20_years'
        elif x <= 30: return 'upto_30_years'
        else: return 'more_than_30_years'

    @staticmethod
    def id_years_category(x):
        if x <= 5: return 'upto_5_years'
        elif x <= 10: return 'upto_10_years'
        else: return 'more_than_10_years'

    @staticmethod
    def convert_employment_days(x):
        if x == 0: return 'no_employment'
        elif x <= 2: return 'upto_2_years'
        elif x <= 5: return 'upto_5_years'
        elif x <= 10: return 'upto_10_years'
        else: return 'more_than_10_years'

    @staticmethod
    def convert_age_days(x):
        if x <= 30: return 'upto_30_years'
        elif x <= 40: return 'upto_40_years'
        elif x <= 50: return 'upto_50_years'
        else: return 'more_than_50_years'

    @staticmethod
    def year_categorize(x):
        if x == 0: return 'same_year'
        elif x == 1: return 'one_year'
        elif x == 2: return 'two_year'
        elif x == 3: return 'three_year'
        elif x == 4: return 'four_year'
        else: return 'more_than_four'

    @staticmethod
    def bureau_categorize(x):
        if x == 0: return 'no_search'
        elif x == 1: return 'one_search'
        elif x == 2: return 'two_search'
        else: return 'more_than_two_search'

    def fit(self, X, y=None):
        self.income_mode = X['Client_Income'].apply(self.clean_and_floor).mode().iloc[0]
        self.credit_mode = X['Credit_Amount'].apply(self.clean_and_floor).mode().iloc[0]
        self.loan_annuity_mode = X['Loan_Annuity'].apply(self.clean_and_floor).mode().iloc[0]
        self.registration_mode = pd.to_numeric(X['Registration_Days'], errors='coerce').mode().iloc[0]
        self.id_days_mode = pd.to_numeric(X['ID_Days'], errors='coerce').mode().iloc[0]
        return self

    def transform(self, X):
        X = X.copy()

        X['Client_Income'] = X['Client_Income'].apply(self.clean_and_floor).fillna(self.income_mode)
        X['Client_Income_category'] = X['Client_Income'].apply(self.income_category)

        X['Credit_Amount'] = X['Credit_Amount'].apply(self.clean_and_floor).fillna(self.credit_mode)
        X['Credit_Amount_category'] = X['Credit_Amount'].apply(self.credit_category)

        X['Credit_to_Income_Ratio'] = (X['Credit_Amount'] / X['Client_Income']).round(2)
        X['Credit_to_Income_Category'] = X['Credit_to_Income_Ratio'].apply(self.credit_income_category)

        X['Loan_Annuity'] = X['Loan_Annuity'].apply(self.clean_and_floor).fillna(self.loan_annuity_mode)
        X['Loan_Annuity_percent'] = ((X['Loan_Annuity'] / X['Credit_Amount']) * 100).round(2)
        X['Loan_Annuity_category'] = X['Loan_Annuity_percent'].apply(self.annuity_categorize)

        X['Client_Family_Members'] = X['Client_Family_Members'].fillna(X['Child_Count'] + 1)
        X['Client_Family_Members_Category'] = X['Client_Family_Members'].apply(self.family_categorize)

        X['Car_Owned'] = X['Car_Owned'].fillna(0).astype(int)
        X['Bike_Owned'] = X['Bike_Owned'].fillna(0).astype(int)
        X['Active_Loan'] = X['Active_Loan'].fillna(0).astype(int)

        X['House_Own'] = X['House_Own'].fillna(X['Own_House_Age'].notnull().astype(int))
        X.drop(columns='Own_House_Age', inplace=True, errors='ignore')

        X['Registration_Days'] = pd.to_numeric(X['Registration_Days'], errors='coerce').fillna(self.registration_mode)
        X['ID_Days'] = pd.to_numeric(X['ID_Days'], errors='coerce').fillna(self.id_days_mode)

        X['Registration_Years'] = (X['Registration_Days'] / 365).round(0)
        X['Registration_Years_Category'] = X['Registration_Years'].apply(self.registration_category)

        X['ID_Years'] = (X['ID_Days'] / 365).round(0)
        X['ID_Years_Category'] = X['ID_Years'].apply(self.id_years_category)

        X['Employed_Days'] = X['Employed_Days'].apply(self.clean_and_floor).replace(365243, 0).fillna(0)
        X['Employed_Days'] = (X['Employed_Days'] / 365).round(0)
        X['Employed_Days_Category'] = X['Employed_Days'].apply(self.convert_employment_days)

        X['Age_Days'] = X['Age_Days'].apply(self.clean_and_floor).fillna(6570)
        X['Age_Days'] = (X['Age_Days'] / 365).round(0)
        X['Age_Days_Category'] = X['Age_Days'].apply(self.convert_age_days)

        X['Phone_Change'] = X['Phone_Change'].fillna(0)
        X['Phone_Change_category'] = (X['Phone_Change'] / 365).astype(int).apply(self.year_categorize)

        X['Credit_Bureau'] = X['Credit_Bureau'].fillna(0)
        X['Credit_Bureau_Category'] = X['Credit_Bureau'].astype(int).apply(self.bureau_categorize)

        


        # Select only the specified columns before returning
        selected_columns = [
            'Client_Income_category', 'Credit_Amount_category', 'Credit_to_Income_Category',
            'Loan_Annuity_category', 'Client_Family_Members_Category', 'Car_Owned', 'Bike_Owned',
            'Active_Loan', 'Accompany_Client', 'Client_Income_Type', 'Client_Education',
            'Client_Marital_Status', 'Loan_Contract_Type', 'Client_Housing_Type',
            'Registration_Years_Category', 'ID_Years_Category', 'Employed_Days_Category',
            'Age_Days_Category', 'House_Own', 'Client_Occupation', 'Cleint_City_Rating',
            'Application_Process_Day', 'Application_Hour_Category', 'Type_Organization_Grouped',
            'Score_Source_2', 'Score_Source_3', 'Social_Circle_Default', 'Phone_Change_category',
            'Credit_Bureau_Category'
        ]
        return X[selected_columns]


In [29]:
import re
import math
import pandas as pd
import numpy as np

class FullLoanPreprocessor:
    @staticmethod
    def clean_and_floor(value):
        if pd.isna(value):
            return value
        cleaned_value = re.sub(r'[^\d.]', '', str(value))
        if cleaned_value:
            return math.floor(float(cleaned_value))
        return None

    @staticmethod
    def family_categorize(x):
        return (
            'upto_1_member' if x <= 1 else
            '2_members' if x == 2 else
            '3_members' if x == 3 else
            'more_than_3_members'
        )

    @staticmethod
    def annuity_categorize(x):
        return (
            'upto_2_percent' if x <= 2 else
            'upto_4_percent' if x <= 4 else
            'upto_6_percent' if x <= 6 else
            'upto_8_percent' if x <= 8 else
            'more_than_8_percent'
        )

    @staticmethod
    def income_category(x):
        return (
            'income_band1' if x <= 6000 else
            'income_band2' if x <= 10000 else
            'income_band3' if x <= 15000 else
            'income_band4' if x <= 20000 else
            'income_band5' if x <= 50000 else
            'income_band6'
        )

    @staticmethod
    def credit_category(x):
        return (
            'credit_band1' if x <= 20000 else
            'credit_band2' if x <= 30000 else
            'credit_band3' if x <= 40000 else
            'credit_band4' if x <= 50000 else
            'credit_band5' if x <= 60000 else
            'credit_band6'
        )

    @staticmethod
    def credit_income_category(x):
        return (
            'Upto_2_times' if x <= 2 else
            'Upto_3_times' if x <= 3 else
            'Upto_4_times' if x <= 4 else
            'Upto_5_times' if x <= 5 else
            'Upto_10_times' if x <= 10 else
            'more_than_10_times'
        )

    @staticmethod
    def registration_category(x):
        return (
            'upto_5_years' if x <= 5 else
            'upto_10_years' if x <= 10 else
            'upto_15_years' if x <= 15 else
            'upto_20_years' if x <= 20 else
            'upto_30_years' if x <= 30 else
            'more_than_30_years'
        )

    @staticmethod
    def id_years_category(x):
        return 'upto_5_years' if x <= 5 else 'upto_10_years' if x <= 10 else 'more_than_10_years'

    @staticmethod
    def convert_employment_days(x):
        if x == 0:
            return 'no_employment'
        return (
            'upto_2_years' if x <= 2 else
            'upto_5_years' if x <= 5 else
            'upto_10_years' if x <= 10 else
            'more_than_10_years'
        )

    @staticmethod
    def convert_age_days(x):
        return (
            'upto_30_years' if x <= 30 else
            'upto_40_years' if x <= 40 else
            'upto_50_years' if x <= 50 else
            'more_than_50_years'
        )

    @staticmethod
    def year_categorize(x):
        return (
            'same_year' if x == 0 else
            'one_year' if x == 1 else
            'two_year' if x == 2 else
            'three_year' if x == 3 else
            'four_year' if x == 4 else
            'more_than_four'
        )

    @staticmethod
    def bureau_categorize(x):
        return (
            'no_search' if x == 0 else
            'one_search' if x == 1 else
            'two_search' if x == 2 else
            'more_than_two_search'
        )

    @staticmethod
    def application_hour_category(x):
        return (
            'upto_6_hours' if x <= 6 else
            'upto_12_hours' if x <= 12 else
            'upto_18_hours' if x <= 18 else
            'more_than_18_hours'
        )

    @staticmethod
    def group_type_organization(org):
        group_mapping = {
            'Self-employed': 'Self-employed',
            'Business Entity Type 1': 'Business',
            'Government': 'Public Sector',
            'Medicine': 'Healthcare',
            'IT': 'Technology',
            'Trade: type 1': 'Trade',
            'Industry: type 1': 'Industry',
            'Other': 'Other'
        }
        return group_mapping.get(org, 'Other')
    
    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
    # Clean and convert income-related columns
        df['Client_Income'] = df['Client_Income'].apply(self.clean_and_floor)
        df['Client_Income'] = pd.to_numeric(df['Client_Income'], errors='coerce')
        df['Client_Income_category'] = df['Client_Income'].apply(self.income_category)

        df['Credit_Amount'] = df['Credit_Amount'].apply(self.clean_and_floor)
        df['Credit_Amount'] = pd.to_numeric(df['Credit_Amount'], errors='coerce')
        df['Credit_Amount_category'] = df['Credit_Amount'].apply(self.credit_category)

        df['Credit_to_Income_Ratio'] = (df['Credit_Amount'] / df['Client_Income']).round(2)
        df['Credit_to_Income_Category'] = df['Credit_to_Income_Ratio'].apply(self.credit_income_category)

        df['Loan_Annuity'] = df['Loan_Annuity'].apply(self.clean_and_floor)
        df['Loan_Annuity'] = pd.to_numeric(df['Loan_Annuity'], errors='coerce')
        df['Loan_Annuity_percent'] = ((df['Loan_Annuity'] / df['Credit_Amount']) * 100).round(2)
        df['Loan_Annuity_category'] = df['Loan_Annuity_percent'].apply(self.annuity_categorize)

        df['Client_Family_Members_Category'] = df['Client_Family_Members'].apply(self.family_categorize)

        df['House_Own'] = df['Own_House_Age'].notnull().astype(int)
        df = df.drop(columns='Own_House_Age')

        df['Registration_Years'] = (pd.to_numeric(df['Registration_Days'], errors='coerce') / 365).round(0)
        df['Registration_Years_Category'] = df['Registration_Years'].apply(self.registration_category)

        df['ID_Years'] = (pd.to_numeric(df['ID_Days'], errors='coerce') / 365).round(0)
        df['ID_Years_Category'] = df['ID_Years'].apply(self.id_years_category)

        df['Employed_Days'] = df['Employed_Days'].replace(365243, 0)
        df['Employed_Days'] = (pd.to_numeric(df['Employed_Days'], errors='coerce') / 365).round(0)
        df['Employed_Days_Category'] = df['Employed_Days'].apply(self.convert_employment_days)

        df['Age_Days'] = (pd.to_numeric(df['Age_Days'], errors='coerce') / 365).round(0)
        df['Age_Days_Category'] = df['Age_Days'].apply(self.convert_age_days)

        # Clean: convert to numeric
        df['Phone_Change'] = pd.to_numeric(df['Phone_Change'], errors='coerce')

        # Impute or handle missing values (optional: fill with median or -1)
        df['Phone_Change'] = df['Phone_Change'].fillna(0)  # or use df['Phone_Change'].median()

        # Then divide and categorize
        df['Phone_Change_years'] = (df['Phone_Change'] / 365).round(0)
        df['Phone_Change_category'] = df['Phone_Change_years'].apply(self.year_categorize)

        # Clean
        df['Credit_Bureau'] = pd.to_numeric(df['Credit_Bureau'], errors='coerce')

        # Impute NaNs
        df['Credit_Bureau'] = df['Credit_Bureau'].fillna(0)

        # Categorize
        df['Credit_Bureau_Category'] = df['Credit_Bureau'].astype(int).apply(self.bureau_categorize)


        #df['Phone_Change_category'] = (pd.to_numeric(df['Phone_Change'], errors='coerce') / 365).astype(int).apply(self.year_categorize)
        #df['Credit_Bureau_Category'] = pd.to_numeric(df['Credit_Bureau'], errors='coerce').astype('Int64').apply(self.bureau_categorize)

        df['Application_Hour_Category'] = pd.to_numeric(df['Application_Process_Hour'], errors='coerce').apply(self.application_hour_category)
        df['Type_Organization_Grouped'] = df['Type_Organization'].apply(self.group_type_organization)

        selected_columns = [
            'Client_Income_category', 'Credit_Amount_category', 'Credit_to_Income_Category',
            'Loan_Annuity_category', 'Client_Family_Members_Category', 'Car_Owned', 'Bike_Owned',
            'Active_Loan', 'Accompany_Client', 'Client_Income_Type', 'Client_Education',
            'Client_Marital_Status', 'Loan_Contract_Type', 'Client_Housing_Type',
            'Registration_Years_Category', 'ID_Years_Category', 'Employed_Days_Category',
            'Age_Days_Category', 'House_Own', 'Client_Occupation', 'Cleint_City_Rating',
            'Application_Process_Day', 'Application_Hour_Category', 'Type_Organization_Grouped',
            'Score_Source_2', 'Score_Source_3', 'Social_Circle_Default', 'Phone_Change_category',
            'Credit_Bureau_Category'
        ]

        return df[selected_columns]


    


In [30]:
df = pd.read_csv('dataset.csv')

  df = pd.read_csv('dataset.csv')


In [24]:
df.columns

Index(['ID', 'Client_Income', 'Car_Owned', 'Bike_Owned', 'Active_Loan',
       'House_Own', 'Child_Count', 'Credit_Amount', 'Loan_Annuity',
       'Accompany_Client', 'Client_Income_Type', 'Client_Education',
       'Client_Marital_Status', 'Client_Gender', 'Loan_Contract_Type',
       'Client_Housing_Type', 'Population_Region_Relative', 'Age_Days',
       'Employed_Days', 'Registration_Days', 'ID_Days', 'Own_House_Age',
       'Mobile_Tag', 'Homephone_Tag', 'Workphone_Working', 'Client_Occupation',
       'Client_Family_Members', 'Cleint_City_Rating',
       'Application_Process_Day', 'Application_Process_Hour',
       'Client_Permanent_Match_Tag', 'Client_Contact_Work_Tag',
       'Type_Organization', 'Score_Source_1', 'Score_Source_2',
       'Score_Source_3', 'Social_Circle_Default', 'Phone_Change',
       'Credit_Bureau', 'Default'],
      dtype='object')

In [45]:
# Assuming FullLoanPreprocessor is defined correctly
preprocessor = FullLoanPreprocessor()  # Instantiate it
preprocessor.fit(X, y)                 # Now fit works


In [31]:
df_clean = df.dropna()
df_clean.shape
df_clean.head(10)


print(df.columns)

print(df_clean.columns)

Index(['ID', 'Client_Income', 'Car_Owned', 'Bike_Owned', 'Active_Loan',
       'House_Own', 'Child_Count', 'Credit_Amount', 'Loan_Annuity',
       'Accompany_Client', 'Client_Income_Type', 'Client_Education',
       'Client_Marital_Status', 'Client_Gender', 'Loan_Contract_Type',
       'Client_Housing_Type', 'Population_Region_Relative', 'Age_Days',
       'Employed_Days', 'Registration_Days', 'ID_Days', 'Own_House_Age',
       'Mobile_Tag', 'Homephone_Tag', 'Workphone_Working', 'Client_Occupation',
       'Client_Family_Members', 'Cleint_City_Rating',
       'Application_Process_Day', 'Application_Process_Hour',
       'Client_Permanent_Match_Tag', 'Client_Contact_Work_Tag',
       'Type_Organization', 'Score_Source_1', 'Score_Source_2',
       'Score_Source_3', 'Social_Circle_Default', 'Phone_Change',
       'Credit_Bureau', 'Default'],
      dtype='object')
Index(['ID', 'Client_Income', 'Car_Owned', 'Bike_Owned', 'Active_Loan',
       'House_Own', 'Child_Count', 'Credit_Amount', 'Lo

In [None]:
#df_clean.rename(columns={"Cleint_City_Rating": "Client_City_Rating"}, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean.rename(columns={"Cleint_City_Rating": "Client_City_Rating"}, inplace=True)


In [22]:
df_clean.columns

Index(['ID', 'Client_Income', 'Car_Owned', 'Bike_Owned', 'Active_Loan',
       'House_Own', 'Child_Count', 'Credit_Amount', 'Loan_Annuity',
       'Accompany_Client', 'Client_Income_Type', 'Client_Education',
       'Client_Marital_Status', 'Client_Gender', 'Loan_Contract_Type',
       'Client_Housing_Type', 'Population_Region_Relative', 'Age_Days',
       'Employed_Days', 'Registration_Days', 'ID_Days', 'Own_House_Age',
       'Mobile_Tag', 'Homephone_Tag', 'Workphone_Working', 'Client_Occupation',
       'Client_Family_Members', 'Cleint_City_Rating',
       'Application_Process_Day', 'Application_Process_Hour',
       'Client_Permanent_Match_Tag', 'Client_Contact_Work_Tag',
       'Type_Organization', 'Score_Source_1', 'Score_Source_2',
       'Score_Source_3', 'Social_Circle_Default', 'Phone_Change',
       'Credit_Bureau', 'Default'],
      dtype='object')

In [32]:
X = df_clean.drop(columns=['Default'], errors='ignore')

In [33]:
# Assuming FullLoanPreprocessor is defined correctly
preprocessor = FullLoanPreprocessor()  # Instantiate it
#preprocessor.fit(X, y)   

In [34]:
X_transformed = preprocessor.transform(X)

In [35]:
import pickle



# Save to a .pkl file
with open('data_preprocessing_v3.pkl', 'wb') as f:
    pickle.dump(preprocessor, f)

print("Preprocessor saved successfully.")


Preprocessor saved successfully.


In [13]:
preprocessor1 = pickle.load(open('data_preprocessing.pkl', 'rb'))

           ID  Client_Income  Car_Owned  Bike_Owned  Active_Loan  House_Own  \
102  12127046          27000        1.0         0.0          0.0          1   

     Child_Count  Credit_Amount  Loan_Annuity Accompany_Client  ...  \
102          3.0          53366          4003            Alone  ...   

    Social_Circle_Default Phone_Change Credit_Bureau Client_Income_category  \
102                 0.033       1805.0           4.0           income_band5   

    Credit_Amount_category Credit_to_Income_Ratio Credit_to_Income_Category  \
102           credit_band5                   1.98              Upto_2_times   

    Loan_Annuity_percent Loan_Annuity_category Client_Family_Members_Category  
102                  7.5        upto_8_percent            more_than_3_members  

[1 rows x 46 columns]


AttributeError: 'NoneType' object has no attribute 'reshape'

In [78]:
X_transformed = preprocessor1.transform(X[0:1])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Client_Income'] = df['Client_Income'].apply(self.clean_and_floor)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Client_Income'] = pd.to_numeric(df['Client_Income'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Client_Income_category'] = df['Client_Income'].apply(self

In [75]:
catboost_model = pickle.load(open('catboost_model_version_1.pkl', 'rb'))
scalar = pickle.load(open('data_scaling_latest.pkl', 'rb'))

In [79]:
x1 = scalar.transform(X_transformed)

In [80]:
y_pred = catboost_model.predict(x1)

In [81]:
print(y_pred)

[0]


In [50]:
X_transformed.columns

Index(['ID', 'Client_Income', 'Car_Owned', 'Bike_Owned', 'Active_Loan',
       'House_Own', 'Child_Count', 'Credit_Amount', 'Loan_Annuity',
       'Accompany_Client', 'Client_Income_Type', 'Client_Education',
       'Client_Marital_Status', 'Client_Gender', 'Loan_Contract_Type',
       'Client_Housing_Type', 'Population_Region_Relative', 'Age_Days',
       'Employed_Days', 'Registration_Days', 'ID_Days', 'Mobile_Tag',
       'Homephone_Tag', 'Workphone_Working', 'Client_Occupation',
       'Client_Family_Members', 'Cleint_City_Rating',
       'Application_Process_Day', 'Application_Process_Hour',
       'Client_Permanent_Match_Tag', 'Client_Contact_Work_Tag',
       'Type_Organization', 'Score_Source_1', 'Score_Source_2',
       'Score_Source_3', 'Social_Circle_Default', 'Phone_Change',
       'Credit_Bureau', 'Client_Income_category', 'Credit_Amount_category',
       'Credit_to_Income_Ratio', 'Credit_to_Income_Category',
       'Loan_Annuity_percent', 'Loan_Annuity_category',
       '

In [None]:
catboost_model = pickle.load(open('catboost_model_version_1.pkl', 'rb'))
scalar = pickle.load(open('data_scaling_latest.pkl', 'rb'))
preprocessor = pickle.load(open('data_preprocessing_v3.pkl', 'rb'))

In [44]:
import pickle
import cloudpickle

# Load the original files (only once!)
catboost_model = pickle.load(open('catboost_model_version_1.pkl', 'rb'))
scaler = pickle.load(open('data_scaling_latest.pkl', 'rb'))
preprocessor = pickle.load(open('data_preprocessing_v3.pkl', 'rb'))



In [45]:
# Re-save them using cloudpickle
with open('catboost_model_version_1.pkl', 'wb') as f:
    cloudpickle.dump(catboost_model, f)

with open('data_scaling_latest.pkl', 'wb') as f:
    cloudpickle.dump(scaler, f)

with open('data_preprocessing_v3.pkl', 'wb') as f:
    cloudpickle.dump(preprocessor, f)


In [43]:
catboost_model = pickle.load(open('catboost_model_version_2.pkl', 'rb'))