In [1]:
# @title $Import \ \ Required \ \ Libraries$
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from imblearn.over_sampling import RandomOverSampler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [2]:
# @title $Upload \ \ Dataset$
# Read the bankruptcy workbook into a DataFrame.
# NOTE(review): the absolute '/content/...' path looks Colab-specific — confirm
# before running this notebook in another environment.
dataset_path = '/content/Bankruptcy.xlsx'
df = pd.read_excel(dataset_path)

In [3]:
# @title $Pipeline \ (SVC \ \ Model)$
# Helper functions used to prepare the data before it reaches the model pipeline
def drop_duplicates(df):
    """Return a new frame with fully duplicated rows removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to de-duplicate; it is not modified.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` with only the first occurrence of each duplicated
        row kept.
    """
    # keep='first' is the pandas default; spelled out to make the rule explicit.
    deduplicated = df.drop_duplicates(keep='first')
    return deduplicated

def separate_features(df):
    """Return every column of ``df`` except the 'class' target column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains a 'class' column.

    Returns
    -------
    pandas.DataFrame
        A new frame without the 'class' column; ``df`` itself is unchanged.

    Raises
    ------
    KeyError
        If ``df`` has no 'class' column.
    """
    feature_frame = df.drop(columns='class')
    return feature_frame

def separate_target(df):
    """Return the 'class' column of ``df`` (the raw, un-encoded target).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains a 'class' column.

    Returns
    -------
    pandas.Series
        The 'class' column, selected by label.
    """
    target = df['class']
    return target

def process_target(y):
    """Encode the textual class labels as integers.

    'bankruptcy' becomes 1 and 'non-bankruptcy' becomes 0. Any value that is
    not one of these two exact strings has no entry in the mapping and comes
    back as NaN.

    Parameters
    ----------
    y : pandas.Series
        Series of class labels.

    Returns
    -------
    pandas.Series
        Series of the same length holding the encoded labels.
    """
    label_to_code = {'bankruptcy': 1, 'non-bankruptcy': 0}
    encoded = y.map(label_to_code)
    return encoded

# Prepare the data before it reaches the model pipeline.
df = df.pipe(drop_duplicates)                       # remove repeated rows
X = df.pipe(separate_features)                      # every column except 'class'
y = df.pipe(separate_target).pipe(process_target)   # 'class' column encoded as 1 / 0

# Balance the classes by random oversampling. This is done here, outside the
# pipeline, so the pipeline below is fitted on the already-resampled data.
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

# The pipeline holds only the classifier; no resampling step is part of it.
# NOTE(review): gamma presumably has no effect with kernel='linear' — confirm
# against the SVC documentation.
model_pipeline = Pipeline(
    steps=[
        (
            'model',
            SVC(
                C=0.1,
                gamma=1,
                kernel='linear',
                tol=0.0001,
                random_state=42,
                probability=True,  # needed so predict_proba is available
            ),
        ),
    ]
)

# Train on the oversampled features and labels.
model_pipeline.fit(X_resampled, y_resampled)

# Prediction function
def predict(industrial_risk, management_risk, financial_flexibility, credibility, operating_risk, competitiveness):
    """Classify one business with the fitted ``model_pipeline``.

    The six risk scores are placed in a one-row DataFrame. Note that the
    column order (competitiveness before operating_risk) differs from the
    parameter order of this function; each value is paired with its column
    by name.

    Parameters
    ----------
    industrial_risk, management_risk, financial_flexibility, credibility,
    operating_risk, competitiveness
        Feature values for the single business to score.

    Returns
    -------
    tuple
        ``(prediction, y_proba)`` — the outputs of ``model_pipeline.predict``
        and ``model_pipeline.predict_proba`` for the one-row input.
    """
    # Insertion order of this dict fixes the column order of the input frame.
    feature_values = {
        'industrial_risk': industrial_risk,
        'management_risk': management_risk,
        'financial_flexibility': financial_flexibility,
        'credibility': credibility,
        'competitiveness': competitiveness,
        'operating_risk': operating_risk,
    }
    input_data = pd.DataFrame([feature_values])

    prediction = model_pipeline.predict(input_data)
    y_proba = model_pipeline.predict_proba(input_data)
    return prediction, y_proba

# Example prediction: score one business and reuse that single result for
# both the raw printout and the human-readable summary (the model is not
# queried a second time just to print).
result, y_prob = predict(0.5, 0.5, 0.5, 0.5, 0.5, 0.5)
print((result, y_prob))

# `result` holds one label for the single input row. Compare that scalar
# explicitly instead of relying on the truthiness of an array comparison,
# which would raise if the array ever held more than one element.
if result[0] == 1:
    print('Business is heading towards Bankruptcy')
    # Column 1 of y_prob is the probability of class 1 (bankruptcy).
    print('Probability of Business going Bankrupt is', y_prob[0][1])
else:
    print('Business is in safe, no threat of Bankruptcy')
    # Column 0 of y_prob is the probability of class 0 (non-bankruptcy).
    print('Probability of Business not going Bankrupt is', y_prob[0][0])


(array([0]), array([[0.86264541, 0.13735459]]))
Business is in safe, no threat of Bankruptcy
Probability of Business not going Bankrupt is 0.8626454052961277


In [4]:
# @title $Pickle \ \ File$
import pickle

# Persist the fitted pipeline to disk.
# NOTE(review): 'bankrupty' looks like a typo for 'bankruptcy'; the name is
# left unchanged in case another program loads this exact file.
filename = 'bankrupty.pkl'

# The context manager guarantees the file is flushed and closed even if
# pickling raises; the bare open() call used before never closed the handle.
with open(filename, 'wb') as model_file:
    pickle.dump(model_pipeline, model_file)