In [4]:
#importing the necessary libraries
import numpy as np
import pandas as pd

In [5]:
# Load the raw table from 'credit_scores.csv' (path is relative to the working directory) into a DataFrame
data = pd.read_csv('credit_scores.csv')

In [6]:
# Remove the identifier columns that are treated as irrelevant for modelling.
# Because `data` is reassigned from itself, a second run of this cell would
# previously fail with KeyError (the columns are already gone);
# errors='ignore' makes the cell safe to re-run.
data = data.drop(columns=['Name', 'SSN', 'ID', 'Customer_ID'], errors='ignore')

In [7]:
# Separate the target column (Credit_Score) from the remaining columns, which serve as the model inputs
y = data['Credit_Score']
X = data.drop(columns=['Credit_Score'])

In [8]:
# Hold out 20% of the rows as a test set; random_state=1 fixes the seed used for the split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
    test_size=0.2,
)

In [9]:
#filling missing values in numerical columns with the mean value and scaling the numerical columns
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Numerical branch: fill missing entries with the column mean, then apply StandardScaler
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

In [10]:
# Categorical branch: fill missing entries with the literal string 'missing',
# then one-hot encode (the encoder is configured with handle_unknown='ignore')
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(fill_value='missing', strategy='constant')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

In [11]:
# Bundle preprocessing for numerical and categorical data.
# Columns are routed by dtype: numeric columns go through the impute+scale
# pipeline, object (string) columns through the impute+one-hot pipeline.
# The numeric selector uses the generic 'number' dtype rather than the exact
# names 'int64'/'float64', so numeric columns stored with another width
# (int32, float32, uint8, ...) are preprocessed as well. With the exact-name
# filter such columns matched neither selector and, since no `remainder` is
# configured here, would not reach the model.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, X.select_dtypes(include='number').columns),
        ('cat', categorical_transformer, X.select_dtypes(include=['object']).columns)
    ])


In [12]:
# Create the support vector classifier. SVC() is called with no arguments,
# so every hyperparameter is left at the library default.
from sklearn.svm import SVC
model = SVC()

In [13]:
#creating and evaluating the pipeline
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [14]:
# Chain the preprocessing step and the classifier into a single estimator
my_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', model),
])

# Fit on the training split: the pipeline preprocesses X_train, then trains the model on the result
my_pipeline.fit(X_train, y_train)


In [15]:
# Predict labels for the held-out test split (the pipeline preprocesses X_test before predicting)
preds = my_pipeline.predict(X_test)

# Report overall accuracy, then the per-class classification report
score = accuracy_score(y_true=y_test, y_pred=preds)
print('Accuracy:', score)
print(classification_report(y_true=y_test, y_pred=preds))

Accuracy: 0.6832809459661727
              precision    recall  f1-score   support

        Good       0.49      0.76      0.60      1018
        Poor       0.72      0.61      0.66      2062
    Standard       0.76      0.70      0.73      3601

    accuracy                           0.68      6681
   macro avg       0.66      0.69      0.66      6681
weighted avg       0.70      0.68      0.69      6681



In [16]:
# Persist the fitted pipeline (preprocessing + model) to 'credit_score_model.pkl'
import joblib
joblib.dump(
    value=my_pipeline,
    filename='credit_score_model.pkl',
)
print("Model dumped!")

Model dumped!
