<a href="https://colab.research.google.com/github/bhas10bc/my-repo/blob/main/customer_segment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Normalization,InputLayer, Dense
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [5]:
# Load your dataset (replace 'data.csv' with your dataset file)
data = pd.read_csv('credit_train.csv')
data.head()

data.shape

(100000, 11)

In [6]:
imputer = IterativeImputer(max_iter=10, random_state=0)
data['Credit Score'] = imputer.fit_transform(data[['Credit Score']])
data['Annual Income'] = imputer.fit_transform(data[['Annual Income']])
data['Monthly Debt'] = imputer.fit_transform(data[['Monthly Debt']])
data['Years of Credit History'] = imputer.fit_transform(data[['Years of Credit History']])
data['Number of Open Accounts'] = imputer.fit_transform(data[['Number of Open Accounts']])
data['Number of Credit Problems'] = imputer.fit_transform(data[['Number of Credit Problems']])
data['Current Credit Balance'] = imputer.fit_transform(data[['Current Credit Balance']])
data['Maximum Open Credit'] = imputer.fit_transform(data[['Maximum Open Credit']])
data['Bankruptcies'] = imputer.fit_transform(data[['Bankruptcies']])
data['Tax Liens'] = imputer.fit_transform(data[['Tax Liens']])

In [7]:
tensor_data = tf.constant(data)
tensor_data = tf.cast(tensor_data, tf.float32)
print(tensor_data)

tf.Tensor(
[[4.4541200e+05 7.0900000e+02 1.1674930e+06 ... 4.1674600e+05
  1.0000000e+00 0.0000000e+00]
 [2.6232800e+05 1.0764561e+03 1.3782765e+06 ... 8.5078400e+05
  0.0000000e+00 0.0000000e+00]
 [1.0000000e+08 7.4100000e+02 2.2318920e+06 ... 7.5009000e+05
  0.0000000e+00 0.0000000e+00]
 ...
 [1.0313600e+05 7.4200000e+02 1.1505450e+06 ... 5.3754800e+05
  1.0000000e+00 0.0000000e+00]
 [5.3033200e+05 7.4600000e+02 1.7175240e+06 ... 7.3825400e+05
  0.0000000e+00 0.0000000e+00]
 [1.0000000e+08 7.4300000e+02 9.3518000e+05 ... 9.1014000e+04
  1.0000000e+00 0.0000000e+00]], shape=(100000, 11), dtype=float32)


In [8]:
tensor_data = tf.random.shuffle(tensor_data)
print(tensor_data[:5])

tf.Tensor(
[[7.8333200e+05 7.0900000e+02 2.8993050e+06 6.4751238e+04 1.6600000e+01
  1.5000000e+01 0.0000000e+00 3.2801600e+05 6.6748000e+05 0.0000000e+00
  0.0000000e+00]
 [1.0000000e+08 7.4700000e+02 1.4223020e+06 1.1164970e+04 1.6600000e+01
  1.0000000e+01 0.0000000e+00 3.3759200e+05 6.7628000e+05 0.0000000e+00
  0.0000000e+00]
 [1.7540600e+05 1.0764561e+03 1.3782765e+06 1.9324141e+04 1.9299999e+01
  1.4000000e+01 0.0000000e+00 2.3884900e+05 8.6697600e+05 0.0000000e+00
  0.0000000e+00]
 [5.4621600e+05 6.5700000e+02 1.2830890e+06 8.4473999e+02 1.2300000e+01
  6.0000000e+00 0.0000000e+00 6.9255000e+04 5.8957800e+05 0.0000000e+00
  0.0000000e+00]
 [3.8478000e+05 7.1000000e+02 1.3513370e+06 2.5787939e+04 1.3400000e+01
  1.6000000e+01 0.0000000e+00 4.9365800e+05 6.4154200e+05 0.0000000e+00
  0.0000000e+00]], shape=(5, 11), dtype=float32)


In [9]:
# Define features and target variable
selected_columns_indices = [1, 2, 3, 7, 8]
X = tf.gather(tensor_data, selected_columns_indices, axis=1)
print(X.shape)

(100000, 5)


In [10]:
y = tensor_data[:, 0]
print(y.shape)
y = tf.expand_dims(y, axis = -1)
print(y.shape)

(100000,)
(100000, 1)


In [11]:
TRAIN_RATIO = 0.8
VAL_RATIO = 0.1
TEST_RATIO = 0.1
DATASET_SIZE = len(X)

In [12]:
X_train = X[:int(DATASET_SIZE * TRAIN_RATIO)]
y_train = y[:int(DATASET_SIZE * TRAIN_RATIO)]
print(X_train.shape)
print(y_train.shape)
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=8, reshuffle_each_iteration=True).batch(32).prefetch(tf.data.AUTOTUNE)


(80000, 5)
(80000, 1)


In [13]:
X_val = X[int(DATASET_SIZE * TRAIN_RATIO):int(DATASET_SIZE * (TRAIN_RATIO + VAL_RATIO))]
y_val = y[int(DATASET_SIZE * TRAIN_RATIO):int(DATASET_SIZE * (TRAIN_RATIO + VAL_RATIO))]
print(X_val.shape)
print(y_val.shape)
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_dataset = val_dataset.shuffle(buffer_size=8, reshuffle_each_iteration=True).batch(32).prefetch(tf.data.AUTOTUNE)

(10000, 5)
(10000, 1)


In [14]:
X_test = X[int(DATASET_SIZE * (TRAIN_RATIO + VAL_RATIO)):]
y_test = y[int(DATASET_SIZE * (TRAIN_RATIO + VAL_RATIO)):]
print(X_test.shape)
print(y_test.shape)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_dataset = test_dataset.shuffle(buffer_size=8, reshuffle_each_iteration=True).batch(32).prefetch(tf.data.AUTOTUNE)

(10000, 5)
(10000, 1)


In [15]:

y = tensor_data[:, -1]
y = tf.expand_dims(y, axis=-1)

In [16]:
normalizer = Normalization()
normalizer.adapt(X_train)
normalizer(X)[:5]

<tf.Tensor: shape=(5, 5), dtype=float32, numpy=
array([[-2.7701718e-01,  1.5247767e+00,  3.7866156e+00,  8.8617653e-02,
        -1.2044505e-02],
       [-2.4836268e-01,  4.4692297e-02, -5.9798080e-01,  1.1395409e-01,
        -1.1101256e-02],
       [ 6.8760608e-05,  5.7494699e-04,  6.9628134e-02, -1.4730275e-01,
         9.3389358e-03],
       [-3.1622857e-01, -9.4811141e-02, -1.4424143e+00, -5.9601903e-01,
        -2.0394612e-02],
       [-2.7626309e-01, -2.6420757e-02,  5.9851640e-01,  5.2687764e-01,
        -1.4824729e-02]], dtype=float32)>

In [17]:
model = tf.keras.Sequential([
    InputLayer(input_shape=(5,)),
    normalizer,
    Dense(128, activation="relu"),
    Dense(128, activation="relu"),
    Dense(128, activation="relu"),
    Dense(1),
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizati  (None, 5)                 11        
 on)                                                             
                                                                 
 dense (Dense)               (None, 128)               768       
                                                                 
 dense_1 (Dense)             (None, 128)               16512     
                                                                 
 dense_2 (Dense)             (None, 128)               16512     
                                                                 
 dense_3 (Dense)             (None, 1)                 129       
                                                                 
Total params: 33932 (132.55 KB)
Trainable params: 33921 (132.50 KB)
Non-trainable params: 11 (48.00 Byte)
________________

In [18]:
model.compile(optimizer = Adam(learning_rate = 0.1),
              loss = MeanAbsoluteError(),
              metrics = RootMeanSquaredError())

In [19]:
history = model.fit(train_dataset, validation_data=val_dataset, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val_loss'])
plt.show()

In [None]:
plt.plot(history.history['root_mean_squared_error'])
plt.plot(history.history['val_root_mean_squared_error'])
plt.title('model performance')
plt.ylabel('rmse')
plt.xlabel('epoch')
plt.legend(['train', 'val'])
plt.show()

In [22]:
model.evaluate(test_dataset)



[11551642.0, 33783384.0]

In [23]:
model.predict(tf.expand_dims(X_test[0], axis = 0 ))



array([[176696.52]], dtype=float32)

In [24]:
y_pred = list(model.predict(X_test)[:,0])
print(y_pred)

[176696.52, 253972.94, 699502.7, 291260.94, 594422.0, 249413.4, 564760.7, 425547.06, 174621.55, 541942.75, 963887.7, 574258.25, 138922.66, 270061.4, 426006.75, 527604.6, 412852.25, 459812.0, 658381.8, 468838.22, 261476.6, 471261.4, 171540.81, 336808.53, 162357.66, 447632.4, 437218.47, 290417.53, 145833.88, 465728.6, 379463.47, 119556.125, 209870.84, 428640.9, 399149.6, 153307.42, 251789.83, 398070.4, 230776.62, 71893.4, 299054.03, 335741.03, 260389.9, 330974.62, 253929.88, 509189.16, 390288.6, 267406.2, 471672.38, 307296.28, 500710.0, 524246.47, 171107.67, 293166.94, 468078.12, 325648.34, 378881.28, 218637.69, 460142.72, 182545.9, 403441.47, 237890.2, 191687.2, 273482.8, 529205.5, 499795.47, 387693.0, 268942.9, 329593.38, 418076.94, 487455.3, 214672.5, 192874.61, 328899.4, 97714.73, 368398.28, 337619.16, 226520.95, 304693.4, 480745.75, 218120.44, 492111.06, 385609.75, 287505.84, 334311.72, 351221.06, 234834.42, 144184.45, 214241.3, 336523.4, 629739.06, 266869.97, 212308.36, 475900.0, 4

In [27]:
sample_input_data = tf.constant([[721, 806949, 8741.74, 256329, 386958]])

In [28]:
sample_predictions = model.predict(sample_input_data)

# Extract and print the predicted values
sample_predicted_values = list(sample_predictions[:, 0])
print(sample_predicted_values)

[269689.56]
