In [31]:
import tensorflow as tf
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [32]:
df =pd.read_csv("cervical-cancer_csv.csv")

In [33]:
df=df.dropna(subset=["Biopsy"])

In [34]:
X = df.drop('Biopsy', axis=1,inplace=False)
y = df['Biopsy']


In [35]:
numeric_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object', 'bool']).columns

numeric_transformer = SimpleImputer(strategy='mean')
categorical_transformer = SimpleImputer(strategy='most_frequent')

# Create the ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Fit and transform the data
df = pd.DataFrame(preprocessor.fit_transform(X), columns=X.columns)
df=pd.concat([df, y.reset_index(drop=True)], axis=1)



In [36]:
scaler = StandardScaler()
# df = scaler.fit_transform(X)
X = df.drop('Biopsy', axis=1,inplace=False)
y = df['Biopsy']
X_normalized_df = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# Concatenate the normalized features with the target variable
df = pd.concat([X_normalized_df, y.reset_index(drop=True)], axis=1)

In [37]:
# Remove duplicate rows
df = df.drop_duplicates()
df.drop(['STDs: Time since first diagnosis', 'STDs: Time since last diagnosis'], axis=1, inplace=True)
categorical_cols = df.select_dtypes(include=['object', 'bool']).columns

In [38]:


X = df.drop('Biopsy', axis=1)
y = df['Biopsy']

print(df)

          Age  Number of sexual partners  First sexual intercourse  \
0   -1.064408                   0.877466                 -0.720724   
1   -1.418269                  -0.940302                 -1.077425   
2    0.822850                  -0.940302                  0.000000   
3    2.946016                   1.483389                 -0.364024   
4    2.238294                   0.271543                  1.419478   
..        ...                        ...                       ...   
830  0.822850                   0.271543                  0.349377   
831  0.586943                  -0.334380                  0.706077   
832 -0.238733                  -0.334380                 -0.007324   
833  0.704897                  -0.334380                  2.489579   
834  0.233082                  -0.334380                  1.062778   

     Num of pregnancies    Smokes  Smokes (years)  Smokes (packs/year)  \
0             -0.928117 -0.422787       -0.305380            -0.208210   
1          

In [39]:
scaler = StandardScaler()
X = df.drop('Biopsy', axis=1,inplace=False)
y = df['Biopsy']
X_normalized_df = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# Concatenate the normalized features with the target variable
df = pd.concat([X_normalized_df, y.reset_index(drop=True)], axis=1)

# Save the scaler parameters
scaler_params = {
    'mean': scaler.mean_.tolist(),
    'scale': scaler.scale_.tolist()
}
X = df.drop('Biopsy', axis=1,inplace=False)
y = df['Biopsy']

In [40]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [41]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [42]:
import json
with open('scaler_params.json', 'w') as f:
    json.dump(scaler_params, f)

In [43]:
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.8782 - loss: 0.5584 - val_accuracy: 0.9521 - val_loss: 0.3389
Epoch 2/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9349 - loss: 0.3228 - val_accuracy: 0.9641 - val_loss: 0.1929
Epoch 3/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9515 - loss: 0.1874 - val_accuracy: 0.9701 - val_loss: 0.1281
Epoch 4/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9616 - loss: 0.1179 - val_accuracy: 0.9760 - val_loss: 0.1058
Epoch 5/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9739 - loss: 0.0821 - val_accuracy: 0.9701 - val_loss: 0.0958
Epoch 6/10
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9550 - loss: 0.1013 - val_accuracy: 0.9641 - val_loss: 0.0955
Epoch 7/10
[1m21/21[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x147c0d59f10>

In [44]:
import numpy as np

test1_prob1= { "age": [50],
  "num_sexual_partners": [10],
  "first_sexual_intercourse": [15],
  "num_pregnancies": [4],
  "smokes": [1],
  "smokes_years": [20],
  "smokes_packs_per_year": [10.5],
  "hormonal_contraceptives": [1],
  "hormonal_contraceptives_years": [15],
  "iud": [1],
  "iud_years": [10],
  "stds": [1],
  "stds_number": [5],
  "stds_condylomatosis": [1],
  "stds_cervical_condylomatosis": [1],
  "stds_vaginal_condylomatosis": [1],
  "stds_vulvo_perineal_condylomatosis": [1],
  "stds_syphilis": [1],
  "stds_pelvic_inflammatory_disease": [1],
  "stds_genital_herpes": [1],
  "stds_molluscum_contagiosum": [1],
  "stds_aids": [1],
  "stds_hiv": [1],
  "stds_hepatitis_b": [1],
  "stds_hpv": [1],
  "stds_number_of_diagnosis": [5],
  "dx_cancer": [1],
  "dx_cin": [1],
  "dx_hpv": [1],
  "dx": [1],
  "hinselmann": [1],
  "schiller": [1],
  "citology": [1]
}
scaler=StandardScaler()
tester_prob1 = pd.DataFrame(test1_prob1)
X_normalized_df = pd.DataFrame(scaler.fit_transform(tester_prob1), columns=tester_prob1.columns)

y_pred_proba = model.predict(X_normalized_df)  # Probabilities for ROC-AUC
y_pred = (y_pred_proba > 0.5).astype(int)

print(f"Predictions: {y_pred_proba}, prob {y_pred}, ")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
Predictions: [[0.16815016]], prob [[0]], 


In [15]:
test1_prob0= {
  "age": [25],
  "num_sexual_partners": [1],
  "first_sexual_intercourse": [20],
  "num_pregnancies": 1,
  "smokes": [0],
  "smokes_years": [0],
  "smokes_packs_per_year":[ 0.0],
  "hormonal_contraceptives": [0],
  "hormonal_contraceptives_years": [0],
  "iud": [0],
  "iud_years": [0],
  "stds": [0],
  "stds_number": [0],
  "stds_condylomatosis": [0],
  "stds_cervical_condylomatosis": [0],
  "stds_vaginal_condylomatosis": [0],
  "stds_vulvo_perineal_condylomatosis": [0],
  "stds_syphilis": [0],
  "stds_pelvic_inflammatory_disease": [0],
  "stds_genital_herpes": [0],
  "stds_molluscum_contagiosum": [0],
  "stds_aids": [0],
  "stds_hiv": [0],
  "stds_hepatitis_b": [0],
  "stds_hpv": [0],
  "stds_number_of_diagnosis": [0],
  "dx_cancer": [0],
  "dx_cin": [0],
  "dx_hpv": [0],
  "dx": [0],
  "hinselmann": [0],
  "schiller": [0],
  "citology": [0]
}


# scaler=StandardScaler()
tester_prob0 = pd.DataFrame(test1_prob0)
X_normalized_df_0 = pd.DataFrame(scaler.fit_transform(tester_prob0), columns=tester_prob0.columns)

y_pred_proba = model.predict(X_normalized_df_0)  # Probabilities for ROC-AUC
y_pred = (y_pred_proba > 0.5).astype(int)

print(f"Predictions: {y_pred_proba}, prob {y_pred}, ")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Predictions: [[0.22792003]], prob [[0]], 


In [16]:
import tensorflow as tf

# Assuming `model` is your trained TensorFlow model
model.save('my_model.h5')

# Convert the model to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)




INFO:tensorflow:Assets written to: C:\Users\BoraTech\AppData\Local\Temp\tmpt_5j9nvg\assets


INFO:tensorflow:Assets written to: C:\Users\BoraTech\AppData\Local\Temp\tmpt_5j9nvg\assets


Saved artifact at 'C:\Users\BoraTech\AppData\Local\Temp\tmpt_5j9nvg'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 33), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  1407600716960: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1407600717136: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1407600845920: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1407600845392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1407600847856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1407600848032: TensorSpec(shape=(), dtype=tf.resource, name=None)


In [45]:
import tensorflow as tf
import numpy as np

# Load the model
interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details)
print(output_details)
# Prepare input data


# Run inference
put=[     
      50.0,
      10.0,
      15.0,
      4.0,
      1.0,
      20.0,
      10.5,
      1.0,
      15.0,
      1.0,
      10.0,
      1.0,
      5.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      5.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0 
      ]
# scaler = StandardScaler()
tester_prob1 = pd.DataFrame(put)
tester_prob1 = pd.DataFrame(scaler.fit_transform(tester_prob1), columns=tester_prob1.columns)
# Normalize the test data using the fitted scaler
tester_prob1_normalized = scaler.transform(tester_prob1)

# Ensure the data type matches what the model expects
tester_prob1_normalized = tester_prob1_normalized.astype(np.float32)
print(tester_prob1_normalized)

input_data = np.array(tester_prob1_normalized, dtype=np.float32).reshape((1, -1))
print(input_data)

interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)


[{'name': 'serving_default_keras_tensor:0', 'index': 0, 'shape': array([ 1, 33]), 'shape_signature': array([-1, 33]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
[{'name': 'StatefulPartitionedCall_1:0', 'index': 10, 'shape': array([1, 1]), 'shape_signature': array([-1,  1]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
[[-0.03016578]
 [-0.48548567]
 [-0.4285707 ]
 [-0.55378366]
 [-0.58793265]
 [-0.3716557 ]
 [-0.47979417]
 [-0.58793265]
 [-0.4285707 ]
 [-0.58793265]
 [-0.48548567]
 [-0.58793265]
 [-0.54240066]
 [-0.58793265]
 [-0.58793265]
 [-0.58793265]
 [-0.58793265]
 [-0.58793265]
 [-0.58793265]
 [-0.58793265]
 [-0.58793265]
 [-0.58793265]
 [

In [46]:
test1_prob0= {
  "age": [25],
  "num_sexual_partners": [1],
  "first_sexual_intercourse": [20],
  "num_pregnancies": 1,
  "smokes": [0],
  "smokes_years": [0],
  "smokes_packs_per_year":[ 0.0],
  "hormonal_contraceptives": [0],
  "hormonal_contraceptives_years": [0],
  "iud": [0],
  "iud_years": [0],
  "stds": [0],
  "stds_number": [0],
  "stds_condylomatosis": [0],
  "stds_cervical_condylomatosis": [0],
  "stds_vaginal_condylomatosis": [0],
  "stds_vulvo_perineal_condylomatosis": [0],
  "stds_syphilis": [0],
  "stds_pelvic_inflammatory_disease": [0],
  "stds_genital_herpes": [0],
  "stds_molluscum_contagiosum": [0],
  "stds_aids": [0],
  "stds_hiv": [0],
  "stds_hepatitis_b": [0],
  "stds_hpv": [0],
  "stds_number_of_diagnosis": [0],
  "dx_cancer": [0],
  "dx_cin": [0],
  "dx_hpv": [0],
  "dx": [0],
  "hinselmann": [0],
  "schiller": [0],
  "citology": [0]
}


tester_prob0 = pd.DataFrame(test1_prob0)

y_pred_proba = model.predict(tester_prob0)  # Probabilities for ROC-AUC
y_pred = (y_pred_proba > 0.5).astype(int)

print(f"Predictions: {y_pred_proba}, prob {y_pred}, flat {y_pred_flat}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


NameError: name 'y_pred_flat' is not defined

In [166]:
model.save('cervical_cancer_model_standardized.keras')

model = tf.keras.models.load_model('cervical_cancer_model_standardized.keras')

# Convert the model to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model to a file
with open('cervical_cancer_model_standardized.tflite', 'wb') as f:
    f.write(tflite_model)

INFO:tensorflow:Assets written to: C:\Users\BoreTech\AppData\Local\Temp\tmprnxj836i\assets


  saveable.load_own_variables(weights_store.get(inner_path))
INFO:tensorflow:Assets written to: C:\Users\BoreTech\AppData\Local\Temp\tmprnxj836i\assets


Saved artifact at 'C:\Users\BoreTech\AppData\Local\Temp\tmprnxj836i'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 33), dtype=tf.float32, name='input_layer_8')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  1834201956592: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1834372791280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1834373517376: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1834373518432: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1834373571328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1834373571152: TensorSpec(shape=(), dtype=tf.resource, name=None)


testing


In [47]:
import tensorflow as tf
import numpy as np

# Load the TensorFlow model
model = tf.keras.models.load_model('cervical_cancer_model_standardized.keras')

# Convert the model to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model to a file
with open('cervical_cancer_model_standardized.tflite', 'wb') as f:
    f.write(tflite_model)

# Load the TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path='cervical_cancer_model_standardized.tflite')
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Test data
test1_prob1= { "age": [50],
  "num_sexual_partners": [10],
  "first_sexual_intercourse": [15],
  "num_pregnancies": [4],
  "smokes": [1],
  "smokes_years": [20],
  "smokes_packs_per_year": [10.5],
  "hormonal_contraceptives": [1],
  "hormonal_contraceptives_years": [15],
  "iud": [1],
  "iud_years": [10],
  "stds": [1],
  "stds_number": [5],
  "stds_condylomatosis": [1],
  "stds_cervical_condylomatosis": [1],
  "stds_vaginal_condylomatosis": [1],
  "stds_vulvo_perineal_condylomatosis": [1],
  "stds_syphilis": [1],
  "stds_pelvic_inflammatory_disease": [1],
  "stds_genital_herpes": [1],
  "stds_molluscum_contagiosum": [1],
  "stds_aids": [1],
  "stds_hiv": [1],
  "stds_hepatitis_b": [1],
  "stds_hpv": [1],
  "stds_number_of_diagnosis": [5],
  "dx_cancer": [1],
  "dx_cin": [1],
  "dx_hpv": [1],
  "dx": [1],
  "hinselmann": [1],
  "schiller": [1],
  "citology": [1]
}

import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load scaler parameters
with open('scaler_params.json', 'r') as f:
    scaler_params = json.load(f)

mean = np.array(scaler_params['mean'])
scale = np.array(scaler_params['scale'])

tester_prob1 = pd.DataFrame(test1_prob1)

# Normalize input data
tester_prob1_scaled = (tester_prob1 - mean) / scale
input_data = tester_prob1_scaled.astype(np.float32).to_numpy()

# Set the tensor to the input data
interpreter.set_tensor(input_details[0]['index'], input_data)

# Run inference
interpreter.invoke()

# Get the prediction result
tflite_results = interpreter.get_tensor(output_details[0]['index'])

# Print the TensorFlow Lite prediction
print(f"TensorFlow Lite Predictions: {tflite_results}")


  saveable.load_own_variables(weights_store.get(inner_path))


INFO:tensorflow:Assets written to: C:\Users\BoraTech\AppData\Local\Temp\tmpbhw4n2_j\assets


INFO:tensorflow:Assets written to: C:\Users\BoraTech\AppData\Local\Temp\tmpbhw4n2_j\assets


Saved artifact at 'C:\Users\BoraTech\AppData\Local\Temp\tmpbhw4n2_j'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 33), dtype=tf.float32, name='input_layer_8')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  1407690906960: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1408745186928: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1407691235920: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1407715772544: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1407715589808: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1407715589984: TensorSpec(shape=(), dtype=tf.resource, name=None)
TensorFlow Lite Predictions: [[0.00010774]]
