In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers


thyroid = pd.read_csv("filtered_thyroid_data.csv")

for col in thyroid.select_dtypes(include='object').columns:
    thyroid[col] = thyroid[col].str.strip().str.title()

mappings = {
    'Gender': {'M': 0, 'F': 1},
    'Hx Radiothreapy': {'No': 0, 'Yes': 1},
    'Adenopathy': {
        'No': 0, 'Right': 1, 'Left': 1, 'Bilateral': 1, 'Posterior': 1, 'Extensive': 1
    },
    'Pathology': {
        'Papillary': 0, 'Micropapillary': 1, 'Follicular': 1, 'Hurthel Cell': 1
    },
    'Focality': {'Uni-Focal': 0, 'Multi-Focal': 1},
    'Risk': {'Low': 0, 'Intermediate': 1, 'High': 2},
    'T': {
        'T1A': 1, 'T1B': 1, 'T2': 2, 'T3A': 3, 'T3B': 3, 'T4A': 4, 'T4B': 4
    },
    'N': {'N0': 0, 'N1A': 1, 'N1B': 2},
    'M': {'M0': 0, 'M1': 1},
    'Stage': {'I': 1, 'Ii': 2, 'Iii': 3, 'Iva': 4, 'Ivb': 5},
    'Response': {
        'Excellent': 0, 'Indeterminate': 1,
        'Biochemical Incomplete': 2, 'Structural Incomplete': 3
    },
    'Recurred': {'No': 0, 'Yes': 1}
}

for col, mapping in mappings.items():
    if col in thyroid.columns:
        thyroid[col] = thyroid[col].map(mapping)

print("\nMissing values after mapping:")
print(thyroid.isnull().sum())


thyroid.dropna(inplace=True)

X = thyroid.drop(['Response', 'Stage', 'Recurred'], axis=1)
y = thyroid['Response']

print(f"\nFinal dataset shape: {X.shape}, target shape: {y.shape}")
if X.shape[0] == 0:
    raise ValueError("No data left after cleaning! Check mappings or data quality.")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50, batch_size=10, validation_split=0.2)


loss, accuracy = model.evaluate(X_test, y_test)
print(f'\nTest Accuracy: {accuracy:.2f}')



Missing values after mapping:
Age                0
Gender             0
Hx Radiothreapy    0
Adenopathy         0
Pathology          0
Focality           0
Risk               0
T                  0
N                  0
M                  0
Stage              0
Response           0
Recurred           0
dtype: int64

Final dataset shape: (383, 10), target shape: (383,)
Epoch 1/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - accuracy: 0.2493 - loss: 0.0718 - val_accuracy: 0.2903 - val_loss: -0.3058
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.3297 - loss: -0.2590 - val_accuracy: 0.4677 - val_loss: -0.8887
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3304 - loss: -0.8592 - val_accuracy: 0.4677 - val_loss: -1.7186
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3767 - loss: -1.7369 - val_accuracy: 0.5000 -