<a href="https://colab.research.google.com/github/aditi25mip10089/Thyroid-Prediction-App/blob/main/Thyroid_Prediction_App.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Loading dataset
df = pd.read_csv("thyroid_data.csv")

# Encode the target as binary (Yes -> 1, No -> 0).
# Series.map() converts every label that is missing from the mapping (typos,
# different casing, blanks) into NaN without raising, which would otherwise
# only surface later as an unrelated-looking error during the split or fit.
# Validate the labels first so a bad CSV fails here with a clear message.
recurred_codes = {"Yes": 1, "No": 0}
unmapped_rows = ~df["Recurred"].isin(list(recurred_codes))
if unmapped_rows.any():
    bad_labels = df.loc[unmapped_rows, "Recurred"].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'Recurred' column: {bad_labels!r}; "
        f"expected only {list(recurred_codes)!r}"
    )
df["Recurred"] = df["Recurred"].map(recurred_codes)

# Splitting features & target
X = df.drop(columns=["Recurred"])
y = df["Recurred"]

# Identifying feature types
# Select by dtype family rather than by exact dtype names. A column that is
# listed in neither group is not passed to any transformer, and
# ColumnTransformer's default remainder="drop" would then silently discard it.
# Matching only "int64"/"float64"/"object" would miss e.g. int32, nullable
# integers, category, bool or dedicated string dtypes, so instead treat every
# numeric column as numeric and every remaining column as categorical.
numeric_features = X.select_dtypes(include=["number"]).columns
categorical_features = X.select_dtypes(exclude=["number"]).columns

# Classifier: 300-tree random forest with a fixed seed; class_weight="balanced"
# is passed so the two outcome classes are weighted rather than treated equally.
model = RandomForestClassifier(n_estimators=300, class_weight="balanced", random_state=42)

# Column-wise preprocessing: one-hot encode the categorical columns and hand
# the numeric columns through untouched. handle_unknown="ignore" is set on the
# encoder so categories not seen during fit do not raise at predict time.
one_hot = OneHotEncoder(handle_unknown="ignore")
column_steps = [
    ("cat", one_hot, categorical_features),
    ("num", "passthrough", numeric_features),
]
preprocessor = ColumnTransformer(transformers=column_steps)

# Chain preprocessing and the classifier so both are fitted (and later saved)
# as a single estimator.
pipeline = Pipeline(steps=[("preprocess", preprocessor), ("model", model)])

# Training & testing
# Hold out 25% of the rows; stratify on y and fix the seed so the split is
# repeatable.
split_options = dict(test_size=0.25, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

pipeline.fit(X_train, y_train)

# Evaluation on the held-out rows
y_pred = pipeline.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)


              precision    recall  f1-score   support

           0       0.96      1.00      0.98        69
           1       1.00      0.89      0.94        27

    accuracy                           0.97        96
   macro avg       0.98      0.94      0.96        96
weighted avg       0.97      0.97      0.97        96



In [8]:
# Example patient record. The keys are the feature column names of the
# training data (including the dataset's own spelling "Hx Radiothreapy").
new_patient = {
    # Demographics and history
    "Age": 31,
    "Gender": "F",
    "Smoking": "No",
    "Hx Smoking": "No",
    "Hx Radiothreapy": "No",
    # Clinical findings
    "Thyroid Function": "Euthyroid",
    "Physical Examination": "Single nodular goiter-right",
    "Adenopathy": "No",
    "Pathology": "Papillary",
    "Focality": "Uni-Focal",
    "Risk": "Low",
    # Staging and treatment response
    "T": "T1a",
    "N": "N0",
    "M": "M0",
    "Stage": "I",
    "Response": "Excellent",
}

# Wrap the single record in a list to get a one-row DataFrame.
df_new = pd.DataFrame(data=[new_patient])
# Score the single-row frame with the fitted pipeline: row 0 of predict() is
# the class label, and row 0 / column 1 of predict_proba() is read as the
# probability of the positive (recurred) class.
prediction = pipeline.predict(df_new)[0]
probability = pipeline.predict_proba(df_new)[0, 1]

recurred_label = "Yes" if prediction == 1 else "No"
print("Recurred:", recurred_label)
print("Probability of recurrence:", round(probability, 4))


Recurred: No
Probability of recurrence: 0.0033


In [10]:
# Persist the fitted pipeline (preprocessing + classifier) to disk so it can
# be reloaded later without retraining.
import joblib
joblib.dump(value=pipeline, filename="thyroid_model.pkl")


['thyroid_model.pkl']

In [12]:
# Reload the saved pipeline from disk.
# Note: this rebinds `model`, which earlier in the notebook referred to the
# bare RandomForestClassifier; from here on it is the full fitted pipeline.
import joblib

saved_model_path = "thyroid_model.pkl"
model = joblib.load(saved_model_path)


In [13]:
import pandas as pd

# Second example patient record, using the same feature column names as the
# training data (including the dataset's own spelling "Hx Radiothreapy").
new_patient = {
    # Demographics and history
    "Age": 45,
    "Gender": "F",
    "Smoking": "No",
    "Hx Smoking": "No",
    "Hx Radiothreapy": "No",
    # Clinical findings
    "Thyroid Function": "Hypothyroidism",
    "Physical Examination": "Multi nodular goiter-right",
    "Adenopathy": "No",
    "Pathology": "Papillary",
    "Focality": "Uni-Focal",
    "Risk": "Low",
    # Staging and treatment response
    "T": "T1b",
    "N": "N0",
    "M": "M0",
    "Stage": "I",
    "Response": "Excellent",
}

# Wrap the single record in a list to get a one-row DataFrame.
df_new = pd.DataFrame(data=[new_patient])


In [14]:
# Score the single-row frame with the reloaded pipeline: row 0 of predict() is
# the class label, and row 0 / column 1 of predict_proba() is read as the
# probability of the positive (recurred) class.
prediction = model.predict(df_new)[0]
probability = model.predict_proba(df_new)[0, 1]

recurred_label = "Yes" if prediction == 1 else "No"
print("Recurred:", recurred_label)
print("Probability:", round(probability, 4))


Recurred: No
Probability: 0.0033


In [16]:
!pip freeze > requirements.txt