<a href="https://colab.research.google.com/github/ahamednazeer/AI/blob/master/oral%20cancer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Step 1: Install required packages
!pip install streamlit==1.31.1 pyngrok tensorflow==2.16.1 pandas numpy scikit-learn matplotlib seaborn kaggle

# Step 2: Install ngrok
!wget https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz
!tar -xvzf ngrok-v3-stable-linux-amd64.tgz
!mv ngrok /usr/local/bin/

# Step 3: Clear old ngrok configuration
!rm -f /root/.config/ngrok/ngrok.yml

# Step 4: Set up ngrok authtoken
!ngrok authtoken 2YsmHMOy8PrZC6e17Q0XxAVdvTt_6iSPDiqyJfh2Uu4hZtvqb

# Step 5: Set up Kaggle API and download dataset
import os
from google.colab import files

# Upload kaggle.json
print("Please upload your kaggle.json file:")
uploaded = files.upload()

# Move kaggle.json to the correct directory
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download and unzip the Oral Cancer dataset (force download)
!kaggle datasets download -d shivam17299/oral-cancer-lips-and-tongue-images --force
!unzip -q -o oral-cancer-lips-and-tongue-images.zip -d oral_cancer_dataset

# Step 6: Prepare the dataset and train the model
import tensorflow as tf
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.utils import compute_class_weight
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, roc_auc_score, f1_score
import numpy as np
import pandas as pd
from PIL import Image
import glob

# Define paths
data_dir = '/content/oral_cancer_dataset/OralCancer'

# Image data generators with enhanced augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=40,
    zoom_range=0.3,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.7, 1.3],
    channel_shift_range=20.0,  # Add color jittering
    fill_mode='nearest',
    preprocessing_function=lambda x: x if np.random.rand() > 0.3 else np.fliplr(x)  # Oversample cancerous class
)

# Consistent preprocessing for inference
inference_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='training',
    shuffle=True
)

validation_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='validation',
    shuffle=False
)

# Print class indices to verify labels
print("Class indices:", train_generator.class_indices)

# Compute class weights to handle imbalance
classes = train_generator.classes
class_weights = compute_class_weight('balanced', classes=np.unique(classes), y=classes)
class_weight_dict = dict(enumerate(class_weights))
print("Class weights:", class_weight_dict)

# Load pretrained DenseNet201 model
base_model = DenseNet201(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = True
for layer in base_model.layers[:-20]:
    layer.trainable = False

# Build model with increased dropout
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dropout(0.6),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(1, activation='sigmoid')
])

# Compile model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Callbacks
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train model
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=50,
    class_weight=class_weight_dict,
    callbacks=[lr_scheduler, early_stopping],
    verbose=1
)

# Evaluate model on full validation set
val_images, val_labels = [], []
for i in range(len(validation_generator)):
    imgs, lbls = next(validation_generator)
    val_images.append(imgs)
    val_labels.append(lbls)
val_images = np.vstack(val_images)
val_labels = np.hstack(val_labels)
val_pred = model.predict(val_images)
val_pred_binary = (val_pred > 0.5).astype(int)
accuracy = accuracy_score(val_labels, val_pred_binary)
precision = precision_score(val_labels, val_pred_binary, zero_division=0)
recall = recall_score(val_labels, val_pred_binary, zero_division=0)
f1 = f1_score(val_labels, val_pred_binary, zero_division=0)
cm = confusion_matrix(val_labels, val_pred_binary)
auc = roc_auc_score(val_labels, val_pred)

# Print metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"AUC-ROC: {auc:.4f}")
print(f"Confusion Matrix:\n{cm}")

# Test sample images with batched predictions to reduce retracing
def test_sample_images(model, data_dir, num_samples=5):
    print("\nTesting sample images:")
    for class_name in ['cancer', 'non-cancer']:
        image_paths = glob.glob(os.path.join(data_dir, class_name, '*.jpg'))[:num_samples]
        images = []
        for img_path in image_paths:
            img = Image.open(img_path).convert('RGB')
            img = img.resize((224, 224))
            img_array = np.array(img) / 255.0
            images.append(img_array)
        if images:
            images = np.stack(images, axis=0)
            preds = model.predict(images, verbose=0)
            for i, img_path in enumerate(image_paths):
                pred = preds[i][0]
                pred_label = 'Non-Cancerous' if pred > 0.5 else 'Cancerous'
                print(f"Image: {os.path.basename(img_path)}, True Label: {class_name}, Predicted: {pred_label}, Probability: {pred:.2%}")

test_sample_images(model, data_dir)

# Save model in Keras format
model.save('/content/oral_cancer_model.keras')

# Step 7: Write the Streamlit app code
app_code = """
import streamlit as st
import tensorflow as tf
import numpy as np
from PIL import Image
import os

# Set page configuration
st.set_page_config(page_title="Oral Cancer Predictor", layout="wide")

# Load the pretrained model
@st.cache_resource
def load_model():
    return tf.keras.models.load_model('/content/oral_cancer_model.keras')

model = load_model()

# Function to preprocess image (consistent with training)
def preprocess_image(image):
    image = image.convert('RGB')  # Convert palette images to RGB
    image = image.resize((224, 224))
    image_array = np.array(image) / 255.0
    image_array = np.expand_dims(image_array, axis=0)
    return image_array

# Function to get model metrics
def get_model_metrics():
    return {accuracy}, {precision}, {recall}, {f1}, {auc}, np.array({cm})

# Main Streamlit app
def main():
    st.title("Oral Cancer Prediction System")

    st.sidebar.header("Navigation")
    page = st.sidebar.radio("Select Page", ["Prediction", "Model Metrics"])

    if page == "Prediction":
        st.header("Upload Oral Cavity Image")

        uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

        if uploaded_file is not None:
            image = Image.open(uploaded_file)
            st.image(image, caption='Uploaded Image', use_container_width=True)

            if st.button("Predict"):
                processed_image = preprocess_image(image)
                prediction =植物
model.predict(processed_image, verbose=0)[0][0]
                risk_level = "High" if prediction > 0.7 else "Medium" if prediction > 0.3 else "Low"

                st.subheader("Prediction Results")
                st.write(f"**Risk Level:** {risk_level}")
                st.write(f"**Probability of Non-Cancerous:** {prediction:.2%}")
                st.write(f"**Prediction:** {'Non-Cancerous' if prediction > 0.5 else 'Cancerous'}")

    else:
        st.header("Model Performance Metrics")
        accuracy, precision, recall, f1, auc, cm = get_model_metrics()

        col1, col2, col3, col4 = st.columns(4)
        col1.metric("Accuracy", f"{accuracy:.2%}")
        col2.metric("Precision", f"{precision:.2%}")
        col3.metric("Recall", f"{recall:.2%}")
        col4.metric("F1 Score", f"{f1:.2%}")
        st.metric("AUC-ROC", f"{auc:.2%}")

        st.subheader("Confusion Matrix")
        st.write("The following chart shows the confusion matrix for the model's performance:")

        # Chart.js confusion matrix
        st.components.v1.html(
            f'''
            <canvas id="confusionMatrix"></canvas>
            <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
            <script src="https://cdn.jsdelivr.net/npm/chartjs-chart-matrix"></script>
            <script>
                const ctx = document.getElementById('confusionMatrix').getContext('2d');
                new Chart(ctx, {{
                    type: 'matrix',
                    data: {{
                        datasets: [{{
                            label: 'Confusion Matrix',
                            data: [
                                {{x: 'Non-Cancerous', y: 'Non-Cancerous', v: {cm[0,0]}}},
                                {{x: 'Cancerous', y: 'Non-Cancerous', v: {cm[0,1]}}},
                                {{x: 'Non-Cancerous', y: 'Cancerous', v: {cm[1,0]}}},
                                {{x: 'Cancerous', y: 'Cancerous', v: {cm[1,1]}}}
                            ],
                            backgroundColor: (ctx) => {{
                                const value = ctx.raw.v;
                                const max = Math.max({cm.max()}, 1);
                                const intensity = Math.min(value / max, 1);
                                return `rgba(0, 114, 178, ${{intensity * 0.8 + 0.2}})`;
                            }},
                            borderColor: '#ffffff',
                            borderWidth: 1,
                            width: ({{chart}}) => (chart.chartArea.width - 10) / 2,
                            height: ({{chart}}) => (chart.chartArea.height - 10) / 2
                        }}]
                    }},
                    options: {{
                        plugins: {{
                            legend: {{display: false}},
                            title: {{
                                display: true,
                                text: 'Confusion Matrix',
                                color: '#333333'
                            }}
                        }},
                        scales: {{
                            x: {{
                                ticks: {{color: '#333333'}},
                                title: {{display: true, text: 'Predicted', color: '#333333'}}
                            }},
                            y: {{
                                ticks: {{color: '#333333'}},
                                title: {{display: true, text: 'Actual', color: '#333333'}}
                            }}
                        }}
                    }}
                }});
            </script>
            ''',
            height=400
        )

if __name__ == "__main__":
    main()
""".format(
    accuracy=accuracy,
    precision=precision,
    recall=recall,
    f1=f1,
    auc=auc,
    cm=cm.tolist()
)

# Write the app code to app.py
with open("app.py", "w") as f:
    f.write(app_code)

# Step 8: Run Streamlit and ngrok
from pyngrok import ngrok
import subprocess

# Start Streamlit app in the background
subprocess.Popen(["streamlit", "run", "app.py", "--server.port", "8501"])

# Create ngrok tunnel
public_url = ngrok.connect(8501, bind_tls=True)
print("Public URL:", public_url)

--2025-07-19 14:54:10--  https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz
Resolving bin.equinox.io (bin.equinox.io)... 13.248.244.96, 35.71.179.82, 75.2.60.68, ...
Connecting to bin.equinox.io (bin.equinox.io)|13.248.244.96|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9260570 (8.8M) [application/octet-stream]
Saving to: ‘ngrok-v3-stable-linux-amd64.tgz.3’


2025-07-19 14:54:11 (13.5 MB/s) - ‘ngrok-v3-stable-linux-amd64.tgz.3’ saved [9260570/9260570]

ngrok
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Please upload your kaggle.json file:


Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/shivam17299/oral-cancer-lips-and-tongue-images
License(s): copyright-authors
Downloading oral-cancer-lips-and-tongue-images.zip to /content
  0% 0.00/27.7M [00:00<?, ?B/s]
100% 27.7M/27.7M [00:00<00:00, 709MB/s]
Found 106 images belonging to 2 classes.
Found 25 images belonging to 2 classes.
Class indices: {'cancer': 0, 'non-cancer': 1}
Class weights: {0: 0.7571428571428571, 1: 1.4722222222222223}


  self._warn_if_super_not_called()


Epoch 1/50
[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m1:31[0m 30s/step - accuracy: 0.4688 - loss: 0.7531



[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 11s/step - accuracy: 0.4470 - loss: 0.7847 - val_accuracy: 0.7200 - val_loss: 0.5772 - learning_rate: 1.0000e-04
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 7s/step - accuracy: 0.5635 - loss: 0.7199 - val_accuracy: 0.7200 - val_loss: 0.5601 - learning_rate: 1.0000e-04
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 9s/step - accuracy: 0.5357 - loss: 0.7801 - val_accuracy: 0.6800 - val_loss: 0.5599 - learning_rate: 1.0000e-04
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 9s/step - accuracy: 0.6482 - loss: 0.6676 - val_accuracy: 0.7200 - val_loss: 0.5368 - learning_rate: 1.0000e-04
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 7s/step - accuracy: 0.5895 - loss: 0.8256 - val_accuracy: 0.7600 - val_loss: 0.5298 - learning_rate: 1.0000e-04
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 7s/ste



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10s/step
Accuracy: 0.8800
Precision: 0.8571
Recall: 0.7500
F1 Score: 0.8000
AUC-ROC: 0.9706
Confusion Matrix:
[[16  1]
 [ 2  6]]

Testing sample images:




Image: oral-cancer__ProtectWyJQcm90ZWN0Il0_FocusFillWzI5NCwyMjIsIngiLDIwXQ.jpg, True Label: cancer, Predicted: Cancerous, Probability: 1.13%
Image: cancer 6.jpg, True Label: cancer, Predicted: Cancerous, Probability: 3.00%
Image: a342a6a0-d3f6-4459-b99a-6a4d4313aa9f.jpg, True Label: cancer, Predicted: Cancerous, Probability: 8.68%
Image: f063-001a-9781416025276.jpg, True Label: cancer, Predicted: Non-Cancerous, Probability: 74.43%
Image: lip-cancer-pictures-youtube-regarding-lip-cancer-pictures.jpg, True Label: cancer, Predicted: Cancerous, Probability: 3.08%
Image: WP_20141221_0012.jpg, True Label: non-cancer, Predicted: Non-Cancerous, Probability: 89.62%
Image: tongue.jpg, True Label: non-cancer, Predicted: Non-Cancerous, Probability: 93.10%
Image: 20200314_1130302.jpg, True Label: non-cancer, Predicted: Non-Cancerous, Probability: 89.81%
Image: DSC_3536_2.jpg, True Label: non-cancer, Predicted: Non-Cancerous, Probability: 87.92%
Image: 20200314_1129243.jpg, True Label: non-cancer, P

KeyError: 'risk_level'