In [None]:
!pip install -q tensorflow
!pip install -q streamlit
!pip install -q Pillow
!pip install -q numpy
!pip install -q matplotlib
!pip install -q pyngrok

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m37.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Mount Google Drive at /content/drive; the dataset path used by train.py and
# app.py lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%%writefile train.py
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# --- 1. CONFIGURATION ---
DATASET_PATH = '/content/drive/My Drive/Colab Notebooks/data'
TRAIN_DIR = os.path.join(DATASET_PATH, 'train')
VALIDATION_DIR = os.path.join(DATASET_PATH, 'val')

IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
NUM_EPOCHS = 15

# Fail fast with a clear message when the dataset folders are missing
# (for example, when Google Drive has not been mounted yet).
if not os.path.isdir(TRAIN_DIR) or not os.path.isdir(VALIDATION_DIR):
    raise FileNotFoundError(f"Missing required directories. Please ensure '{TRAIN_DIR}' and '{VALIDATION_DIR}' exist.")

# Each class is one sub-directory of TRAIN_DIR. Count directories only, so a
# stray file (e.g. a README or .DS_Store) does not inflate the size of the
# softmax layer relative to the classes the data generator will produce.
NUM_CLASSES = sum(
    os.path.isdir(os.path.join(TRAIN_DIR, entry)) for entry in os.listdir(TRAIN_DIR)
)
print(f"Number of classes detected: {NUM_CLASSES}")

# --- 2. DATA PREPROCESSING AND AUGMENTATION ---
# Random geometric augmentation applied to training images only.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Both generators scale pixel values by 1/255; validation gets no augmentation.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
validation_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by the two directory iterators.
flow_options = dict(
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)
train_generator = train_datagen.flow_from_directory(TRAIN_DIR, **flow_options)
validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR, **flow_options)

# Class labels in the order stored in the generator's class_indices mapping.
class_names = [*train_generator.class_indices]
print("Class names:", class_names)

# --- 3. MODEL TRAINING ---
def create_custom_cnn_model():
    """Build and compile the from-scratch CNN baseline.

    Three Conv2D + MaxPooling2D stages (32, 64 and 128 filters) feed a
    512-unit dense layer with 50% dropout, followed by a softmax over
    NUM_CLASSES.

    Returns:
        A compiled Sequential model using the Adam optimizer, categorical
        cross-entropy loss and an accuracy metric.
    """
    height, width = IMAGE_SIZE

    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(height, width, 3)))
    # The remaining convolution stages differ only in their filter count.
    for filters in (64, 128):
        model.add(MaxPooling2D((2, 2)))
        model.add(Conv2D(filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))

    # Classification head.
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(NUM_CLASSES, activation='softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

def create_transfer_learning_model():
    """Build and compile a classifier on top of a frozen EfficientNetB0.

    The Keras EfficientNet models normalise their input internally and expect
    raw pixel values in the [0, 255] range. The data generators in this script
    already scale images by 1/255, so a Rescaling(255.0) layer restores the
    expected range before the backbone. Keeping the correction inside the
    model means every caller (training, evaluation and the Streamlit app,
    which also divides by 255) can keep feeding [0, 1] images unchanged.

    Returns:
        A compiled Sequential model (Adam with learning rate 1e-4, categorical
        cross-entropy loss, accuracy metric) whose backbone weights are frozen.
    """
    # Local import: only this model needs the layer.
    from tensorflow.keras.layers import Rescaling

    input_shape = (IMAGE_SIZE[0], IMAGE_SIZE[1], 3)
    base_model = EfficientNetB0(input_shape=input_shape,
                                include_top=False,
                                weights='imagenet')
    # Freeze the ImageNet backbone; only the new head is trained.
    base_model.trainable = False
    model = Sequential([
        tf.keras.Input(shape=input_shape),
        Rescaling(255.0),  # undo the generators' 1/255 scaling
        base_model,
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(NUM_CLASSES, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Train the from-scratch baseline.
print("\n--- Training Custom CNN Model ---")
cnn_model = create_custom_cnn_model()
cnn_history = cnn_model.fit(train_generator, validation_data=validation_generator, epochs=NUM_EPOCHS)

# Train the EfficientNetB0-based model on the same generators.
print("\n--- Training Transfer Learning (EfficientNetB0) Model ---")
tl_model = create_transfer_learning_model()
tl_history = tl_model.fit(train_generator, validation_data=validation_generator, epochs=NUM_EPOCHS)

# Score both candidates on the validation set.
cnn_loss, cnn_acc = cnn_model.evaluate(validation_generator)
tl_loss, tl_acc = tl_model.evaluate(validation_generator)

print(f"\nCustom CNN Validation Accuracy: {cnn_acc:.4f}")
print(f"Transfer Learning (EfficientNetB0) Validation Accuracy: {tl_acc:.4f}")

# Keep whichever model scored higher; a tie goes to the custom CNN.
transfer_learning_wins = tl_acc > cnn_acc
if transfer_learning_wins:
    winner_label, winner_model, best_model_history = "Transfer Learning", tl_model, tl_history
else:
    winner_label, winner_model, best_model_history = "Custom CNN", cnn_model, cnn_history

print(f"Saving the {winner_label} model as 'best_model.h5'")
winner_model.save('best_model.h5')

def _save_history_curve(history, metric, title, ylabel, filename):
    """Plot the train/validation curves of one metric and save them to a file.

    Each curve gets its own figure. Previously both plots shared one figure
    that was saved twice, so 'accuracy_plot.png' contained a half-empty canvas
    and 'loss_plot.png' contained both panels.

    Args:
        history: Keras History object returned by ``model.fit``.
        metric: Key in ``history.history`` ('accuracy' or 'loss'); the
            validation series is read from ``'val_' + metric``.
        title: Figure title.
        ylabel: Label of the y axis.
        filename: Path of the image file to write.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(history.history[metric])
    ax.plot(history.history[f'val_{metric}'])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    fig.tight_layout()
    fig.savefig(filename)
    plt.close(fig)  # release the figure's memory once it is on disk


_save_history_curve(best_model_history, 'accuracy', 'Model Accuracy', 'Accuracy', 'accuracy_plot.png')
_save_history_curve(best_model_history, 'loss', 'Model Loss', 'Loss', 'loss_plot.png')

print("Training completed. Plots saved as 'accuracy_plot.png' and 'loss_plot.png'")
print("Best model saved as 'best_model.h5'")

Writing train.py


In [None]:
# Run the train.py script written by the previous cell as a separate Python
# process; its console output is streamed into this cell.
!python train.py

2025-08-13 05:16:40.642724: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755062200.674715    1142 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755062200.684738    1142 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755062200.708726    1142 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1755062200.708764    1142 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1755062200.708772    1142 computation_placer.cc:177] computation placer alr

In [None]:
%%writefile app.py
import streamlit as st
from PIL import Image
import numpy as np
import tensorflow as tf
import os

# Streamlit documents set_page_config as the first Streamlit command of a page.
# It is called before anything below can emit st.error / st.warning output.
st.set_page_config(page_title="Fish Species Classifier", layout="centered")

IMAGE_SIZE = (224, 224)

DATASET_PATH = '/content/drive/My Drive/Colab Notebooks/data'
TRAIN_DIR = os.path.join(DATASET_PATH, 'train')


@st.cache_resource
def load_classifier(model_path='best_model.h5'):
    """Load the trained Keras model once per server process.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded model avoids re-reading the file on each rerun.

    Args:
        model_path: Path of the saved model file.

    Returns:
        The model returned by ``tf.keras.models.load_model``.
    """
    return tf.keras.models.load_model(model_path)


try:
    model = load_classifier()
except Exception as e:
    st.error(f"Error loading model: {e}")
    st.info("Please run `train.py` first to create the model file.")
    st.stop()

# Class labels are the sorted sub-directory names of the training folder.
if os.path.isdir(TRAIN_DIR):
    class_names = sorted([d for d in os.listdir(TRAIN_DIR) if os.path.isdir(os.path.join(TRAIN_DIR, d))])
else:
    class_names = ["Class 1", "Class 2", "Class 3"]
    st.warning("Could not find the 'data/train' directory. Using placeholder class names.")

st.title("🐟 Multiclass Fish Image Classification")
st.markdown("Upload an image of a fish and the model will predict its species.")

uploaded_file = st.file_uploader("Choose a fish image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Force 3-channel RGB: PNG uploads may carry an alpha channel or a palette
    # and JPEGs may be greyscale, none of which match the model's 3-channel input.
    image = Image.open(uploaded_file).convert('RGB')
    st.image(image, caption='Uploaded Image', use_column_width=True)
    st.write("")

    if st.button("Predict"):
        with st.spinner("Classifying..."):
            # Same preprocessing as training: resize, scale to [0, 1], add batch axis.
            img_resized = image.resize(IMAGE_SIZE)
            img_array = np.asarray(img_resized, dtype=np.float32) / 255.0
            img_array = np.expand_dims(img_array, axis=0)

            probabilities = model.predict(img_array)[0]
            predicted_class_index = int(np.argmax(probabilities))

            # Placeholder or stale class names may not match the model's output
            # size; fall back to generic labels instead of raising IndexError.
            if len(class_names) == len(probabilities):
                display_names = class_names
            else:
                st.warning(
                    f"Model outputs {len(probabilities)} classes but {len(class_names)} "
                    "class names were found. Showing generic labels."
                )
                display_names = [f"Class {i + 1}" for i in range(len(probabilities))]

            predicted_class_name = display_names[predicted_class_index]
            confidence_score = probabilities[predicted_class_index]

            st.success(f"Prediction: **{predicted_class_name}**")
            st.info(f"Confidence: {confidence_score*100:.2f}%")

            st.subheader("All Class Probabilities")
            for name, score in zip(display_names, probabilities):
                st.write(f"- **{name}**: {score*100:.2f}%")

Writing app.py


In [None]:
print("Starting Streamlit app...")
from pyngrok import ngrok
import subprocess
import time

# ngrok needs an authtoken, available from
# https://dashboard.ngrok.com/auth/your-authtoken
# If the tunnel cannot be opened, configure one with:
#   !ngrok config add-authtoken YOUR_AUTHTOKEN_HERE
try:
    # Drop any tunnel left over from an earlier run, then expose port 8501.
    ngrok.kill()
    ngrok_tunnel = ngrok.connect(bind_tls=True, addr=8501)
    print(f"Your Streamlit app is running at: {ngrok_tunnel.public_url}")
except Exception as e:
    print("\n".join([
        f"Error starting ngrok tunnel: {e}",
        "Please make sure you have an ngrok authtoken configured.",
        "Go to https://dashboard.ngrok.com/auth/your-authtoken to get one.",
        "Then run: !ngrok config add-authtoken YOUR_AUTHTOKEN_HERE",
    ]))


# The Streamlit server is launched in the background whether or not the
# tunnel could be opened.
streamlit_command = "streamlit run app.py --server.port=8501 --server.headless=true".split()
subprocess.Popen(streamlit_command)

Starting Streamlit app...


ERROR:pyngrok.process.ngrok:t=2025-08-11T16:58:47+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration file: https://ngrok.com/docs/secure-tunnels/ngrok-agent/reference/config\nYou can view your current agent sessions in the dashboard:\nhttps://dashboard.ngrok.com/agents\r\n\r\nERR_NGROK_108\r\n"
ERROR:pyngrok.process.ngrok:t=2025-08-11T16:58:47+0000 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok st

Error starting ngrok tunnel: The ngrok process errored on start: authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration file: https://ngrok.com/docs/secure-tunnels/ngrok-agent/reference/config\nYou can view your current agent sessions in the dashboard:\nhttps://dashboard.ngrok.com/agents\r\n\r\nERR_NGROK_108\r\n.
Please make sure you have an ngrok authtoken configured.
Go to https://dashboard.ngrok.com/auth/your-authtoken to get one.
Then run: !ngrok config add-authtoken YOUR_AUTHTOKEN_HERE


<Popen: returncode: None args: ['streamlit', 'run', 'app.py', '--server.port...>

In [None]:
# ==============================================================================
# Step 6: Run the Streamlit App
# ==============================================================================

print("Starting Streamlit app...")

import os
import subprocess
from getpass import getpass

from pyngrok import ngrok

# 1. Configure ngrok with your personal authtoken.
# SECURITY: the token is a credential and must never be written into the
# notebook. It is read from the NGROK_AUTHTOKEN environment variable, or asked
# for interactively without being echoed or saved in the cell output. A token
# that was previously pasted into this notebook must be treated as leaked and
# revoked in the ngrok dashboard.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

# 2. Start the Streamlit app and create a public URL
try:
    # Kill any existing ngrok tunnels to prevent conflicts
    ngrok.kill()

    # Create a tunnel for the Streamlit app, which runs on port 8501
    ngrok_tunnel = ngrok.connect(addr=8501, bind_tls=True)

    print(f"Your Streamlit app is running at: {ngrok_tunnel.public_url}")

    # Start the Streamlit app in the background
    subprocess.Popen(["streamlit", "run", "app.py", "--server.port=8501", "--server.headless=true"])

except Exception as e:
    print(f"Error starting ngrok tunnel: {e}")
    print("Please check that your authtoken is correct and try again.")

Starting Streamlit app...
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Your Streamlit app is running at: https://0d876223e6e9.ngrok-free.app


In [None]:
!pkill streamlit
!nohup streamlit run app.py &

nohup: appending output to 'nohup.out'


In [None]:
# ==============================================================================
# Final step to run your Streamlit app
# ==============================================================================

print("Starting Streamlit app...")

# Terminate any ngrok process left over from an earlier cell, so the account's
# single-agent-session limit is not exceeded when the new tunnel is opened.
from pyngrok import ngrok
ngrok.kill()

import subprocess
import time

try:
    # Public HTTPS tunnel to the port Streamlit listens on (8501).
    ngrok_tunnel = ngrok.connect(bind_tls=True, addr=8501)
    print(f"Your Streamlit app is running at: {ngrok_tunnel.public_url}")

    # Launch the Streamlit server as a background process.
    subprocess.Popen("streamlit run app.py --server.port=8501 --server.headless=true".split())
except Exception as e:
    print(f"Error starting ngrok tunnel: {e}\nPlease check that your authtoken is correct and try again.")

Starting Streamlit app...
Your Streamlit app is running at: https://03d321b6442c.ngrok-free.app


In [None]:
# Please execute this one.

In [None]:
# ==============================================================================
# Corrected and Consolidated Google Colab Code
# All steps are now in a single block to prevent execution errors.
# ==============================================================================

import os
import subprocess
import time
from pyngrok import ngrok

# --- Step 1: Install Libraries ---
print("Step 1: Installing necessary libraries...")
# We use a non-interactive installation with -q to avoid any prompts
# The libraries are installed at the beginning of the notebook's lifecycle
!pip install -q tensorflow streamlit Pillow numpy matplotlib pyngrok
print("Libraries installed successfully.")

# --- Step 2: Mount Google Drive ---
from google.colab import drive
print("\nStep 2: Mounting Google Drive...")
# This will prompt you to authorize Colab to access your Google Drive
drive.mount('/content/drive')
print("Google Drive mounted.")

# --- Step 3: Write the train.py script (Improved Version) ---
# `%%writefile` is a cell magic: it only works as the first line of its own
# cell, so it cannot be used inside this consolidated cell. Instead, the script
# text is written to train.py with ordinary Python file I/O and then executed
# with `!python train.py` in Step 4. The script contains the improved training
# logic with ModelCheckpoint and EarlyStopping callbacks.

print("\nStep 3: Creating `train.py` script...")
with open('train.py', 'w') as f:
    f.write("""
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt

# --- 1. CONFIGURATION ---
# IMPORTANT: Update this path to where your 'data' folder is on Google Drive
DATASET_PATH = '/content/drive/My Drive/Colab Notebooks/data'
TRAIN_DIR = os.path.join(DATASET_PATH, 'train')
VALIDATION_DIR = os.path.join(DATASET_PATH, 'val')

IMAGE_SIZE = (300, 300)
BATCH_SIZE = 32
NUM_EPOCHS = 30

if not os.path.exists(TRAIN_DIR) or not os.path.exists(VALIDATION_DIR):
    raise FileNotFoundError(f"Missing required directories. Please ensure '{TRAIN_DIR}' and '{VALIDATION_DIR}' exist.")

# Count class sub-directories only; a stray file in TRAIN_DIR is not a class
# and must not enlarge the softmax layer.
NUM_CLASSES = sum(os.path.isdir(os.path.join(TRAIN_DIR, entry)) for entry in os.listdir(TRAIN_DIR))
print(f"Number of classes detected: {NUM_CLASSES}")

# --- 2. DATA PREPROCESSING AND AUGMENTATION ---
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

class_names = sorted(list(train_generator.class_indices.keys()))
print("Class names:", class_names)

# --- 3. MODEL TRAINING WITH FINE-TUNING ---
print("\\n--- Training Transfer Learning (EfficientNetB0) Model with Fine-Tuning ---")

from tensorflow.keras.layers import Rescaling

INPUT_SHAPE = (IMAGE_SIZE[0], IMAGE_SIZE[1], 3)

base_model = EfficientNetB0(input_shape=INPUT_SHAPE,
                            include_top=False,
                            weights='imagenet')

# Freeze the ImageNet backbone; only the new classification head is trained.
base_model.trainable = False

# Keras EfficientNet models normalise their input internally and expect pixel
# values in [0, 255]. The generators above (and the Streamlit app) scale images
# to [0, 1], so Rescaling(255.0) restores the expected range inside the model.
model = Sequential([
    tf.keras.Input(shape=INPUT_SHAPE),
    Rescaling(255.0),
    base_model,
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

checkpoint_callback = ModelCheckpoint(
    filepath='best_model.h5',
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1
)
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

history = model.fit(
    train_generator,
    epochs=NUM_EPOCHS,
    validation_data=validation_generator,
    callbacks=[checkpoint_callback, early_stopping_callback]
)

# Plot training history.
# Each curve gets its own figure: saving one shared figure twice left
# 'accuracy_plot.png' with an empty second panel and put both panels into
# 'loss_plot.png'.
def save_history_curve(metric, title, ylabel, filename):
    # Plot the train/validation series of `metric` from `history` and save it.
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    fig.tight_layout()
    fig.savefig(filename)
    plt.close(fig)

save_history_curve('accuracy', 'Model Accuracy', 'Accuracy', 'accuracy_plot.png')
save_history_curve('loss', 'Model Loss', 'Loss', 'loss_plot.png')

print("Training completed. Plots saved as 'accuracy_plot.png' and 'loss_plot.png'")
print("Best model saved as 'best_model.h5'")
""")
print("`train.py` script created.")


# --- Step 4: Run the training script ---
print("\nStep 4: Starting model training...")
# `!python` runs train.py (written in Step 3) as a separate Python process;
# its console output is streamed into this cell.
!python train.py

# --- Step 5: Write the app.py script ---
print("\nStep 5: Creating `app.py` script...")
with open('app.py', 'w') as f:
    f.write("""
import streamlit as st
from PIL import Image
import numpy as np
import tensorflow as tf
import os

# Streamlit documents set_page_config as the first Streamlit command of a page.
# It is called before anything below can emit st.error / st.warning output.
st.set_page_config(page_title="Fish Species Classifier", layout="centered")


@st.cache_resource
def load_classifier(model_path='best_model.h5'):
    # Streamlit re-executes this script on every widget interaction; caching
    # the loaded model avoids re-reading the file on each rerun.
    return tf.keras.models.load_model(model_path)


try:
    model = load_classifier()
except Exception as e:
    st.error(f"Error loading model: {e}")
    st.info("Please run `train.py` first to create the model file.")
    st.stop()

IMAGE_SIZE = (300, 300)

DATASET_PATH = '/content/drive/My Drive/Colab Notebooks/data'
TRAIN_DIR = os.path.join(DATASET_PATH, 'train')
# Class labels are the sorted sub-directory names of the training folder.
if os.path.isdir(TRAIN_DIR):
    class_names = sorted([d for d in os.listdir(TRAIN_DIR) if os.path.isdir(os.path.join(TRAIN_DIR, d))])
else:
    class_names = ["Class 1", "Class 2", "Class 3"]
    st.warning("Could not find the 'data/train' directory. Using placeholder class names.")

st.title("🐟 Multiclass Fish Image Classification")
st.markdown("Upload an image of a fish and the model will predict its species.")

uploaded_file = st.file_uploader("Choose a fish image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Force 3-channel RGB: PNG uploads may carry an alpha channel or a palette
    # and JPEGs may be greyscale, none of which match the model's 3-channel input.
    image = Image.open(uploaded_file).convert('RGB')
    st.image(image, caption='Uploaded Image', use_column_width=True)
    st.write("")

    if st.button("Predict"):
        with st.spinner("Classifying..."):
            # Same preprocessing as training: resize, scale to [0, 1], add batch axis.
            img_resized = image.resize(IMAGE_SIZE)
            img_array = np.asarray(img_resized, dtype=np.float32) / 255.0
            img_array = np.expand_dims(img_array, axis=0)

            probabilities = model.predict(img_array)[0]
            predicted_class_index = int(np.argmax(probabilities))

            # Placeholder or stale class names may not match the model's output
            # size; fall back to generic labels instead of raising IndexError.
            if len(class_names) == len(probabilities):
                display_names = class_names
            else:
                st.warning(
                    f"Model outputs {len(probabilities)} classes but {len(class_names)} "
                    "class names were found. Showing generic labels."
                )
                display_names = [f"Class {i + 1}" for i in range(len(probabilities))]

            predicted_class_name = display_names[predicted_class_index]
            confidence_score = probabilities[predicted_class_index]

            st.success(f"Prediction: **{predicted_class_name}**")
            st.info(f"Confidence: {confidence_score*100:.2f}%")

            st.subheader("All Class Probabilities")
            for name, score in zip(display_names, probabilities):
                st.write(f"- **{name}**: {score*100:.2f}%")
""")
print("`app.py` script created.")

# --- Step 6: Run Streamlit and expose it to a public URL with ngrok ---
print("\nStep 6: Starting Streamlit app...")

from getpass import getpass

try:
    # Best effort: free the single agent session allowed on a free ngrok account.
    ngrok.kill()
except Exception as e:
    # Report instead of silently swallowing; a failed kill is not fatal here.
    print(f"Could not stop a previous ngrok process: {e}")

# SECURITY: the authtoken is a credential and must never be written into the
# notebook. It is read from the NGROK_AUTHTOKEN environment variable, or asked
# for interactively without being echoed or saved in the cell output. A token
# that was previously pasted into this notebook must be treated as leaked and
# revoked in the ngrok dashboard (https://ngrok.com/).
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

try:
    ngrok_tunnel = ngrok.connect(addr=8501, bind_tls=True)
    print(f"Your Streamlit app is running at: {ngrok_tunnel.public_url}")

    subprocess.Popen(["streamlit", "run", "app.py", "--server.port=8501", "--server.headless=true"])

except Exception as e:
    print(f"Error starting ngrok tunnel: {e}")
    print("Please check that your authtoken is correct and try again.")


Step 1: Installing necessary libraries...
Libraries installed successfully.

Step 2: Mounting Google Drive...
Mounted at /content/drive
Google Drive mounted.

Step 3: Creating `train.py` script...
`train.py` script created.

Step 4: Starting model training...
2025-08-12 08:17:50.353008: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754986670.379711    1806 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754986670.387793    1806 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1754986670.407465    1806 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0

In [None]:

# --- Step 6: Run Streamlit and expose it to a public URL with ngrok ---
# Imports are repeated here so this cell also works on a fresh kernel, without
# relying on names left behind by another cell.
import os
import subprocess
from getpass import getpass

from pyngrok import ngrok

print("\nStep 6: Starting Streamlit app...")

try:
    # Best effort: free the single agent session allowed on a free ngrok account.
    ngrok.kill()
except Exception as e:
    # Report instead of silently swallowing; a failed kill is not fatal here.
    print(f"Could not stop a previous ngrok process: {e}")

# SECURITY: the authtoken is a credential and must never be written into the
# notebook. It is read from the NGROK_AUTHTOKEN environment variable, or asked
# for interactively without being echoed or saved in the cell output. A token
# that was previously pasted into this notebook must be treated as leaked and
# revoked in the ngrok dashboard (https://ngrok.com/).
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

try:
    ngrok_tunnel = ngrok.connect(addr=8501, bind_tls=True)
    print(f"Your Streamlit app is running at: {ngrok_tunnel.public_url}")

    subprocess.Popen(["streamlit", "run", "app.py", "--server.port=8501", "--server.headless=true"])

except Exception as e:
    print(f"Error starting ngrok tunnel: {e}")
    print("Please check that your authtoken is correct and try again.")



Step 6: Starting Streamlit app...
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Your Streamlit app is running at: https://9288ef0a20fc.ngrok-free.app


In [None]:
# ==============================================================================
# Corrected Fish Classification Project with Hugging Face Vision Transformer
# This code fixes the TypeError and adds a robust ngrok session handler.
# ==============================================================================

import os
import subprocess
import time
from pyngrok import ngrok

# --- Step 1: Install Libraries ---
print("Step 1: Installing necessary libraries...")
# Install Hugging Face transformers, datasets, and evaluate
!pip install -q transformers datasets Pillow numpy matplotlib streamlit pyngrok evaluate
print("Libraries installed successfully.")

# --- Step 2: Mount Google Drive ---
from google.colab import drive
print("\nStep 2: Mounting Google Drive...")
drive.mount('/content/drive')
print("Google Drive mounted.")

# --- Step 3: Write the train.py script with Hugging Face logic ---
print("\nStep 3: Creating `train.py` script with Hugging Face Vision Transformer...")
with open('train.py', 'w') as f:
    f.write("""
import os
import numpy as np
import torch
from datasets import load_dataset, Dataset
from transformers import ViTForImageClassification, ViTImageProcessor, TrainingArguments, Trainer
import evaluate
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image

# --- 1. CONFIGURATION ---
# Please ensure this path is correct for your dataset
DATASET_PATH = '/content/drive/My Drive/Colab Notebooks/data'
TRAIN_DIR = os.path.join(DATASET_PATH, 'train')
VAL_DIR = os.path.join(DATASET_PATH, 'val')

if not os.path.exists(TRAIN_DIR) or not os.path.exists(VAL_DIR):
    raise FileNotFoundError(f"Dataset path '{DATASET_PATH}' not found. Please check your Google Drive.")

MODEL_CHECKPOINT = "google/vit-base-patch16-224-in21k" # A powerful pre-trained Vision Transformer
BATCH_SIZE = 16
NUM_EPOCHS = 10
LEARNING_RATE = 2e-5

# --- 2. DATA PREPARATION FOR HUGGING FACE DATASETS ---
print("Preparing dataset...")

# Get ALL unique labels from both train and val directories.
# Only sub-directories are classes: a stray file next to the class folders
# would otherwise become a phantom label with no images, and enlarge the
# classification head.
def list_class_dirs(data_dir):
    return {name for name in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, name))}

all_labels = list_class_dirs(TRAIN_DIR) | list_class_dirs(VAL_DIR)
labels = sorted(all_labels)
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for i, label in enumerate(labels)}

# Create a DataFrame to load data easily
def create_df(data_dir):
    # Build a DataFrame with one row per image file found under data_dir.
    #   data_dir: directory holding one sub-directory per class label.
    #   returns:  DataFrame with columns 'image' (file path) and 'label'
    #             (name of the class sub-directory).
    # Files without a common image extension (notes, .DS_Store, Thumbs.db, ...)
    # are skipped so that Image.open is never handed a non-image file. The
    # columns are fixed so that an empty directory still yields both columns.
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp', '.tif', '.tiff')
    df_data = []
    for label in sorted(os.listdir(data_dir)):
        label_path = os.path.join(data_dir, label)
        if not os.path.isdir(label_path):
            continue
        for image_file in sorted(os.listdir(label_path)):
            if not image_file.lower().endswith(image_extensions):
                continue
            df_data.append({'image': os.path.join(label_path, image_file), 'label': label})
    return pd.DataFrame(df_data, columns=['image', 'label'])

train_df = create_df(TRAIN_DIR)
val_df = create_df(VAL_DIR)

# Convert DataFrames to Hugging Face Dataset objects
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)

def preprocess_images(examples):
    # The Hugging Face processor handles resizing, normalization, etc.
    images = [Image.open(path).convert('RGB') for path in examples['image']]
    examples['pixel_values'] = image_processor(images=images, return_tensors='pt')['pixel_values']
    # Use the globally defined label2id
    examples['label'] = [label2id[label] for label in examples['label']]
    return examples

# Load the ViT image processor
image_processor = ViTImageProcessor.from_pretrained(MODEL_CHECKPOINT)

# Apply preprocessing
train_dataset = train_dataset.map(preprocess_images, batched=True)
val_dataset = val_dataset.map(preprocess_images, batched=True)

# Format the datasets for PyTorch
train_dataset.set_format("torch", columns=['pixel_values', 'label'])
val_dataset.set_format("torch", columns=['pixel_values', 'label'])

# --- 3. MODEL AND TRAINING SETUP ---
print("Initializing model and training arguments...")
model = ViTForImageClassification.from_pretrained(
    MODEL_CHECKPOINT,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id
)

# Load evaluation metric
metric = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./vit-finetuned-fish",
    # CORRECTED: Changed 'evaluation_strategy' to 'eval_strategy'
    eval_strategy="epoch",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=NUM_EPOCHS,
    weight_decay=0.01,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    logging_dir='./logs',
    logging_steps=10,
    report_to="none" # Disable logging to external services
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# --- 4. TRAINING ---
print("\\n--- Starting model training with Hugging Face Trainer ---")
train_results = trainer.train()

# Save the best model and processor
trainer.save_model("best_hf_model")
image_processor.save_pretrained("best_hf_model")

# --- 5. EVALUATION AND PLOTTING ---
print("\\n--- Evaluating the final model ---")
eval_results = trainer.evaluate()
print(f"Final evaluation results: {eval_results}")

# Plotting (Trainer does not provide a history object like Keras, so the
# curves are rebuilt from the records in its log history).
logs = trainer.state.log_history

# Training loss is logged every `logging_steps` steps, validation metrics once
# per epoch. Both are plotted against the logged 'epoch' value so the two
# curves share one x axis.
train_points = [(entry['epoch'], entry['loss']) for entry in logs if 'loss' in entry and 'epoch' in entry]
train_epochs = [epoch for epoch, _ in train_points]
train_loss = [loss for _, loss in train_points]

# Keep the first evaluation record of each epoch: the extra trainer.evaluate()
# call above logs the final epoch a second time.
eval_by_epoch = {}
for entry in logs:
    if 'eval_loss' in entry and 'eval_accuracy' in entry and 'epoch' in entry:
        eval_by_epoch.setdefault(entry['epoch'], entry)
eval_epochs = sorted(eval_by_epoch)
val_acc = [eval_by_epoch[epoch]['eval_accuracy'] for epoch in eval_epochs]
val_loss = [eval_by_epoch[epoch]['eval_loss'] for epoch in eval_epochs]

# compute_metrics only runs during evaluation, so no training accuracy is
# logged; the accuracy plot shows (and labels) the validation curve only.
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(eval_epochs, val_acc, marker='o')
ax.set_title('Model Accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(['Validation'], loc='upper left')
fig.tight_layout()
fig.savefig('accuracy_plot.png')
plt.close(fig)

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(train_epochs, train_loss)
ax.plot(eval_epochs, val_loss, marker='o')
ax.set_title('Model Loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Validation'], loc='upper left')
fig.tight_layout()
fig.savefig('loss_plot.png')
plt.close(fig)

print("Training completed. Plots saved as 'accuracy_plot.png' and 'loss_plot.png'")
print("Best model and image processor saved in 'best_hf_model' directory.")
""")
print("`train.py` script created.")

# --- Step 4: Run the training script ---
print("\nStep 4: Starting model training...")
!python train.py

# --- Step 5: Write the app.py script ---
print("\nStep 5: Creating `app.py` script...")
with open('app.py', 'w') as f:
    f.write("""
import streamlit as st
from PIL import Image
import numpy as np
import os
import torch
from transformers import ViTForImageClassification, ViTImageProcessor

# Streamlit documents set_page_config as the first Streamlit command of a page.
# It is called before the cached loader below can display anything.
st.set_page_config(page_title="Fish Species Classifier", layout="centered")


@st.cache_resource
def load_model_and_processor(model_dir):
    # Streamlit re-executes this script on every widget interaction; caching
    # keeps the model and processor in memory instead of reloading them from
    # disk on each rerun.
    loaded_model = ViTForImageClassification.from_pretrained(model_dir)
    loaded_processor = ViTImageProcessor.from_pretrained(model_dir)
    return loaded_model, loaded_processor


try:
    model_dir = 'best_hf_model'
    model, image_processor = load_model_and_processor(model_dir)

except Exception as e:
    st.error(f"Error loading model: {e}")
    st.info("Please run `train.py` first to create the model directory.")
    st.stop()

# Get class names from the model's configuration
class_names = [model.config.id2label[i] for i in range(model.config.num_labels)]

st.title("🐟 Multiclass Fish Image Classification")
st.markdown("Upload an image of a fish and the model will predict its species.")

uploaded_file = st.file_uploader("Choose a fish image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Force 3-channel RGB, as the training script does: PNG uploads may carry
    # an alpha channel or a palette and JPEGs may be greyscale.
    image = Image.open(uploaded_file).convert('RGB')
    st.image(image, caption='Uploaded Image', use_column_width=True)
    st.write("")

    if st.button("Predict"):
        with st.spinner("Classifying..."):
            # Preprocess the image using the ViTImageProcessor
            inputs = image_processor(images=image, return_tensors="pt")

            # Make the prediction; no gradients are needed for inference
            with torch.no_grad():
                logits = model(**inputs).logits

            # Softmax turns the logits of the single image into probabilities
            probabilities = torch.nn.functional.softmax(logits, dim=-1)[0]
            predicted_class_index = int(probabilities.argmax().item())
            predicted_class_name = model.config.id2label[predicted_class_index]
            confidence_score = probabilities[predicted_class_index].item()

            st.success(f"Prediction: **{predicted_class_name}**")
            st.info(f"Confidence: {confidence_score*100:.2f}%")

            st.subheader("All Class Probabilities")
            for name, probability in zip(class_names, probabilities.tolist()):
                st.write(f"- **{name}**: {probability*100:.2f}%")
""")
print("`app.py` script created.")

# --- Step 6: Run Streamlit and expose it to a public URL with ngrok ---
print("\nStep 6: Starting Streamlit app...")

from getpass import getpass

try:
    # Kill any existing ngrok tunnels to free up the session
    ngrok.kill()
    print("Killed any existing ngrok tunnels.")
except Exception as e:
    # Report instead of silently swallowing; a failed kill is not fatal here.
    print(f"Could not stop a previous ngrok process: {e}")


# SECURITY: the authtoken is a credential and must never be written into the
# notebook. It is read from the NGROK_AUTHTOKEN environment variable, or asked
# for interactively without being echoed or saved in the cell output. A token
# that was previously pasted into this notebook must be treated as leaked and
# revoked in the ngrok dashboard.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

try:
    ngrok_tunnel = ngrok.connect(addr=8501, bind_tls=True)
    print(f"Your Streamlit app is running at: {ngrok_tunnel.public_url}")

    subprocess.Popen(["streamlit", "run", "app.py", "--server.port=8501", "--server.headless=true"])

except Exception as e:
    print(f"Error starting ngrok tunnel: {e}")
    print("Please check that your authtoken is correct and try again.")



Step 1: Installing necessary libraries...
