In [1]:
# Stage the static assets (figures + metrics JSON) that the Streamlit app
# reads at runtime into the streamlit/ deployment folder.
#
# The cleanup cell later in this notebook deletes the top-level originals,
# so on a second run the sources may already be gone. In that case the
# previously staged copy is kept instead of failing; an asset that is
# missing from both locations is still reported as an error.
import os
import shutil

os.makedirs('streamlit', exist_ok=True)
os.makedirs('streamlit/images', exist_ok=True)

# (source file, destination folder) pairs
assets_to_stage = [
    ('sample_detections.png', 'streamlit/images/'),
    ('per_class_performance.png', 'streamlit/images/'),
    ('class_size_vs_performance.png', 'streamlit/images/'),
    ('model_metrics.json', 'streamlit/'),
]

for source_name, target_dir in assets_to_stage:
    staged_path = os.path.join(target_dir, source_name)
    if os.path.exists(source_name):
        shutil.copy(source_name, target_dir)
    elif os.path.exists(staged_path):
        # Source was already cleaned up by an earlier run; keep the staged copy.
        print(f"Skipped {source_name} (already staged at {staged_path})")
    else:
        raise FileNotFoundError(
            f"{source_name} not found and no staged copy exists at {staged_path}"
        )

print("Created streamlit folder structure")

Created streamlit folder structure


In [2]:
# Source of the Streamlit dashboard, written verbatim to streamlit/app.py.
#
# Notes on the generated app:
# - It opens 'model_metrics.json' and 'images/*.png' relative to the working
#   directory, so it must be launched from inside the streamlit/ folder
#   (the Dockerfile's WORKDIR /app satisfies this).
# - st.image is called with use_column_width=True because requirements.txt
#   pins streamlit==1.28.0, and st.image only accepts use_container_width
#   in later Streamlit releases (st.dataframe already accepts it in 1.28).
streamlit_code = '''import streamlit as st
import json
import pandas as pd
from PIL import Image
import plotly.express as px

st.set_page_config(page_title="Dental Treatment Detection", layout="wide")

# Load metrics
with open('model_metrics.json', 'r') as f:
    metrics = json.load(f)

st.title("Dental Treatment Detection with AutoML")
st.markdown("**Object Detection Model** - Automated identification of dental treatments from panoramic X-rays")

# Sidebar
st.sidebar.header("Model Info")
st.sidebar.metric("Model Type", "AutoML Vision")
st.sidebar.metric("Overall mAP", f"{metrics['overall']['mAP']:.1%}")
st.sidebar.metric("Training Budget", "20 node hours")

st.sidebar.markdown("---")
st.sidebar.markdown("### Dataset")
st.sidebar.markdown(f"- Total Images: {metrics['overall']['total_images']:,}")
st.sidebar.markdown(f"- Training: {metrics['overall']['training_images']:,}")
st.sidebar.markdown(f"- Validation: {metrics['overall']['validation_images']}")
st.sidebar.markdown(f"- Test: {metrics['overall']['test_images']}")

# Tabs
tab1, tab2, tab3, tab4 = st.tabs(["Sample Detections", "Model Performance", "Training Details", "Key Findings"])

with tab1:
    st.header("Sample Detections")
    st.markdown("Model detections on dental X-ray images showing identified treatments:")
    
    img = Image.open('images/sample_detections.png')
    # use_column_width (not use_container_width): matches the pinned streamlit==1.28.0
    st.image(img, use_column_width=True)
    
    st.markdown("### Detected Classes")
    col1, col2, col3, col4, col5 = st.columns(5)
    classes = ["Cavity", "Fillings", "Impacted Tooth", "Implant", "Infected-teeth"]
    for col, cls in zip([col1, col2, col3, col4, col5], classes):
        col.metric(cls, "✓")

with tab2:
    st.header("Model Performance")
    
    col1, col2, col3 = st.columns(3)
    col1.metric("Mean Avg Precision", f"{metrics['overall']['mAP']:.1%}")
    col2.metric("Precision", f"{metrics['overall']['precision']:.1%}")
    col3.metric("Recall", f"{metrics['overall']['recall']:.1%}")
    
    st.markdown("---")
    
    # Per-class performance chart
    st.subheader("Per-Class Performance")
    img_perf = Image.open('images/per_class_performance.png')
    st.image(img_perf, use_column_width=True)
    
    st.markdown("---")
    
    # Class size vs performance
    st.subheader("Impact of Training Data Size")
    img_size = Image.open('images/class_size_vs_performance.png')
    st.image(img_size, use_column_width=True)
    
    # Performance table
    st.markdown("---")
    st.subheader("Detailed Metrics")
    df = pd.DataFrame(metrics['per_class'])
    df.columns = ['Class', 'Average Precision', 'Training Samples']
    df['AP %'] = (df['Average Precision'] * 100).round(1)
    df = df[['Class', 'AP %', 'Training Samples']]
    st.dataframe(df, hide_index=True, use_container_width=True)

with tab3:
    st.header("Training Details")
    
    col1, col2 = st.columns(2)
    
    with col1:
        st.markdown("### Model Configuration")
        st.markdown("""
        - **Model Type:** AutoML CLOUD_HIGH_ACCURACY_1
        - **Training Budget:** 20 node hours (~$63)
        - **Training Time:** 4-6 hours (wall time)
        - **Platform:** Google Cloud Vertex AI
        - **Region:** us-central1
        """)
        
        st.markdown("### Dataset")
        st.markdown("""
        - **Source:** DentAi (Roboflow)
        - **Total Images:** 9,772 (after augmentation)
        - **Classes:** 5 (Cavity, Fillings, Impacted Tooth, Implant, Infected-teeth)
        - **Annotations:** 61,720 bounding boxes
        - **Split:** 94% train, 4% validation, 2% test
        """)
    
    with col2:
        st.markdown("### Architecture Pipeline")
        st.code("""
1. Data Preparation
   ↓
2. Upload to GCS
   ↓
3. Create Vertex Dataset
   ↓
4. AutoML Training
   ↓
5. Model Evaluation
   ↓
6. Model Registry
        """, language=None)
        
        st.markdown("### Tech Stack")
        st.markdown("""
        - **ML Platform:** Vertex AI AutoML
        - **Storage:** Google Cloud Storage
        - **Data Processing:** Pandas, Roboflow API
        - **Visualization:** Matplotlib, Pillow
        - **Notebooks:** Jupyter, Vertex AI Workbench
        """)

with tab4:
    st.header("Key Findings & Insights")
    
    st.markdown("### 1. Class Imbalance Impact")
    st.markdown("""
    The model's performance closely correlates with training data size:
    - **Best performers:** Implant (75.1% AP) and Infected-teeth (74.6% AP) had 10K+ samples
    - **Worst performer:** Cavity (42.9% AP) with only 3,456 samples (6% of data)
    - **Recommendation:** Collect more cavity examples or apply targeted augmentation
    """)
    
    st.markdown("### 2. Fillings Underperformance")
    st.markdown("""
    Despite being 54% of the dataset (31,434 samples), Fillings only achieved 69.5% AP:
    - **Root cause:** High intra-class variation (new vs. old fillings look very different)
    - **Solution:** May need class splitting (new_fillings vs. old_fillings)
    """)
    
    st.markdown("### 3. Training Budget Sweet Spot")
    st.markdown("""
    20 node hours achieved 70.9% mAP at ~$63 cost:
    - 40 hours would add only 2-4 mAP points for $60 more (diminishing returns)
    - AutoML used early stopping, actual cost likely $50-60
    - **Cost-effective choice for portfolio project**
    """)
    
    st.markdown("### 4. Production Readiness")
    st.markdown("""
    - **Ready:** Implant and Infected-teeth detection (>74% AP)
    - **Needs improvement:** Cavity detection (more data needed)
    - **Consider ensemble:** Combine with rule-based fallbacks for low-confidence predictions
    """)
    
    st.markdown("---")
    st.markdown("### Next Steps")
    st.markdown("""
    1. Collect more cavity and impacted tooth training examples
    2. Investigate fillings class split (new vs. old)
    3. Test on real clinical data (with HIPAA compliance)
    4. Implement ensemble with dentist-in-the-loop review
    """)

st.markdown("---")
st.markdown("**Project by Arion Farhi** | [GitHub](https://github.com/arion-farhi)")
'''

# The app source contains non-ASCII characters (check marks, arrows). Python
# reads source files as UTF-8, so write UTF-8 explicitly instead of relying
# on the platform's default encoding.
with open('streamlit/app.py', 'w', encoding='utf-8') as f:
    f.write(streamlit_code)

print("Created streamlit/app.py")

Created streamlit/app.py


In [3]:
# Pinned runtime dependencies for the Streamlit container image.
pinned_packages = (
    'streamlit==1.28.0',
    'pandas==2.0.3',
    'plotly==5.17.0',
    'pillow==10.0.1',
)

# requirements.txt format: one pin per line, each line newline-terminated.
requirements = ''.join(f'{pin}\n' for pin in pinned_packages)

with open('streamlit/requirements.txt', 'w') as req_file:
    req_file.write(requirements)

print("Created streamlit/requirements.txt")

Created streamlit/requirements.txt


In [4]:
# Container recipe for the dashboard: install the pinned requirements first
# (so that layer is cached independently of app changes), then copy the app
# and serve it with Streamlit on port 8501.
dockerfile_lines = [
    'FROM python:3.10-slim',
    '',
    'WORKDIR /app',
    '',
    'COPY requirements.txt .',
    'RUN pip install --no-cache-dir -r requirements.txt',
    '',
    'COPY . .',
    '',
    'EXPOSE 8501',
    '',
    'CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]',
]
dockerfile = '\n'.join(dockerfile_lines) + '\n'

with open('streamlit/Dockerfile', 'w') as docker_file:
    docker_file.write(dockerfile)

print("Created streamlit/Dockerfile")

Created streamlit/Dockerfile


In [5]:
# Sanity check: confirm every artifact the Streamlit deployment needs exists.
import os

files_needed = [
    'streamlit/app.py',
    'streamlit/requirements.txt',
    'streamlit/Dockerfile',
    'streamlit/model_metrics.json',
    'streamlit/images/sample_detections.png',
    'streamlit/images/per_class_performance.png',
    'streamlit/images/class_size_vs_performance.png'
]

print("Checking files...")

# Look each path up once and reuse the result for the report and the verdict.
presence = {path: os.path.exists(path) for path in files_needed}
for path, found in presence.items():
    mark = '✓' if found else '✗'
    print(f"{mark} {path}")

if not all(presence.values()):
    print("\n✗ Some files missing")
else:
    print("\n✓ All files ready for deployment!")

Checking files...
✓ streamlit/app.py
✓ streamlit/requirements.txt
✓ streamlit/Dockerfile
✓ streamlit/model_metrics.json
✓ streamlit/images/sample_detections.png
✓ streamlit/images/per_class_performance.png
✓ streamlit/images/class_size_vs_performance.png

✓ All files ready for deployment!


In [6]:
# Build .gitignore from (section title, patterns) pairs. Each section is
# rendered as a "# title" comment followed by its patterns, and sections are
# separated by one blank line.
gitignore_sections = [
    ('Data files (too large)', [
        '*.jpg',
        '*.jpeg',
        '*.png',
        '!screenshots/*.png',
        '!streamlit/images/*.png',
        '*.jsonl',
        'DentAi-2/',
    ]),
    ('Python', [
        '__pycache__/',
        '*.py[cod]',
        '*$py.class',
        '*.so',
        '.Python',
        'env/',
        'venv/',
        '*.egg-info/',
        '.ipynb_checkpoints/',
    ]),
    ('Secrets', [
        '*.json',
        '!streamlit/model_metrics.json',
        '*.pem',
        '*.key',
        'service-account*.json',
    ]),
    ('IDE', [
        '.vscode/',
        '.idea/',
        '*.swp',
        '*.swo',
    ]),
    ('OS', [
        '.DS_Store',
        'Thumbs.db',
    ]),
    ('Jupyter', [
        '.ipynb_checkpoints',
    ]),
    ('Temporary files', [
        '*.log',
        '*.tmp',
        'tmp/',
    ]),
]

gitignore = '\n\n'.join(
    '\n'.join([f'# {title}', *patterns])
    for title, patterns in gitignore_sections
) + '\n'

with open('.gitignore', 'w') as ignore_file:
    ignore_file.write(gitignore)

print("Created .gitignore")

Created .gitignore


In [7]:
import os
import shutil

# Make sure both target folders of the published repo layout exist.
for folder in ('notebooks', 'screenshots'):
    os.makedirs(folder, exist_ok=True)

# Pipeline notebooks that get relocated into notebooks/.
notebooks = [
    '01-data-exploration.ipynb',
    '01b-gcs-upload.ipynb', 
    '02-vertex-dataset-creation.ipynb',
    '03-automl-training.ipynb',
    '04-model-evaluation.ipynb'
]

for nb in notebooks:
    if not os.path.exists(nb):
        continue  # already moved, or never created
    shutil.move(nb, f'notebooks/{nb}')
    print(f"Moved {nb}")

# Figures that get duplicated into screenshots/ (the originals stay in place).
screenshots = [
    'class_distribution.png',
    'dataset_split.png',
    'sample_annotations.png',
    'per_class_performance.png',
    'class_size_vs_performance.png'
]

for img in screenshots:
    if not os.path.exists(img):
        continue
    shutil.copy(img, f'screenshots/{img}')
    print(f"Copied {img}")

print("\n✓ Directory structure organized for GitHub!")

Moved 01-data-exploration.ipynb
Moved 01b-gcs-upload.ipynb
Moved 02-vertex-dataset-creation.ipynb
Moved 03-automl-training.ipynb
Moved 04-model-evaluation.ipynb
Copied class_distribution.png
Copied dataset_split.png
Copied sample_annotations.png
Copied per_class_performance.png
Copied class_size_vs_performance.png

✓ Directory structure organized for GitHub!


In [9]:
import os

def show_tree(directory, prefix='', max_depth=3, current_depth=0):
    """Print a simple tree listing of ``directory`` to stdout.

    Entries whose name starts with '.' are skipped. Within each level,
    sub-directories are listed first (each followed by its own contents),
    then regular files; both groups are in sorted name order.

    Args:
        directory: Path of the directory to list.
        prefix: Text printed in front of every entry at this level; each
            nested level appends '│   ' to it.
        max_depth: Number of directory levels to descend. Directories at
            the deepest listed level are printed but not expanded.
        current_depth: Depth of ``directory`` relative to the starting
            point; used by the recursion.
    """
    if current_depth < max_depth:
        subdirs = []
        plain_files = []
        for name in sorted(os.listdir(directory)):
            if name.startswith('.'):
                continue
            full_path = os.path.join(directory, name)
            if os.path.isdir(full_path):
                subdirs.append(name)
            elif os.path.isfile(full_path):
                plain_files.append(name)

        for name in subdirs:
            print(f"{prefix}├── {name}/")
            show_tree(os.path.join(directory, name), prefix + '│   ', max_depth, current_depth + 1)

        for name in plain_files:
            print(f"{prefix}├── {name}")

# Show the working directory two levels deep, under the repository name.
print("dental-treatment-detection/")
show_tree('.', prefix='', max_depth=2)

dental-treatment-detection/
├── DentAi-2/
│   ├── test/
│   ├── train/
│   ├── valid/
│   ├── README.dataset.txt
│   ├── README.roboflow.txt
├── notebooks/
│   ├── 01-data-exploration.ipynb
│   ├── 01b-gcs-upload.ipynb
│   ├── 02-vertex-dataset-creation.ipynb
│   ├── 03-automl-training.ipynb
│   ├── 04-model-evaluation.ipynb
├── screenshots/
│   ├── class_distribution.png
│   ├── class_size_vs_performance.png
│   ├── dataset_split.png
│   ├── per_class_performance.png
│   ├── sample_annotations.png
├── streamlit/
│   ├── images/
│   ├── Dockerfile
│   ├── app.py
│   ├── model_metrics.json
│   ├── requirements.txt
├── 05-generate-demo-predictions.ipynb
├── all_annotations.jsonl
├── all_predictions.png
├── all_predictions_fixed.png
├── batch_prediction_input.jsonl
├── class_distribution.png
├── class_size_vs_performance.png
├── dataset_split.png
├── model_metrics.json
├── per_class_performance.png
├── sample_annotations.png
├── sample_detections.png
├── test_annotations.jsonl
├── test_pr

In [10]:
import os
import shutil

# Relocate the remaining notebook next to the others.
demo_notebook = '05-generate-demo-predictions.ipynb'
if os.path.exists(demo_notebook):
    shutil.move(demo_notebook, f'notebooks/{demo_notebook}')
    print(f"Moved {demo_notebook}")

# Top-level working files to remove from the repository root.
files_to_delete = [
    'all_annotations.jsonl',
    'all_predictions.png',
    'all_predictions_fixed.png', 
    'batch_prediction_input.jsonl',
    'class_distribution.png',
    'class_size_vs_performance.png',
    'dataset_split.png',
    'model_metrics.json',
    'per_class_performance.png',
    'sample_annotations.png',
    'sample_detections.png',
    'test_annotations.jsonl',
    'test_predictions.json',
    'train_annotations.jsonl',
    'valid_annotations.jsonl'
]

for f in files_to_delete:
    if not os.path.exists(f):
        continue  # nothing to remove
    os.remove(f)
    print(f"Deleted {f}")

print("\n✓ Cleaned up!")

Moved 05-generate-demo-predictions.ipynb
Deleted all_annotations.jsonl
Deleted all_predictions.png
Deleted all_predictions_fixed.png
Deleted batch_prediction_input.jsonl
Deleted class_distribution.png
Deleted class_size_vs_performance.png
Deleted dataset_split.png
Deleted model_metrics.json
Deleted per_class_performance.png
Deleted sample_annotations.png
Deleted sample_detections.png
Deleted test_annotations.jsonl
Deleted test_predictions.json
Deleted train_annotations.jsonl
Deleted valid_annotations.jsonl

✓ Cleaned up!


In [11]:
# Re-print the two-level tree to confirm the result of the cleanup.
print("dental-treatment-detection/")
show_tree('.', prefix='', max_depth=2)

dental-treatment-detection/
├── DentAi-2/
│   ├── test/
│   ├── train/
│   ├── valid/
│   ├── README.dataset.txt
│   ├── README.roboflow.txt
├── notebooks/
│   ├── 01-data-exploration.ipynb
│   ├── 01b-gcs-upload.ipynb
│   ├── 02-vertex-dataset-creation.ipynb
│   ├── 03-automl-training.ipynb
│   ├── 04-model-evaluation.ipynb
│   ├── 05-generate-demo-predictions.ipynb
├── screenshots/
│   ├── class_distribution.png
│   ├── class_size_vs_performance.png
│   ├── dataset_split.png
│   ├── per_class_performance.png
│   ├── sample_annotations.png
├── streamlit/
│   ├── images/
│   ├── Dockerfile
│   ├── app.py
│   ├── model_metrics.json
│   ├── requirements.txt


In [12]:
import shutil

# Drop the local dataset folder; per the original note a copy already lives
# in GCS (this cell does not verify that).
dataset_dir = 'DentAi-2'
if os.path.exists(dataset_dir):
    shutil.rmtree(dataset_dir)
    print("Deleted DentAi-2/ (data is in GCS)")

print("\nFinal structure:")
show_tree('.', prefix='', max_depth=2)

Deleted DentAi-2/ (data is in GCS)

Final structure:
├── notebooks/
│   ├── 01-data-exploration.ipynb
│   ├── 01b-gcs-upload.ipynb
│   ├── 02-vertex-dataset-creation.ipynb
│   ├── 03-automl-training.ipynb
│   ├── 04-model-evaluation.ipynb
│   ├── 05-generate-demo-predictions.ipynb
├── screenshots/
│   ├── class_distribution.png
│   ├── class_size_vs_performance.png
│   ├── dataset_split.png
│   ├── per_class_performance.png
│   ├── sample_annotations.png
├── streamlit/
│   ├── images/
│   ├── Dockerfile
│   ├── app.py
│   ├── model_metrics.json
│   ├── requirements.txt
