In [1]:
##TAKING DATASET FROM KAGGLE
import kagglehub

# Download latest version
path = kagglehub.dataset_download("paramaggarwal/fashion-product-images-dataset")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/fashion-product-images-dataset


In [None]:
#AI MODEL TRAINING CODE

import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle

# =============================================
# 1. Dataset Setup (Kaggle)
# =============================================
BASE_PATH = "/kaggle/input/fashion-product-images-dataset"
DATA_DIR = os.path.join(BASE_PATH, "fashion-dataset/images")
CSV_PATH = os.path.join(BASE_PATH, "fashion-dataset/styles.csv")

# =============================================
# 2. Data Preparation
# =============================================
print("Loading dataset...")
df = pd.read_csv(CSV_PATH, on_bad_lines='skip')
df = df[['id', 'articleType']].dropna()
df['id'] = df['id'].astype(str) + '.jpg'

# Filter for existing images
df = df[df['id'].apply(lambda x: os.path.exists(os.path.join(DATA_DIR, x)))]

# Encode labels
le = LabelEncoder()
df['label'] = le.fit_transform(df['articleType'])
num_classes = len(le.classes_)
print(f"\n{num_classes} categories found. First 10:")
print(le.classes_[:10])

# Save class names to a file for later use in the app
with open("class_names.pkl", "wb") as f:
    pickle.dump(le.classes_, f)

# Train-test split
train_df, val_df = train_test_split(df, test_size=0.1, random_state=42)

# =============================================
# 3. Data Generators
# =============================================
BATCH_SIZE = 32
IMG_SIZE = (128, 128)

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=DATA_DIR,
    x_col="id",
    y_col="label",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="raw"
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=DATA_DIR,
    x_col="id",
    y_col="label",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="raw"
)

# =============================================
# 4. Model Architecture (Simplified)
# =============================================
def create_model():
    model = Sequential([
        Conv2D(16, (3,3), activation='relu', padding='same', input_shape=(128,128,3)),
        BatchNormalization(),
        MaxPooling2D((2,2)),

        Conv2D(32, (3,3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2,2)),

        Conv2D(64, (3,3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2,2)),

        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.4),
        Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

model = create_model()
model.summary()

# =============================================
# 5. Training with Callbacks (Quick)
# =============================================
callbacks = [
    EarlyStopping(monitor='val_accuracy', patience=2, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=1, min_lr=1e-6)
]

print("\nTraining model...")
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=val_generator,
    validation_steps=len(val_generator),
    epochs=2,
    callbacks=callbacks,
    verbose=1
)

# =============================================
# 6. Save and Evaluate
# =============================================
model.save("fashion_mnist_rgb_fast.h5")
print("\nModel saved to fashion_mnist_rgb_fast.h5")

val_loss, val_acc = model.evaluate(val_generator)
print(f"\nValidation Accuracy: {val_acc:.4f}")
print(f"Validation Loss: {val_loss:.4f}")


Loading dataset...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'] = le.fit_transform(df['articleType'])



142 categories found. First 10:
['Accessory Gift Set' 'Baby Dolls' 'Backpacks' 'Bangle' 'Basketballs'
 'Bath Robe' 'Beauty Accessory' 'Belts' 'Blazers' 'Body Lotion']
Found 39977 validated image filenames.
Found 4442 validated image filenames.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Training model...


  self._warn_if_super_not_called()


Epoch 1/2
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2154s[0m 2s/step - accuracy: 0.2399 - loss: 3.4925 - val_accuracy: 0.3066 - val_loss: 2.8944 - learning_rate: 0.0010
Epoch 2/2
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2144s[0m 2s/step - accuracy: 0.3656 - loss: 2.4330 - val_accuracy: 0.5149 - val_loss: 1.8245 - learning_rate: 0.0010





Model saved to fashion_mnist_rgb_fast.h5
[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 829ms/step - accuracy: 0.5268 - loss: 1.7833

Validation Accuracy: 0.5149
Validation Loss: 1.8245


In [None]:
!pip install streamlit tensorflow


In [None]:
#TAKING IP TO FOR STREAMLIT
!wget -q -O - ipv4.icanhazip.com
#RUNNING THE MAIN FILE
!streamlit run ap-2.py & npx localtunnel --port 8501