## **Data Preparation**

In [1]:
import pandas as pd
branded_info_df = pd.read_csv("../Data/brand_info.csv")
branded_info_df.head()

Unnamed: 0.1,Unnamed: 0,ID,GenderType,Type,SubType,Article,PrimaryColor,Seasonal,Year,Use,Brand
0,1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England
1,2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan
2,4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma
3,8,29114,Men,Accessories,Socks,Socks,Navy Blue,Summer,2012.0,Casual,Puma
4,10,9204,Men,Footwear,Shoes,Casual Shoes,Black,Summer,2011.0,Casual,Puma


In [2]:
branded_info_df.drop(columns="Unnamed: 0",inplace=True)
branded_info_df.head()


Unnamed: 0,ID,GenderType,Type,SubType,Article,PrimaryColor,Seasonal,Year,Use,Brand
0,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England
1,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan
2,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma
3,29114,Men,Accessories,Socks,Socks,Navy Blue,Summer,2012.0,Casual,Puma
4,9204,Men,Footwear,Shoes,Casual Shoes,Black,Summer,2011.0,Casual,Puma


In [3]:
image_IDs = list(branded_info_df['ID'].values)


In [4]:
from glob import glob
import os
images = glob("../Data/images/*.*")
real_fake = {}
# Extract the basenames of the image files
image_basenames = [os.path.basename(file) for file in images]

# Print the basenames
for basename in image_basenames:
    image_id = basename.split(".")
    if int(image_id[0]) in image_IDs:
        real_fake[basename] = "Real" ## Real image
    else:
        real_fake[basename] = "Fake" # Fake image

In [5]:
real_fake

{'10000.jpg': 'Fake',
 '10001.jpg': 'Fake',
 '10002.jpg': 'Fake',
 '10003.jpg': 'Real',
 '10004.jpg': 'Real',
 '10005.jpg': 'Real',
 '10006.jpg': 'Real',
 '10007.jpg': 'Real',
 '10008.jpg': 'Real',
 '10009.jpg': 'Real',
 '10010.jpg': 'Real',
 '10011.jpg': 'Real',
 '10012.jpg': 'Real',
 '10013.jpg': 'Real',
 '10014.jpg': 'Real',
 '10015.jpg': 'Fake',
 '10016.jpg': 'Fake',
 '10017.jpg': 'Real',
 '10018.jpg': 'Real',
 '10019.jpg': 'Real',
 '10020.jpg': 'Real',
 '10021.jpg': 'Fake',
 '10022.jpg': 'Real',
 '10023.jpg': 'Real',
 '10024.jpg': 'Real',
 '10025.jpg': 'Real',
 '10026.jpg': 'Real',
 '10027.jpg': 'Real',
 '10028.jpg': 'Real',
 '10029.jpg': 'Real',
 '10030.jpg': 'Real',
 '10031.jpg': 'Real',
 '10032.jpg': 'Real',
 '10033.jpg': 'Real',
 '10034.jpg': 'Real',
 '10035.jpg': 'Fake',
 '10037.jpg': 'Fake',
 '10039.jpg': 'Fake',
 '10040.jpg': 'Real',
 '10041.jpg': 'Real',
 '10042.jpg': 'Real',
 '10043.jpg': 'Real',
 '10044.jpg': 'Real',
 '10045.jpg': 'Real',
 '10046.jpg': 'Real',
 '10047.jp

In [6]:
# Create a DataFrame from the dictionary
df = pd.DataFrame(list(real_fake.items()), columns=['ImageName', 'RealOrFake'])
# Print the DataFrame
df.head()

Unnamed: 0,ImageName,RealOrFake
0,10000.jpg,Fake
1,10001.jpg,Fake
2,10002.jpg,Fake
3,10003.jpg,Real
4,10004.jpg,Real


In [7]:
df['RealOrFake'].value_counts()

RealOrFake
Fake    29200
Real    15126
Name: count, dtype: int64

In [8]:
import plotly.express as px
fig = px.histogram(df, x='RealOrFake', title='Real Vs Fake')
fig.update_layout(
    width=800,  # Set the width of the figure
    height=600,  # Set the height of the figure
)
fig.show()

In [9]:
from sklearn.utils import resample
# Separate majority and minority classes
df_fake = df[df['RealOrFake'] == 'Fake']
df_real = df[df['RealOrFake'] == 'Real']

# Downsample the majority class (Fake)
df_fake_downsampled = resample(df_fake, 
                               replace=False,    # sample without replacement
                               n_samples=len(df_real), # match number of real instances
                               random_state=42)  # reproducible results

# Combine minority class with downsampled majority class
df_balanced = pd.concat([df_real, df_fake_downsampled])

# Shuffle the resulting DataFrame
df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

# Check the new balance
print(df_balanced['RealOrFake'].value_counts())

RealOrFake
Fake    15126
Real    15126
Name: count, dtype: int64


In [10]:
fig = px.histogram(df_balanced, x='RealOrFake', title='Real Vs Fake')
fig.update_layout(
    width=800,  # Set the width of the figure
    height=600,  # Set the height of the figure
)
fig.show()

In [16]:
df_balanced_sample = df_balanced.sample(frac=0.3)
fig = px.histogram(df_balanced_sample, x='RealOrFake', title='Real Vs Fake')
fig.update_layout(
    width=800,  # Set the width of the figure
    height=600,  # Set the height of the figure
)
fig.show()

In [17]:
import os
import shutil
import pandas as pd

# Define the paths to the source directory containing images and the destination directories
source_dir = '../Data/images'
real_dir = '../input/Real'
fake_dir = '../input/Fake'

# Create the destination directories if they don't exist
os.makedirs("../input", exist_ok=True)
os.makedirs(real_dir, exist_ok=True)
os.makedirs(fake_dir, exist_ok=True)


In [18]:
# Iterate over the DataFrame and copy files to the appropriate folders
for _, row in df_balanced_sample.iterrows():
    image_name = row['ImageName']
    label = row['RealOrFake']
    
    source_path = os.path.join(source_dir, image_name)
    
    if label == 'Real':
        dest_path = os.path.join(real_dir, image_name)
    else:
        dest_path = os.path.join(fake_dir, image_name)
    
    # Copy the image to the destination folder
    shutil.copy(source_path, dest_path)

print("Images have been successfully copied to their respective folders.")

Images have been successfully copied to their respective folders.


In [19]:
import splitfolders

splitfolders.ratio("../input/", output="../imageSplits", seed=42, ratio=(0.7, 0.15, 0.15))


In [23]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import VGG16  # Example pre-trained model

# Define the directories
base_dir = '../imageSplits'
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# Image dimensions
IMG_HEIGHT = 60
IMG_WIDTH = 60
BATCH_SIZE = 12

# Data Augmentation and ImageDataGenerators
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1.0/255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

# Evaluate the model on the test set
test_generator = val_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)


Found 7837 images belonging to 2 classes.
Found 1679 images belonging to 2 classes.
Found 1682 images belonging to 2 classes.


In [32]:
from tensorflow.keras import layers, models

# Model Building
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    # layers.Dropout(0.5),  # Added dropout to reduce overfitting
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),  # Added another dropout layer
    layers.Dense(1, activation='sigmoid')
])

model.summary()



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [33]:

# Compile the Model
model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])


In [35]:
# Early Stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the Model
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    callbacks=[early_stopping]
)

Epoch 1/10
[1m654/654[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 38ms/step - accuracy: 0.5186 - loss: 0.7785 - val_accuracy: 0.5962 - val_loss: 0.6733
Epoch 2/10
[1m654/654[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 37ms/step - accuracy: 0.5620 - loss: 0.6910 - val_accuracy: 0.5873 - val_loss: 0.6759
Epoch 3/10
[1m654/654[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 38ms/step - accuracy: 0.5722 - loss: 0.6870 - val_accuracy: 0.6015 - val_loss: 0.6562
Epoch 4/10
[1m654/654[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 38ms/step - accuracy: 0.5835 - loss: 0.6734 - val_accuracy: 0.6212 - val_loss: 0.6488
Epoch 5/10
[1m654/654[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 38ms/step - accuracy: 0.5979 - loss: 0.6656 - val_accuracy: 0.5878 - val_loss: 0.6845
Epoch 6/10
[1m654/654[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 38ms/step - accuracy: 0.5860 - loss: 0.6676 - val_accuracy: 0.6444 - val_loss: 0.6357
Epoch 7/10
[1m6

In [39]:
import plotly.graph_objects as go

# Assuming `history` is the object returned by model.fit
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Create the plot
fig = go.Figure()

# Add traces for training and validation accuracy
fig.add_trace(go.Scatter(
    x=list(range(1, len(train_acc) + 1)),
    y=train_acc,
    mode='lines+markers',
    name='Training Accuracy'
))

fig.add_trace(go.Scatter(
    x=list(range(1, len(val_acc) + 1)),
    y=val_acc,
    mode='lines+markers',
    name='Validation Accuracy'
))

# Update layout
fig.update_layout(
    title='Training and Validation Accuracy',
    xaxis_title='Epoch',
    yaxis_title='Accuracy',
    legend_title='Legend',
    template='plotly_dark'  # Optional: You can choose other themes if preferred
)

fig.update_layout(
    width=1200,  # Set the width of the figure
    height=1000,  # Set the height of the figure
)
# Show the plot
fig.show()


In [31]:
test_loss, test_acc = model.evaluate(test_generator)
print(f'Test Accuracy: {test_acc:.2f}')

# Save the model
model.save('../MODEL/brand_classifier_model.h5')

[1m  3/141[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m9s[0m 70ms/step - accuracy: 0.7500 - loss: 0.5182


Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.



[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 86ms/step - accuracy: 0.6346 - loss: 0.6285




Test Accuracy: 0.62
