In [1]:
import pandas as pd
import re
import os
import requests

In [2]:
image_df = pd.read_csv('../image_data_collection/english_image_dataset.csv')

In [3]:
sliced_df = image_df[['ImageURL','CategoryName']]

In [4]:
sliced_df.head(4)

Unnamed: 0,ImageURL,CategoryName
0,https://imagesvs.oneindia.com/webp/img/2024/09...,India
1,https://imagesvs.oneindia.com/webp/img/2024/09...,India
2,https://imagesvs.oneindia.com/webp/img/2024/09...,Entertainment
3,https://imagesvs.oneindia.com/webp/img/2024/09...,Bengaluru


In [5]:
# sliced_df['FileName'] = sliced_df['ImageURL'].apply(lambda x: re.search('([^/]+?)(?=\.\w+$)', x))

sliced_df.loc[:, 'FileName'] = sliced_df['ImageURL'].apply(lambda x: re.search('([^/]+?)(?=\.\w+$)', x).group(0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sliced_df.loc[:, 'FileName'] = sliced_df['ImageURL'].apply(lambda x: re.search('([^/]+?)(?=\.\w+$)', x).group(0))


In [6]:
sliced_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   ImageURL      3000 non-null   object
 1   CategoryName  3000 non-null   object
 2   FileName      3000 non-null   object
dtypes: object(3)
memory usage: 70.4+ KB


In [7]:
sliced_df['CategoryName'].nunique()


31

In [8]:
# Function to download the image
def download_image(url, folder, filename):
    response = requests.get(url)
    if response.status_code == 200:
        with open(os.path.join(folder, filename), 'wb') as f:
            f.write(response.content)
    else:
        print(f"Failed to download {url}")

In [9]:
# Loop through the DataFrame
for index, row in sliced_df.iterrows():
    category_folder = row['CategoryName']
    
    # Create the category folder if it doesn't exist
    if not os.path.exists(category_folder):
        os.makedirs(category_folder)
    
    # Define the full file path with the filename and extension (assuming .jpg)
    file_path = os.path.join(category_folder, row['FileName'] + '.jpg')
    
    # Download the image and save it to the category folder
    download_image(row['ImageURL'], category_folder, row['FileName'] + '.jpg')

print("Images downloaded and saved in corresponding folders.")

Images downloaded and saved in corresponding folders.


In [10]:
import PIL
import tensorflow as tf

## using Inception (GoogLeNet)

In [14]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input, decode_predictions
import numpy as np
import os

# Load the model
model = InceptionV3(weights='imagenet')

# Function to classify images
def classify_images(image_folder):
    results = {}
    for img_name in os.listdir(image_folder):
        img_path = os.path.join(image_folder, img_name)
        img = image.load_img(img_path, target_size=(299, 299))  # Resize according to model
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        # Make predictions
        preds = model.predict(img_array)
        decoded_preds = decode_predictions(preds, top=3)[0]
        
        # Store results
        results[img_name] = decoded_preds

    return results

# Example usage
image_folder = '../image_data_collection/Entertainment'  # Replace with your image folder path
classification_results = classify_images(image_folder)

for img, preds in classification_results.items():
    print(f"{img}: {preds}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 277ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 210ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 220ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 267ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 261ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 199ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 314ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

## Using vgg

In [15]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions
import numpy as np
import os

# Load the VGG16 model
model = VGG16(weights='imagenet')

# Function to classify images
def classify_images(image_folder):
    results = {}
    for img_name in os.listdir(image_folder):
        img_path = os.path.join(image_folder, img_name)
        img = image.load_img(img_path, target_size=(224, 224))  # Resize according to VGG16 model
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        # Make predictions
        preds = model.predict(img_array)
        decoded_preds = decode_predictions(preds, top=3)[0]
        
        # Store results
        results[img_name] = decoded_preds

    return results

# Example usage
image_folder = '../image_data_collection/Entertainment'  # Replace with your image folder path
classification_results = classify_images(image_folder)

for img, preds in classification_results.items():
    print(f"{img}: {preds}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 39s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 817ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 988ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 879ms/step
[1m1/1[

## Using resnet

In [16]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
import os

# Load the ResNet50 model
model = ResNet50(weights='imagenet')

# Function to classify images
def classify_images(image_folder):
    results = {}
    for img_name in os.listdir(image_folder):
        img_path = os.path.join(image_folder, img_name)
        img = image.load_img(img_path, target_size=(224, 224))  # Resize according to ResNet50 model
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        # Make predictions
        preds = model.predict(img_array)
        decoded_preds = decode_predictions(preds, top=3)[0]
        
        # Store results
        results[img_name] = decoded_preds

    return results

# Example usage
image_folder = '../image_data_collection/Entertainment'  # Replace with your image folder path
classification_results = classify_images(image_folder)

for img, preds in classification_results.items():
    print(f"{img}: {preds}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 725ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 402ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 635ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 348ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 337ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 199ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

## Using efficientnet

In [17]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input, decode_predictions
import numpy as np
import os

# Load the EfficientNetB0 model
model = EfficientNetB0(weights='imagenet')

# Function to classify images
def classify_images(image_folder):
    results = {}
    for img_name in os.listdir(image_folder):
        img_path = os.path.join(image_folder, img_name)
        img = image.load_img(img_path, target_size=(224, 224))  # Resize according to EfficientNet model
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        # Make predictions
        preds = model.predict(img_array)
        decoded_preds = decode_predictions(preds, top=3)[0]
        
        # Store results
        results[img_name] = decoded_preds

    return results

# Example usage
image_folder = '../image_data_collection/Entertainment'  # Replace with your image folder path
classification_results = classify_images(image_folder)

for img, preds in classification_results.items():
    print(f"{img}: {preds}")


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0.h5
[1m21834768/21834768[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━

## using mobilenet

In [20]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import MobileNet, MobileNetV2, MobileNetV3Large
from tensorflow.keras.applications.mobilenet import preprocess_input, decode_predictions
import numpy as np
import os

# Load the MobileNet model
model = MobileNetV2(weights='imagenet')

# Function to classify images
def classify_images(image_folder):
    results = {}
    for img_name in os.listdir(image_folder):
        img_path = os.path.join(image_folder, img_name)
        img = image.load_img(img_path, target_size=(224, 224))  # Resize according to MobileNet model
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        # Make predictions
        preds = model.predict(img_array)
        decoded_preds = decode_predictions(preds, top=3)[0]
        
        # Store results
        results[img_name] = decoded_preds

    return results

# Example usage
image_folder = '../image_data_collection/Entertainment'  # Replace with your image folder path
classification_results = classify_images(image_folder)

for img, preds in classification_results.items():
    print(f"{img}: {preds}")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224.h5
[1m14536120/14536120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m