In [95]:
import os
%pwd

'c:\\Users\\aarav\\Downloads\\VisualSearch\\srcx'

In [3]:
# Move the kernel's working directory one level up from where the notebook was
# started, then display the resulting directory.
# NOTE(review): this is a relative chdir, so it is not idempotent — every
# re-run of the cell climbs one more level. Presumably it exists so that
# relative paths such as the config files resolve from the project root;
# confirm, and run it exactly once per kernel session.
os.chdir('../')
%pwd

'c:\\Users\\aarav\\Downloads\\VisualSearch\\srcx'

In [96]:
from dataclasses import dataclass
from pathlib import Path 

@dataclass(frozen=True)
class DataPreprocessingConfig:
    """Immutable settings consumed by the data-preprocessing stage.

    Instances are built by ``ConfigurationManager.get_data_preprocessing_config``.
    NOTE(review): the path-like fields are annotated as ``Path`` but the
    manager passes the configuration values through without converting
    them, so they are presumably plain strings at runtime — confirm.
    """

    # Directory created for this stage; the two output files are written into it.
    root_dir: Path
    # Directory whose entries are listed and processed one by one.
    data_path: Path
    # Location of the serialized model that is loaded with joblib.
    model_path: Path
    # File name (joined onto root_dir) under which the feature list is dumped.
    feature_list: Path 
    # File name (joined onto root_dir) under which the list of file paths is dumped.
    file_names: Path 
    # Passed as ``target_size`` when each image is loaded.
    params_target_size: list           
    


In [97]:
from VisualSearch.constants import *
from VisualSearch.utils.common import read_yaml, create_directories

In [98]:
class ConfigurationManager:
    """Reads the project's YAML files and hands out per-stage config objects.

    The three files (config, params, schema) are read once in the
    constructor and kept on the instance as ``config``, ``params`` and
    ``schema``.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration file.
            params_filepath: Location of the parameters file.
            schema_filepath: Location of the schema file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Requested before any stage asks for its own sub-directory.
        create_directories([self.config.artifacts_root])

    def get_data_preprocessing_config(self) -> DataPreprocessingConfig:
        """Assemble the settings object for the data-preprocessing stage.

        The stage's ``root_dir`` is passed to ``create_directories`` as a
        side effect.

        Returns:
            DataPreprocessingConfig: values from the ``data_preprocessing``
            section of the config plus ``TARGET_SIZE`` from the params file.
            NOTE(review): values are forwarded exactly as read, without
            conversion to ``Path``.
        """
        section = self.config.data_preprocessing
        create_directories([section.root_dir])

        # These fields are copied one-to-one from the config section.
        copied_fields = (
            "root_dir",
            "data_path",
            "model_path",
            "feature_list",
            "file_names",
        )
        settings = {name: getattr(section, name) for name in copied_fields}

        return DataPreprocessingConfig(
            params_target_size=self.params.TARGET_SIZE,
            **settings,
        )

In [99]:
import tensorflow 
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input	
import numpy as np
from numpy.linalg import norm
import os 
from tqdm import tqdm
import joblib


In [100]:
class DataPreprocessing:
    """Turn every image in a directory into an L2-normalised feature vector.

    The model is loaded once from ``config.model_path``. After
    ``preprocess_all_data`` has run, ``file_names`` and ``feature_list`` are
    index-aligned lists (path ``i`` produced vector ``i``) and both have
    been persisted under ``config.root_dir``.
    """

    def __init__(self, config: "DataPreprocessingConfig"):
        """Store the configuration and load the feature-extraction model.

        Args:
            config: Settings for this stage (paths and image target size).
        """
        self.config = config
        # NOTE(review): joblib.load is pickle-based and can execute arbitrary
        # code while loading; only point model_path at trusted files.
        self.model = joblib.load(self.config.model_path)
        self.file_names = []
        self.feature_list = []

    @staticmethod
    def _l2_normalize(vector):
        """Scale ``vector`` to unit L2 norm; an all-zero vector is returned as is."""
        magnitude = norm(vector)
        if magnitude == 0:
            # Dividing by zero would fill the vector with NaNs.
            return vector
        return vector / magnitude

    def get_data_preprocess(self, path):
        """Compute the normalised feature vector for a single image.

        Args:
            path: Location of the image file to load.

        Returns:
            The flattened model output divided by its L2 norm (left
            unscaled when the norm is zero).
        """
        img = image.load_img(
            path,
            target_size=self.config.params_target_size,
            color_mode="rgb",
        )
        img_array = image.img_to_array(img)
        # The model is fed a batch, so add a leading batch axis of size one.
        batch = np.expand_dims(img_array, axis=0)
        preprocessed_img = preprocess_input(batch)
        # verbose=0 stops predict from printing its own progress output for
        # every image. NOTE(review): assumes a Keras-style predict that
        # accepts ``verbose`` — confirm for the stored model.
        result = self.model.predict(preprocessed_img, verbose=0).flatten()
        return self._l2_normalize(result)

    def preprocess_all_data(self):
        """Extract features for every file in ``config.data_path`` and save them.

        Entries are processed in sorted order so the output is reproducible,
        and sub-directories are ignored. A file that cannot be read as an
        image is reported with a warning and skipped instead of aborting
        the whole run. Results replace (rather than extend) any lists left
        from a previous call, so the method can be called repeatedly.

        Two files are written under ``config.root_dir``: the processed
        paths (``config.file_names``) and the matching feature vectors
        (``config.feature_list``).
        """
        import warnings

        data_dir = self.config.data_path
        candidates = [
            candidate
            for candidate in sorted(
                os.path.join(data_dir, entry) for entry in os.listdir(data_dir)
            )
            if os.path.isfile(candidate)
        ]

        processed_paths = []
        extracted_features = []
        for file in tqdm(candidates):
            try:
                features = self.get_data_preprocess(file)
            except (OSError, ValueError) as exc:
                # Unreadable or non-image file: skip it but keep going.
                warnings.warn(f"Skipping {file!r}: {exc}")
                continue
            # Append to both lists together so they stay index-aligned.
            processed_paths.append(file)
            extracted_features.append(features)

        self.file_names = processed_paths
        self.feature_list = extracted_features

        joblib.dump(self.file_names, os.path.join(self.config.root_dir, self.config.file_names))
        joblib.dump(self.feature_list, os.path.join(self.config.root_dir, self.config.feature_list))




In [102]:
# Run the data-preprocessing stage end to end: read the configuration, build
# the stage, then extract and persist features for every image.
# Exceptions are allowed to propagate unchanged; the previous
# `except Exception as e: raise e` wrapper did nothing but add a traceback frame.
config = ConfigurationManager()
data_preprocessing_config = config.get_data_preprocessing_config()
data_preprocessing = DataPreprocessing(config=data_preprocessing_config)
data_preprocessing.preprocess_all_data()

[2024-02-02 18:50:36,363: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-02-02 18:50:36,383: INFO: common: yaml file: params.yaml loaded successfully]
[2024-02-02 18:50:36,393: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-02-02 18:50:36,397: INFO: common: created directory at: artifacts]
[2024-02-02 18:50:36,402: INFO: common: created directory at: artifacts/data_preprocessing]


  0%|          | 0/44441 [00:00<?, ?it/s]



  0%|          | 1/44441 [00:04<50:49:34,  4.12s/it]



  0%|          | 2/44441 [00:06<37:58:00,  3.08s/it]



  0%|          | 3/44441 [00:08<33:44:05,  2.73s/it]



  0%|          | 4/44441 [00:11<31:20:03,  2.54s/it]



  0%|          | 5/44441 [00:13<30:11:36,  2.45s/it]



  0%|          | 6/44441 [00:15<29:38:56,  2.40s/it]



  0%|          | 7/44441 [00:17<29:29:00,  2.39s/it]



  0%|          | 8/44441 [00:20<29:14:09,  2.37s/it]



  0%|          | 9/44441 [00:22<29:11:12,  2.36s/it]



  0%|          | 10/44441 [00:25<29:19:19,  2.38s/it]



  0%|          | 11/44441 [00:27<29:32:21,  2.39s/it]



  0%|          | 12/44441 [00:32<33:07:50,  2.68s/it]


KeyboardInterrupt: 