In [1]:
import sys

# Make modules stored in the sibling Preprocess-Data folder importable
# (presumably where `category_reducer` lives - confirm against the repo layout).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path.
if '../Preprocess-Data/' not in sys.path:
    sys.path.append('../Preprocess-Data/')

In [2]:
import numpy as np 
import pandas as pd 
import os 

from category_reducer import category_reducer
from tensorflow.keras.preprocessing import image

import warnings 
# Suppress every warning for the rest of the kernel session so cell output
# stays clean. NOTE(review): this also hides deprecation and runtime warnings
# (e.g. NumPy's empty-slice warning), so disable it while debugging.
warnings.filterwarnings("ignore")

In [4]:
# Location of the news dataset (CSV) used throughout this notebook.
data_path = '../Data/news-data-with-imgs.csv'

# `data` keeps the frame exactly as read from disk.
data = pd.read_csv(data_path)

# Work on a copy so the raw frame stays untouched, then reduce the number of
# categories with the project helper.
df = category_reducer(data.copy())

In [5]:
# Summarise the category-reduced frame: column names, non-null counts, dtypes
# and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Title                100 non-null    object
 1   Content              100 non-null    object
 2   Content_url          100 non-null    object
 3   News_type            100 non-null    object
 4   Day_month_year_hour  100 non-null    object
 5   Img_url              100 non-null    object
 6   img_path             100 non-null    object
dtypes: object(7)
memory usage: 5.6+ KB


In [9]:
class PreprocessImg:

    '''
    # Class to load the images of a directory and get their size information.

    Args:
        - images_path: Directory that contains the image files. Defaults to
          "../Data/imgs/", the location originally hard-coded here.
    '''
    def __init__(self, images_path="../Data/imgs/"):
        self.images_path = images_path

    def load_img(self):
        '''
        # Load the images stored directly inside `self.images_path`

        Entries are visited in sorted name order so the returned list is
        reproducible (os.listdir gives no ordering guarantee). Sub-directories
        are skipped because they cannot be opened as images.

        Returns:
            - loaded_imgs: List of loaded images
        '''
        loaded_imgs = []
        for file_name in sorted(os.listdir(self.images_path)):
            file_path = os.path.join(self.images_path, file_name)
            if not os.path.isfile(file_path):
                continue
            loaded_imgs.append(image.load_img(file_path))
        return loaded_imgs

    def get_size_info(self, loaded_imgs):
        '''
        # Get the size information of the images

        Each element must expose a `.size` attribute that is read as
        (width, height).

        Args:
            - loaded_imgs: list of loaded images

        Returns:
            - widths_mean: Mean of the widths of the images
            - widths_std: Standard deviation (population, np.std default) of the widths
            - heights_mean: Mean of the heights of the images
            - heights_std: Standard deviation (population, np.std default) of the heights

            All four values are NaN when `loaded_imgs` is empty.
        '''
        img_widths = [img.size[0] for img in loaded_imgs]
        img_heights = [img.size[1] for img in loaded_imgs]

        # With no images there is nothing to average; return NaN explicitly
        # (the same values np.mean/np.std would give) without relying on
        # NumPy's empty-slice warning path.
        if len(img_widths) == 0:
            return np.nan, np.nan, np.nan, np.nan

        widths_mean = np.mean(img_widths)
        widths_std = np.std(img_widths)

        heights_mean = np.mean(img_heights)
        heights_std = np.std(img_heights)

        return widths_mean, widths_std, heights_mean, heights_std

In [10]:
# Load every image from the default directory and summarise its dimensions.
preprocessor = PreprocessImg()
loaded_imgs = preprocessor.load_img()

# get_size_info returns (width mean, width std, height mean, height std).
size_stats = preprocessor.get_size_info(loaded_imgs)
widths_mean, widths_std, heights_mean, heights_std = size_stats

# Report each statistic next to its label, in the order returned.
stat_labels = (
    "Mean width:",
    "Standard deviation of widths:",
    "Mean height:",
    "Standard deviation of heights:",
)
for stat_label, stat_value in zip(stat_labels, size_stats):
    print(stat_label, stat_value)

Mean width: 1029.85
Standard deviation of widths: 355.39463628479257
Mean height: 709.22
Standard deviation of heights: 297.7434660911974
