<a href="https://colab.research.google.com/github/LinarKulinar/ml-for-sec-lab2/blob/main/Get_feature_from_image.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import random
from random import random, randrange, randint

from operator import itemgetter

import os, sys
import pandas as pd

import pickle


#### Парсим изображения из папки

In [2]:
# Padding level of the data set to process. It selects both the input and the
# output sub-directory, so switching data sets is a one-line change.
# Values used by earlier runs of this cell: '0.1', '0.2'.
padding_level = '0.9'
input_dir_name = f'encoded_images_all/padding_level_{padding_level}/'
output_dir_name = f'features/padding_level_{padding_level}/'

# NOTE(review): os.listdir returns entries in arbitrary order; the labels loaded
# later are attached to the images by position, so confirm that the labels file
# was produced with the same ordering.
pictures = os.listdir(input_dir_name)
count_pictures = len(pictures)

imgs_raw = []
for picture_name in pictures:
    picture_path = f'{input_dir_name}{picture_name}'
    # IMREAD_GRAYSCALE (== 0) loads the picture as a single-channel image.
    im = cv2.imread(picture_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread does not raise on a missing/unreadable/non-image file, it
    # returns None; fail here with the offending path instead of much later.
    if im is None:
        raise OSError(f'cv2.imread could not read an image from {picture_path!r}')
    imgs_raw.append(im)

In [3]:
# Convert the list of loaded images into one NumPy array
# (a regular 3-D array only if every image has the same shape — TODO confirm).
imgs = np.array(imgs_raw)

#### Парсим метки изображений

In [4]:

# Read the pickled labels and cast them to integers.
# NOTE(review): pickle.load can execute arbitrary code from the file; only use
# it on a labels file you produced yourself.
# Assumes the pickled object provides .astype (e.g. a NumPy array) — TODO confirm.
with open('encoded_images_all/labels.pickle', 'rb') as labels_file:
    labels = pickle.load(labels_file).astype(int)

In [5]:
# Drop the last directory entry from the file-name list. Slicing against
# count_pictures (the length recorded when the directory was listed) instead of
# [:-1] keeps this cell idempotent: re-running it no longer removes one more
# name on every execution.
# NOTE(review): imgs was already built from the full list and no later cell
# reads `pictures`; confirm whether this trim is still needed.
pictures = pictures[:max(count_pictures - 1, 0)]

#### Опишем функцию `get_features`, которая по изображению возвращает вектор признаков, вычисленный методом пар значений.

In [6]:
q = 2 # Second bit plane. Kept for any code that still reads it; get_features takes the plane via q_beatmap and does not read this global.

def get_features(img, method, q_beatmap = 2):
  """
  Extract a feature vector from a single-channel image.

  :param img: source image as a 2-D array; intensities are binned into the
              256 integer levels 0..255
  :param method: name of the extraction method; only 'pixel_pairs_value'
                 is supported
  :param q_beatmap: number of the bit plane to analyse, from 1 (least
                    significant bit) to 8
  :return: NumPy array of 256 floats, one per intensity level
  :raises ValueError: if method is not a supported name
  :raises AssertionError: if img is not 2-D or q_beatmap is outside 1..8
  """
  assert len(img.shape) == 2

  def get_hist_theoretical(hist_empiritical, q_beatmap = 2):
    """
    Build the theoretical histogram from the empirical one.

    Under the hypothesis that two intensities differing only in bit
    q_beatmap are equally frequent, both members of such a pair get the
    mean of their two empirical counts.

    :param hist_empiritical: empirical histogram, 256 counts
    :param q_beatmap: number of the bit plane to analyse, 1..8
    :return: NumPy array of 256 floats
    """
    assert 1 <= q_beatmap <= 8, "q_beatmap отличается от допустимых значений"
    assert len(hist_empiritical) == 256
    hist_empiritical = np.asarray(hist_empiritical)
    bitmask = 1 << (q_beatmap - 1)  # single 1 at the position of the analysed bit
    levels = np.arange(256)
    pair_low = levels & (255 - bitmask)  # same level with the analysed bit cleared
    pair_high = levels | bitmask         # same level with the analysed bit set
    return (hist_empiritical[pair_low] + hist_empiritical[pair_high]) / 2

  def get_features_pixel_pairs_value(image, q_beatmap = 2):
    """
    Feature vector of the pairs-of-values method: the squared difference
    between the empirical and the theoretical histogram, per intensity level.

    :param image: source image
    :param q_beatmap: number of the bit plane to analyse, 1..8
    :return: NumPy array of 256 floats
    """
    hist_empiritical = np.histogram(image, bins=256, range=(0, 256))[0]
    # Use the requested plane; the previous version read the global `q` here,
    # which made the q_beatmap argument of get_features a no-op.
    hist_theoretical = get_hist_theoretical(hist_empiritical, q_beatmap)
    return (hist_empiritical - hist_theoretical) ** 2

  if method == 'pixel_pairs_value':
    return get_features_pixel_pairs_value(img, q_beatmap)
  else:
    raise ValueError("Invalid param 'method' value")
  


# get_features(imgs[0], 'pixel_pairs_value', q_beatmap=2)

#### Передадим изображения в метод `get_features`

In [7]:
# One feature vector per image (pairs-of-values method, bit plane 2),
# collected into a single 2-D array.
feature_rows = [get_features(img, 'pixel_pairs_value', q_beatmap=2) for img in imgs]
features = np.array(feature_rows)

In [8]:
# Show the size of the feature matrix: one row per image, one column per histogram bin.
features.shape

(1000, 256)

In [9]:
# Wrap the feature matrix in a DataFrame; columns get the default integer names.
df = pd.DataFrame(features)

In [10]:
# Attach the labels as an extra column, matched to the rows by position.
# NOTE(review): this presumes labels are stored in the same order in which the
# images were listed from disk — confirm.
df['label'] = labels 

In [11]:
# Sanity check: print the container type, then display the last 10 rows.
print(type(df))
df.tail(10)

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,247,248,249,250,251,252,253,254,255,label
990,0.0,1.0,0.0,1.0,58081.0,107912.25,58081.0,107912.25,246016.0,119370.25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
991,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,289.0,9604.0,...,4.0,9.0,2.25,9.0,2.25,0.0,0.0,0.0,0.0,0
992,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4160.25,1936.0,506.25,1936.0,506.25,3364.0,8464.0,3364.0,8464.0,0
993,0.25,380.25,0.25,380.25,5776.0,1640.25,5776.0,1640.25,121.0,182.25,...,1406.25,144.0,6.25,144.0,6.25,729.0,3660.25,729.0,3660.25,0
994,0.0,4.0,0.0,4.0,25.0,400.0,25.0,400.0,2209.0,576.0,...,3969.0,2209.0,441.0,2209.0,441.0,839056.0,558009.0,839056.0,558009.0,0
995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.25,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0
996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,361.0,2500.0,777042.25,2500.0,777042.25,69797670.25,3653832.25,69797670.25,3653832.25,0
997,0.0,576.0,0.0,576.0,8556.25,12.25,8556.25,12.25,38220.25,96721.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.25,36.0,...,210.25,4.0,0.0,4.0,0.0,930.25,2162.25,930.25,2162.25,0
999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


#### Сохраним полученные фичи в файл

In [12]:
# to_csv does not create missing directories, so make sure the output
# directory exists before writing (no-op when it is already there).
os.makedirs(output_dir_name, exist_ok=True)
df.to_csv(f'{output_dir_name}features.csv')