<a href="https://colab.research.google.com/github/changsin/ClassifyImages/blob/main/notebooks/dashboard_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Car Dashboard EDA
Below is a quick-and-dirty way of doing EDA over car dashboard image datasets. On a car dashboard, there are many sensor indicators: seatbelt, engine, tire pressure, etc. The goal of the project is to train a model that detects these indicators by class type.



## Problem Statement
The challenge is that, due to the angle and lighting conditions, many of the images contain reflections that prevent the target objects from being recognized. To overcome the problem, the images were categorized by visibility (0 being the worst; 1 and 2 being better quality) and the model was trained using only the visibility 1 and 2 data. The preliminary results seemed promising, but we then found that not all classes improved: the performance of certain classes degraded after the visibility-0 images were removed.

Here is the analysis over the dataset to explore the problems in more detail.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [11]:
# Root folder of the dataset; load_labels() and load_Xy() are called with it
# further down. The commented-out line is an alternative dataset location.
# DATA_ROOT = 'C:\\Users\\ChangsinLee\\SkNetworks_CarDashboard_21036\\01.rawData\\test\\test_top15\\'
DATA_ROOT = 'C:\\Users\\ChangsinLee\\SkNetworks_CarDashboard_21036\\01.rawData\\archive\\test_top15_vis12\\'

# Load labels by parsing xmls
For analysis, we just need y data (labels).

In [12]:
import glob
import os

def glob_files(folder, file_type='*'):
    """Collect file paths matching ``file_type`` inside ``folder``.

    Any match that is itself a directory is descended into recursively;
    the recursive call uses the default ``'*'`` pattern, so every file below
    a matching directory is collected.

    Args:
        folder: directory to search.
        file_type: glob pattern applied directly inside ``folder``.

    Returns:
        Alphabetically sorted list of file paths.
    """
    pattern = os.path.join(folder, file_type)
    matches = glob.glob(pattern)

    print('Searching ', pattern)
    collected = []
    for entry in matches:
        if not os.path.isdir(entry):
            collected.append(entry)
            continue
        # Directory match: gather everything underneath it.
        collected.extend(glob_files(entry + '/'))

    # Alphabetical order keeps images aligned with their annotation files.
    return sorted(collected)

def glob_folders(folder, file_type='*'):
    """List the directories matching ``file_type`` directly inside ``folder``.

    Args:
        folder: directory to search (not searched recursively).
        file_type: glob pattern applied inside ``folder``.

    Returns:
        Alphabetically sorted list of directory paths; plain files are skipped.
    """
    pattern = os.path.join(folder, file_type)
    candidates = glob.glob(pattern)

    print('Searching ', pattern)
    # Keep directories only, in alphabetical order.
    return sorted(entry for entry in candidates if os.path.isdir(entry))

In [13]:
from lxml import etree

def parse_xml(filename):
    """Parse one annotation XML file into per-image label records.

    Every ``image`` element found by ``tree.xpath('image')`` yields one row;
    every ``box`` child of that image yields one box record.

    Args:
        filename: path of the XML file, handed to ``etree.parse``.

    Returns:
        Object-dtype ndarray with one row per image:
        ``[name, width, height, boxes]``. ``boxes`` is ``np.array`` of
        ``(wtype, alertwarning, daynight, visibility, xtl, ytl, xbr, ybr)``
        tuples. Because each tuple mixes text and numbers, NumPy stores all
        eight fields as strings.

    Raises:
        KeyError: if a required XML attribute is missing.
        IndexError: if a box has no ``name``, ``daynight`` or ``visibility``
            ``attribute`` child.
        ValueError: if a numeric field cannot be converted.
    """
    image_labels = []

    tree = etree.parse(filename)
    for image in tree.xpath('image'):
        name = image.attrib['name']
        width = int(image.attrib['width'])
        height = int(image.attrib['height'])

        boxes = []
        for box in image.xpath('box'):
            xtl = float(box.attrib['xtl'])
            ytl = float(box.attrib['ytl'])
            xbr = float(box.attrib['xbr'])
            ybr = float(box.attrib['ybr'])

            alertwarning = box.attrib['label']
            wtype = box.xpath('attribute[@name="name"]')[0].text
            daynight = box.xpath('attribute[@name="daynight"]')[0].text
            visibility = int(box.xpath('attribute[@name="visibility"]')[0].text)

            boxes.append(
                (wtype, alertwarning, daynight, visibility, xtl, ytl, xbr, ybr))

        image_labels.append([name, width, height, np.array(boxes)])

    # Each row mixes scalars with a variable-length box array, so the result
    # is ragged: dtype=object must be explicit (NumPy >= 1.24 raises otherwise).
    return np.array(image_labels, dtype=object)


In [14]:
def load_labels(path, file_type='*'):
    """Load the labels of every annotation file found under ``path``.

    Args:
        path: folder searched with ``glob_files``.
        file_type: glob pattern selecting the annotation files.

    Returns:
        Tuple ``(y, dfy)``:

        * ``y`` - object-dtype ndarray with one row per annotation file:
          ``[file basename, parse_xml(file) result]``.
        * ``dfy`` - ndarray with one row per box:
          ``[file basename, image filename, width, height, wtype,
          alertwarning, daynight, visibility, xtl, ytl, xbr, ybr]``.
    """
    files = glob_files(path, file_type=file_type)
    print(files)

    y = []
    dfy = []

    for file in files:
        source = os.path.basename(file)
        labels = parse_xml(file)
        y.append([source, labels])
        for label in labels:
            filename, width, height, boxes = label[0], label[1], label[2], label[3]
            for box in boxes:
                # box holds the eight fields wtype .. ybr in that order.
                dfy.append([source, filename, width, height,
                            box[0], box[1], box[2], box[3],
                            box[4], box[5], box[6], box[7]])

    # The rows of y pair a string with a nested array (ragged), so the object
    # dtype must be explicit; NumPy >= 1.24 refuses to infer it.
    return np.array(y, dtype=object), np.array(dfy)

# Parse the annotation files matching '*.xml' under DATA_ROOT:
# `y` keeps the nested per-file records, `dfyy` the flat one-row-per-box table.
y, dfyy = load_labels(DATA_ROOT, file_type='*.xml')
y.shape
# image_labels = parse_xml('C:\\Users\\ChangsinLee\\SkNetworks_CarDashboard_21036\\01.rawData\\2차\\BMW\\BMW_day_0_1.xml')
# image_labels

Searching  C:\Users\ChangsinLee\SkNetworks_CarDashboard_21036\01.rawData\archive\test_top15_vis12\*.xml
['C:\\Users\\ChangsinLee\\SkNetworks_CarDashboard_21036\\01.rawData\\archive\\test_top15_vis12\\test_top15_vis12_0.xml']


  return np.array(image_labels)
  return np.array(y), np.array(dfy)


(1, 2)

In [15]:
def extract_labels(y):
    """Flatten the box records of every image in ``y`` into one array.

    Args:
        y: sequence of ``[name, image_labels]`` entries, where
            ``image_labels`` is a 2-D array whose column 3 holds the
            per-image box arrays.

    Returns:
        ``np.array`` of all box records, in input order.
    """
    records = []
    for entry in y:
        per_image_boxes = entry[1][:, 3]
        for image_boxes in per_image_boxes:
            # One element per box row of this image.
            records.extend(image_boxes)
    return np.array(records)

# Flatten every box record held in `y` into a single array.
labels = extract_labels(y)

# EDA using Pandas

To make queries easy, we convert the label data into a pandas DataFrame.

In [16]:
import pandas as pd

# One row per box, taken from the flat label records.
dfy = pd.DataFrame.from_records(dfyy)
dfy.columns = [
    'folder', 'filename', 'width', 'height',
    'class', 'alertwarning', 'daynight', 'visibility',
    'xtl', 'ytl', 'xbr', 'ybr',
]

# Drop the image name, image size and box geometry columns; the remaining
# columns are the ones queried below.
dfy = dfy.drop(columns=['filename', 'width', 'height', 'xtl', 'ytl', 'xbr', 'ybr'])
dfy.head()

Unnamed: 0,folder,class,alertwarning,daynight,visibility
0,test_top15_vis12_0.xml,Parking,alert,day,1
1,test_top15_vis12_0.xml,Parking,warning,day,0
2,test_top15_vis12_0.xml,Engine,warning,day,0
3,test_top15_vis12_0.xml,ABS,warning,night,0
4,test_top15_vis12_0.xml,Brake,warning,night,0


In [17]:

# Column-wise assignment is used on purpose: `dfy.loc[i][col] = value` is
# chained indexing, which may write to a temporary copy and leave `dfy`
# unchanged (always so under pandas copy-on-write).

# Reduce the XML file name to its 7th character from the end,
# e.g. the '2' in 'test_top15_vis12_0.xml'.
dfy['folder'] = dfy['folder'].str[-7]
# Merge the alert/warning flag into the class name, e.g. 'alert@Parking'.
dfy['class'] = dfy['alertwarning'] + '@' + dfy['class']

dfy.drop('alertwarning', inplace=True, axis=1)
dfy.head()
# dfy.loc[0]['category'][-7]



Unnamed: 0,folder,class,daynight,visibility
0,2,alert@Parking,day,1
1,2,warning@Parking,day,0
2,2,warning@Engine,day,0
3,2,warning@ABS,night,0
4,2,warning@Brake,night,0


In [18]:
# Replace the day/night text with its integer category code.
dfy['daynight'] = dfy['daynight'].astype('category').cat.codes

# Put the columns in the order used for the analysis below.
dfy = dfy.loc[:, ['class', 'daynight', 'folder', 'visibility']]

dfy.tail()


Unnamed: 0,class,daynight,folder,visibility
4711,warning@Parking,1,2,2
4712,warning@Engine,1,2,2
4713,warning@Tire,0,2,0
4714,warning@Engine,1,2,1
4715,alert@Seatbelt,1,2,0


In [19]:
# Per-class visibility statistics, one tab-separated line per class:
#   zero-object-ratio : visibility-0 objects / all objects of the class
#   0-in-f0-ratio     : visibility-0 objects in folder '0' / all objects in folder '0'
#   non-f0-ratio      : visibility-0 objects outside folder '0' / all visibility-0 objects
# A ratio whose denominator is zero is printed as 0.
# 'visibility' and 'folder' are compared as the strings '0'
# (presumably the label table stores strings - verify if the loader changes).
names = pd.unique(dfy['class'])
print("\tclass\tzero-object-ratio\t0-in-f0-ratio\tnon-f0-ratio")
for name in names:
    dfys = dfy[dfy['class'] == name]
    is_zero_vis = dfys['visibility'] == '0'
    in_zero_folder = dfys['folder'] == '0'

    total = len(dfys)
    total_zeros = int(is_zero_vis.sum())
    # total objects in zero folder - there could be visibility 1 or 2 objects too
    zero_f_total = int(in_zero_folder.sum())
    # pure zero visibility objects in zero folder
    zeros_in_zero_f = int((is_zero_vis & in_zero_folder).sum())
    zeros_elsewhere = total_zeros - zeros_in_zero_f

    print("{}\t{} ({}/{})\t{} ({}/{})\t{} ({}/{})".format(
        name, round(total_zeros/total, 2), total_zeros, total,
        round(zeros_in_zero_f/zero_f_total, 2) if zero_f_total != 0 else 0,
        zeros_in_zero_f, zero_f_total,
        # A class without any visibility-0 object must not divide by zero.
        round(zeros_elsewhere/total_zeros, 2) if total_zeros != 0 else 0,
        zeros_elsewhere, total_zeros))


	class	zero-object-ratio	0-in-f0-ratio	non-f0-ratio
alert@Parking	0.31 (158/502)	0 (0/0)	1.0 (158/158)
alert@Coolant	0.25 (64/259)	0 (0/0)	1.0 (64/64)
alert@Steering	0.54 (113/211)	0 (0/0)	1.0 (113/113)
alert@Retaining	0.51 (96/189)	0 (0/0)	1.0 (96/96)
alert@Seatbelt	0.51 (258/503)	0 (0/0)	1.0 (258/258)
alert@Brake	0.36 (119/328)	0 (0/0)	1.0 (119/119)
alert@Distance	0.64 (146/228)	0 (0/0)	1.0 (146/146)


# Load and plot images
Let's take a quick look at the images.

In [None]:
# Built by plain string concatenation, so it relies on DATA_ROOT ending with
# a path separator.
path = DATA_ROOT + 'BMW_day_0_1'

import cv2

# Edge length for the optional resize step in load_images
# (that step is currently commented out).
IMAGE_SIZE = 320

def load_images(path, file_type="*"):
    """Read every image found under ``path`` with OpenCV.

    Files that ``cv2.imread`` cannot decode are reported and skipped.

    Args:
        path: folder searched with ``glob_files``.
        file_type: glob pattern selecting the files to read.

    Returns:
        ``np.array`` of the decoded images when they all share one shape
        (or none were found); otherwise a 1-D object-dtype array holding one
        image per element.
    """
    files = glob_files(path, file_type)

    images = []
    for file in files:
        print(file)
        image = cv2.imread(file)
        if image is None:
            print(file, ' is not an image file')
            continue
        # image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
        # normalize
        # image = image / 255
        images.append(image)

    if len({image.shape for image in images}) <= 1:
        return np.array(images)

    # Images of different sizes cannot be stacked into a regular array
    # (NumPy >= 1.24 raises on ragged input), so store them as objects.
    ragged = np.empty(len(images), dtype=object)
    for position, image in enumerate(images):
        ragged[position] = image
    return ragged

def plot_images(X, idx=None, limit=20):
    """Show images from ``X`` in a 4-column grid on a new figure.

    Args:
        X: indexable collection of images accepted by ``imshow``.
        idx: iterable of indices into ``X`` to draw. When ``None``, the first
            ``limit`` images are drawn.
        limit: maximum number of images drawn when ``idx`` is ``None``; it is
            not consulted when ``idx`` is given.
    """
    fig = plt.figure(figsize=(20, 20))

    if idx is None:
        idx = range(min(len(X), limit))
    idx = list(idx)

    columns = 4
    # Keep the usual 5x4 layout, adding rows only when more than 20 images
    # are requested (a fixed grid would make add_subplot raise).
    rows = max(5, -(-len(idx) // columns))

    for position, image_id in enumerate(idx, start=1):
        axis = fig.add_subplot(rows, columns, position)
        axis.axis('off')
        axis.imshow(X[image_id])


In [None]:
def load_Xy(path, file_type='*'):
    """Load images together with their labels.

    For every annotation file found under ``path``, each labelled image is
    read from the folder named like the annotation file without its
    extension.

    Args:
        path: folder searched with ``glob_files``.
        file_type: glob pattern selecting the annotation files.

    Returns:
        Tuple ``(X, y)`` aligned by position:

        * ``X`` - ``np.array`` of the images when all were read and share one
          shape; otherwise a 1-D object-dtype array with one entry per image.
          An image ``cv2.imread`` could not read is kept as ``None``, so ``X``
          stays aligned with ``y``.
        * ``y`` - object-dtype ndarray of ``[filename, width, height, boxes]``
          rows.
    """
    files = glob_files(path, file_type=file_type)
    print(files)

    X = []
    y = []

    for file in files:
        image_folder = os.path.splitext(file)[0]
        for label in parse_xml(file):
            filename, width, height, boxes = label[0], label[1], label[2], label[3]
            X.append(cv2.imread(os.path.join(image_folder, filename)))
            y.append([filename, width, height, boxes])

    all_read = all(image is not None for image in X)
    if all_read and len({image.shape for image in X}) <= 1:
        images = np.array(X)
    else:
        # Mixed sizes or unreadable files: a regular array is impossible and
        # NumPy >= 1.24 raises on ragged input, so store one object per image.
        images = np.empty(len(X), dtype=object)
        for position, image in enumerate(X):
            images[position] = image

    # Label rows mix scalars with a box array, hence the explicit object dtype.
    return images, np.array(y, dtype=object)

In [None]:
import matplotlib.patches as patches

# Edge colours as RGB triples with components in 0..1: green, yellow, red.
# plot_image() picks the entry using the integer in each box's first column.
COLORS = [(0, 255/255, 0), (255/255, 255/255, 0), (255/255, 0, 0)]

def create_patch_rectangle(y, color):
    """Build an unfilled rectangle patch from two corner points.

    Args:
        y: indexable holding ``x0, y0, x1, y1`` in positions 0 to 3.
        color: edge colour of the rectangle.

    Returns:
        ``patches.Rectangle`` anchored at ``(y[0], y[1])`` whose width and
        height are the corner differences truncated to ``int``.
    """
    x0, y0 = y[0], y[1]
    span_x = int(y[2] - x0)
    span_y = int(y[3] - y0)
    return patches.Rectangle((x0, y0), span_x, span_y,
                             fill=False, edgecolor=color)

def plot_image(image, boxes, axis):
    """Draw ``image`` and its bounding boxes on ``axis``.

    Args:
        image: image array accepted by ``imshow``.
        boxes: iterable of rows ``[class_id, x0, y0, x1, y1]``; ``class_id``
            selects the edge colour from ``COLORS``.
        axis: matplotlib axes that receives both the boxes and the image.
    """
    for box in boxes:
        class_id = int(box[0])
        rect = create_patch_rectangle(box[1:], COLORS[class_id])
        axis.add_patch(rect)

    # Draw on the axes that was passed in rather than on pyplot's implicit
    # "current" axes, so the image always lands next to its boxes.
    # NOTE(review): images read with cv2.imread are in BGR channel order while
    # imshow interprets colour images as RGB - confirm whether a conversion
    # is wanted upstream.
    axis.imshow(image)

def plot_images_with_boxes(X, y, idx=None, limit=20):
    """Show images with their bounding boxes in a 5-column grid.

    Args:
        X: indexable collection of images.
        y: indexable collection of ``[filename, width, height, boxes]`` rows
            aligned with ``X``; each ``boxes`` row holds
            ``wtype, alertwarning, daynight, visibility, xtl, ytl, xbr, ybr``.
        idx: iterable of indices to draw. When ``None``, the first ``limit``
            images are drawn.
        limit: maximum number of images drawn when ``idx`` is ``None``; it is
            not consulted when ``idx`` is given.
    """
    fig = plt.figure(figsize=(150, 150))

    if idx is None:
        idx = range(min(len(X), limit))
    idx = list(idx)

    columns = 5
    # Keep the usual 5x5 layout, adding rows only when more than 25 images
    # are requested (a fixed grid would make add_subplot raise).
    rows = max(5, -(-len(idx) // columns))

    for position, image_id in enumerate(idx, start=1):
        records = y[image_id][3]
        # Columns 3..7 are visibility, xtl, ytl, xbr, ybr. An image without
        # boxes has a 1-D empty array, which cannot be sliced in 2-D.
        raw_boxes = records[:, 3:8] if records.ndim == 2 else []
        # Fields may be stored as text such as '12.5', hence float before int.
        boxes = np.array(
            [[int(float(value)) for value in row] for row in raw_boxes])

        axis = fig.add_subplot(rows, columns, position)
        axis.get_xaxis().set_visible(False)
        axis.get_yaxis().set_visible(False)
        plot_image(X[image_id], boxes, axis)

In [None]:
# images = load_images(path)
# Load images and labels for the annotation files matching 'BMW_day_2_3*.xml'.
X, y = load_Xy(DATA_ROOT, file_type='BMW_day_2_3*.xml')

# idx is given explicitly, so exactly image 50 is drawn; limit only applies
# when idx is None.
plot_images_with_boxes(X, y, idx=[50], limit=1)