## 1. Import the given Xview Labels
***Ref: https://github.com/DIUx-xView/data_utilities/blob/master/xview_class_labels.txt***

In [1]:
# Ensure label.txt file is downloaded into the working directory 
file_name = 'xview_class_labels.txt'

In [2]:
# Create the Xview Data dictionary for comparison
xview_label_dict = {}

with open('xview_class_labels.txt') as file:
    for line in file:
        line = line.rstrip('\n')
        (key, value) = line.split(':')
        xview_label_dict[int(key)] = value

In [3]:
# print each dictionary item
print("" + "\n".join("{!r}: {!r},".format(k, v) for k, v in xview_label_dict.items()) + "}")

11: 'Fixed-wing Aircraft',
12: 'Small Aircraft',
13: 'Cargo Plane',
15: 'Helicopter',
17: 'Passenger Vehicle',
18: 'Small Car',
19: 'Bus',
20: 'Pickup Truck',
21: 'Utility Truck',
23: 'Truck',
24: 'Cargo Truck',
25: 'Truck w/Box',
26: 'Truck Tractor',
27: 'Trailer',
28: 'Truck w/Flatbed',
29: 'Truck w/Liquid',
32: 'Crane Truck',
33: 'Railway Vehicle',
34: 'Passenger Car',
35: 'Cargo Car',
36: 'Flat Car',
37: 'Tank car',
38: 'Locomotive',
40: 'Maritime Vessel',
41: 'Motorboat',
42: 'Sailboat',
44: 'Tugboat',
45: 'Barge',
47: 'Fishing Vessel',
49: 'Ferry',
50: 'Yacht',
51: 'Container Ship',
52: 'Oil Tanker',
53: 'Engineering Vehicle',
54: 'Tower crane',
55: 'Container Crane',
56: 'Reach Stacker',
57: 'Straddle Carrier',
59: 'Mobile Crane',
60: 'Dump Truck',
61: 'Haul Truck',
62: 'Scraper/Tractor',
63: 'Front loader/Bulldozer',
64: 'Excavator',
65: 'Cement Mixer',
66: 'Ground Grader',
71: 'Hut/Tent',
72: 'Shed',
73: 'Building',
74: 'Aircraft Hangar',
76: 'Damaged Building',
77: 'Facility'

## 2. Generate Labels with Text Embedding for the Xview Data

In [4]:
civ_aircraft_sub_dict = {'Fixed-wing Aircraft' : [],
                         'Small Aircraft' : ['wings', 'elevators', 'rudder', 'tail', 'cockpit', 'single engine', 'white', 'red', 'blue', 'silver', 'fuselage'],
                         'Cargo Plane' : ['wings', 'elevators', 'rudder', 'tail',  'cockpit', 'multi engine', 'white', 'red', 'blue', 'silver', 'fuselage'],
                         'Helicopter' : ['main rotor', 'tail rotor', 'rudder', 'tail',  'cockpit', 'red', 'white', 'blue', 'fuselage']}

In [5]:
passenger_sub_dict = {'Passenger Vehicle' : [],
                      'Small Car' : ['wheels', 'windows', 'cab', 'doors', 'hood', 'trunk', 'white', 'black', 'silver', 'red', 'blue', 'small'],
                      'Bus' : ['wheels', 'windows', 'cab', 'doors', 'long', 'rectangle', 'white', 'silver', 'red', 'blue']}

In [6]:
truck_sub_dict = {'Pickup Truck' : ['wheels', 'windows', 'cab', 'doors', 'hood', 'bed', 'white', 'black', 'silver', 'red', 'blue'],
                  'Utility Truck' : ['wheels', 'windows', 'cab', 'doors', 'hood', 'bed', 'box', 'crane', 'winch', 'white'],
                  'Truck' : [],
                  'Cargo Truck' : ['wheels', 'windows', 'cab', 'doors', 'bed', 'box', 'container', 'tractor', 'trailer', 'long', 'rectangle'],
                  'Truck w/Box' : ['wheels', 'windows', 'cab', 'doors', 'bed', 'box'],
                  'Truck Tractor' : ['Shipping Container', 'wheels', 'windows', 'cab', 'doors', 'bed', 'box', 'trailer'],
                  'Trailer' : [],
                  'Truck w/Flatbed' : ['wheels', 'windows', 'cab', 'doors', 'bed', 'flat', 'trailer'],
                  'Truck w/Liquid' : ['wheels', 'windows', 'cab', 'doors', 'bed', 'tube', 'tank', 'trailer'],
                  'Crane Truck' : ['wheels', 'windows', 'cab', 'doors', 'boom', 'crane', 'outriggers', 'carrier', 'red', 'blue', 'white', 'yellow', 'orange', 'rectangle']}

In [7]:
rail_sub_dict = {'Railway Vehicle' : [],
                 'Passenger Car' : ['wheels', 'rail', 'long', 'windows', 'rectangle'],
                 'Cargo Car' : ['wheels', 'rail', 'long', 'box', 'Shipping Container', 'rectangle'],
                 'Flat Car' : ['wheels', 'rail', 'long', 'bed', 'flat', 'rectangle'],
                 'Tank car' : ['wheels', 'rail', 'long', 'tube', 'cylinder', 'rectangle'],
                 'Locomotive' : ['wheels', 'rail', 'long', 'engine', 'stack', 'rectangle']}

In [8]:
maritime_sub_dict = {'Maritime Vessel' : [],
                     'Motorboat': ['boat', 'water', 'small', 'wake', 'engine', 'sharp', 'white'],
                     'Sailboat': ['boat', 'water', 'small', 'sails', 'sharp', 'white'],
                     'Tugboat': ['boat', 'water', 'small', 'wake', 'engine', 'round', 'red', 'yellow', 'orange', 'white', 'blue', 'fender', 'wide', 'bridge', 'stack'],
                     'Barge': ['boat', 'water', 'medium', 'rectangle', 'red', 'yellow', 'orange', 'blue', 'grey', 'wide'],
                     'Fishing Vessel': ['boat', 'water', 'medium', 'wake', 'engine', 'round', 'red', 'yellow', 'orange', 'white', 'blue', 'green', 'gantry', 'masthead', 'bridge', 'stack'],
                     'Ferry': ['boat', 'water', 'medium', 'wake', 'engine', 'round', 'windows', 'yellow', 'orange', 'white', 'bridge', 'stack', 'wide'],
                     'Yacht': ['boat', 'water', 'medium', 'wake', 'engine', 'sharp', 'white'],
                     'Container Ship': ['boat', 'water', 'large', 'wake', 'engine', 'rectangle', 'white', 'blue', 'red', 'Shipping Container', 'stack', 'bridge'],
                     'Oil Tanker': ['boat', 'water', 'large', 'wake', 'engine', 'round', 'white', 'green', 'red', 'flat', 'stack', 'bridge']}

In [9]:
engineering_sub_dict = {'Engineering Vehicle' : [],
                        'Tower crane' : ['fixed', 'counter weight', 'crane', 'boom', 'cab', 'narrow', 'long', 'thin'],
                        'Container Crane' : ['Shipping Container','fixed', 'counter weight', 'crane', 'boom', 'cab', 'spreader', 'wide', 'long'],
                        'Reach Stacker' : ['crane', 'boom', 'cab', 'spreader', 'wheels', 'windows', 'doors', 'red', 'blue', 'white', 'yellow', 'orange'],
                        'Straddle Carrier' : ['cab', 'spreader', 'wheels', 'windows', 'doors', 'red', 'blue', 'white', 'yellow', 'orange', 'square', 'flat'],
                        'Mobile Crane' : ['wheels', 'tracks', 'windows', 'cab', 'doors', 'boom', 'crane', 'outriggers', 'carrier', 'red', 'blue', 'white', 'yellow', 'orange', 'rectangle'],
                        'Dump Truck' : ['wheels', 'windows', 'cab', 'doors', 'haul bed', 'large', 'square', 'rectangle', 'red', 'blue', 'white', 'yellow', 'orange'],
                        'Haul Truck' : [],
                        'Scraper/Tractor' : ['wheels', 'windows', 'cab', 'doors', 'bowl', 'tractor', 'scraper', 'yellow', 'rectangle', 'trailer'],
                        'Front loader/Bulldozer' : ['wheels', 'windows', 'cab', 'doors', 'scoop', 'yellow', 'orange', 'green'],
                        'Excavator' : ['tracks', 'windows', 'cab', 'doors', 'boom', 'bucket', 'arm', 'square', 'yellow', 'orange', 'red', 'white', 'blade'],
                        'Cement Mixer' : ['wheels', 'windows', 'cab', 'doors', 'hood', 'water tank', 'barrel', 'hopper', 'rectangle', 'red', 'blue', 'white', 'yellow', 'orange'],
                        'Ground Grader' : ['wheels', 'windows', 'cab', 'doors', 'blade', 'frame', 'yellow', 'red', 'orange', 'rectangle']}

In [10]:
building_sub_dict = {'Hut/Tent' : [],
                     'Shed' : [],
                     'Building' : [],
                     'Aircraft Hangar' : [],
                     'Damaged Building' : [],
                     'Facility' : []}

In [11]:
other_struct_sub_dict = {'Construction Site': [],
                         'Vehicle Lot'  : ['vehicles', 'wheels', 'tracks', 'lights', 'fence', 'flat', 'concrete', 'asphalt', 'dirt'],
                         'Helipad' : ['H', 'green', 'red', 'yellow', 'circle', 'flat', 'lights'],
                         'Storage Tank' : ['round', 'cylinder', 'tube', 'white', 'yellow', 'blue'],
                         'Shipping container lot' : ['Shipping Container', 'flat', 'concrete', 'dirt', 'asphalt', 'lights', 'fence'],
                         'Shipping Container' : ['rectangle', 'box', 'small', 'red', 'green', 'yellow', 'blue'],
                         'Pylon' : ['lines', 'grey', 'tall'],
                         'Tower' : []}

In [12]:
# Changed dictionary format for ease of tokenizing
'''
xview_embed_dict = {'Civilian Aircraft' : civ_aircraft_sub_dict,
                    'Civilian Passenger Vehicle' : passenger_sub_dict,
                    'Civilian Truck' : truck_sub_dict,
                    'Railway Vehicle' : rail_sub_dict,
                    'Maritime Vessels' : maritime_sub_dict,
                    'Civilian Engineering' : engineering_sub_dict,
                    'Buildings' : building_sub_dict,
                    'Other Structures' : other_struct_sub_dict}
'''

xview_embed_dict = {**civ_aircraft_sub_dict,
                    **passenger_sub_dict,
                    **truck_sub_dict,
                    **rail_sub_dict,
                    **maritime_sub_dict,
                    **engineering_sub_dict,
                    **building_sub_dict,
                    **other_struct_sub_dict}

## 3. Ensure the given Xview Labels and the Generated Embedding Dictionary Match

In [13]:
# Extract the labels from the Xview Embedding Dictionary
xview_labels = [] # create a list to store possible lables

for key in xview_embed_dict.keys():
    xview_labels.append(key)

In [14]:
# Ensure the Embedded dictionary is the same length as the Xview Labels

if len(xview_label_dict) == len(xview_embed_dict):
    print('The Embedded Dictionary and Label Dictionary are the same size.')
else:
    print('ERROR: The Embedded Dictionary and Label Dictionary are NOT the same size.')

The Embedded Dictionary and Label Dictionary are the same size.


In [15]:
# Ensure the Labels in Embedded dictionary match the given Xview Labels

i = 0

for key in xview_label_dict.keys():
    label_value = xview_label_dict[key]
    if label_value == xview_labels[i]:
        i += 1
    else:
        print(label_value + ' has no match in the embedded dictionary')
        print('Embedded dictionary returns : ' + xview_labels[i] + '\n')
        i += 1

## 4. Generate Labels with Text Embedding for Military Vehicles

***References:***

 ***Worldwide Equipment Guide: https://odin.tradoc.army.mil/WEG***
 
 ***TC 7-100.4 Hybrid Threat Force Structure Organization Guide (2015)***

In [16]:
'''
Fixed Wing : B52, F16, E3
Rotary Wing : UH60, CH47, AH64
UAV : Shadow, predator
'''

aircraft_sub_dict = {'Military Fixed-wing' : ['wings', 'elevators', 'rudder', 'tail', 'cockpit', 'multi engine', 'single engine', 'weapons', 'external fuel tank', 'grey', 'brown', 'green', 'black', 'blue', 'fuselage'],
                     'Military Helicopter' : ['main rotor', 'tail rotor', 'rudder', 'tail',  'cockpit', 'weapons', 'external fuel tank', 'grey', 'brown', 'green', 'black', 'skids', 'wheels', 'fuselage'],
                     'UAV' : ['trailer', 'small', 'wings', 'tail', 'rudder']}

In [17]:
'''
Short Range Anti-Air : Roland-2 French SHORAD, HQ-7 (FM-80) Chinese SHORAD, CSA-41 (FM-80) Iranian SHORAD
Point Defense : LD 2000 (LuDun-2000) Chinese 8x8 Mobile Air Defense Gun Missile System
Long Range Missile : HQ-22 Chinese Long-Range Air Defense Missile System, S-300P (SA-10 Grumble) Russian 8x8 Long-Range Surface-to-Air Missile System
Radar System : P-40 (Long Track) Russian Mobile 3-D UHF Radar System, 96L6E (Cheese Board) Russian Early-Warning and Acquisition Radar
'''

ada_sub_dict = {'General Anti-Air' : ['launch tube', 'turret', 'gun', 'missile', 'rectangle', 'cab', 'hood', 'wheels', 'tracks', 'trailer', 'platform', 'green', 'tan', 'radar', 'array', 'antenna'],
                'Radar System' : ['wheels', 'cab', 'windows', 'doors', 'radar', 'array', 'antenna', 'square', 'green', 'tan', 'Shipping Container']}

In [18]:
'''
Towed Cannon : M777, various field howitzers
Multiple-Rocket Launchers : BM-21 122mm MLR, and various other MLRS
Artillery Command and Reconnaissance : 
'''

artillery_sub_dict = {'Towed Cannon' : ['cannon', 'carriage', 'wheels', 'tan', 'green', 'square'],
                      'Multiple-Rocket Launchers' : ['wheels', 'tracks', 'cab', 'launch tube', 'windows', 'doors', 'tan', 'green', 'rectangle']}

In [19]:
'''
Minelaying systems: GBL-130 Chinese Armored Mine Dispenser, UMZ Russian 6x6 Scatterable Minelaying System
Mine-Clearing Systems : IMR-2M CEV, MR-2 CEV 
Counter Mobility Systems : excavator, scoop loader, backhoe, skid-steer
'''

engineer_sub_dict = {'Minelaying systems' : ['tracks', 'wheels', 'dispenser', 'canister', 'tan', 'green'],
                     'Mine-Clearing Systems' : ['tracks', 'wheels', 'blade', 'rollers', 'crane', 'hydraulic arm', 'tan', 'green'],
                     'Gap-Crossing Systems' : ['bridge', 'crane', 'rectangle', 'tracks', 'tan', 'green'],
                     'Counter Mobility Systems' : ['Excavator', 'Scraper/Tractor', 'Front Loader', 'Ground Grader', 'Dump Truck', 'hydraulic arm', 'scoop', 'blade', 'frontloader', 'cab', 'doors', 'windshield', 'tan', 'green']}

In [20]:
'''
Tank : Abrams, T90
Infantry Carrier : Bradley, BMP
Support Vehicle : M113, M88, other armored recovery vehicles
Self Propelled Artillery : Palidin
'''

armor_sub_dict = {'Main Battle Tank' : ['tracks', 'turret', 'cannon', 'rectangle', 'tan', 'green', 'hatch'],
                  'Infantry Carrier' : ['tracks', 'square', 'tan', 'green', 'hatch'],
                  'Support Vehicle' : ['tracks', 'blade', 'tow arm', 'square', 'tan', 'green', 'hatch'],
                  'Self Propelled Artillery'  : ['tracks', 'turret', 'cannon', 'square', 'tan', 'green', 'hatch']}

In [21]:
'''
Infantry Carriers : Stryker, various wheeled APCs
Light Tactical Vehicles : HMMWV, JLTV, MAT-V
Combat Support Vehicles : M997 Ambulance
Mobile Gun Systems : wheeled howitzer, Stryker MGS, various antitank vehicles
'''

light_sub_dict = {'Infantry Carriers' : ['wheels', 'turret', 'rectangle', 'tan', 'green', 'hatch', 'gun'],
                  'Light Tactical Vehicles' : ['wheels', 'turret', 'windows', 'cab', 'doors', 'rectangle', 'hood', 'trunk', 'tan', 'green'],
                  'Combat Support Vehicles' : ['wheels', 'windows', 'cab', 'doors', 'rectangle', 'hood', 'trunk', 'tan', 'green'],
                  'Mobile Gun Systems' : ['wheels', 'cannon', 'turret', 'rectangle', 'tan', 'green']}

In [22]:
'''
Supply Vehicle : M977 HEMTT, M1094
Recovery Vehicle : Wrecker, various other tow truck type vehicles
Tanker : fuel truck, hippo
'''

logistics_sub_dict = {'Military Supply Vehicle' : ['wheels', 'windows', 'cab', 'doors', 'hood', 'Shipping Container', 'bed', 'trailer', 'tan', 'green'],
                      'Military Recovery Vehicle' : ['wheels', 'windows', 'cab', 'doors', 'winch', 'tow arm', 'tan', 'green'],
                      'Military Fueler' : ['wheels', 'windows', 'cab', 'doors', 'tank', 'tube', 'hood', 'tan', 'green']}

In [23]:
other_sub_dict = {'Command and Control Systems' : ['antenna', 'square', 'green', 'tan'],
                  'Communications Systems' : ['radar', 'array', 'antenna', 'square', 'green', 'tan'],
                  'Electronic Warfare Systems' : ['radar', 'array', 'antenna', 'square', 'green', 'tan']}

In [24]:
# Changed dictionary format for ease of tokenizing
'''
military_embed_dict = {'Military Aircraft' : aircraft_sub_dict,
                       'Air Defense' : ada_sub_dict,
                       'Armor' :armor_sub_dict,
                       'Artillery' : artillery_sub_dict,
                       'Military Engineer and CBRN' : engineer_sub_dict,
                       'Light Military Vehicles' : light_sub_dict,
                       'Military Logistics' : logistics_sub_dict,
                       'Other Military' : other_sub_dict}
'''

military_embed_dict = {**aircraft_sub_dict,
                       **ada_sub_dict,
                       **armor_sub_dict,
                       **artillery_sub_dict,
                       **engineer_sub_dict,
                       **light_sub_dict,
                       **logistics_sub_dict,
                       **other_sub_dict}

## 5. Merge Embedding Dictionaries (if needed)

In [25]:
main_dict = {**xview_embed_dict , **military_embed_dict}

## 6. Dictionary Analyis

In [26]:
features = [] # create a list to store possible features
labels = [] # create a list to store possible lables

for entity in main_dict:
    features.extend(main_dict[entity]) # ectract the features from the data dictionary
    labels.append(entity)

feature_dic = {feature : features.count(feature) for feature in set(features)} # create a dictionary that counts the number of occurances of a given feature

In [27]:
print('There are ' + str(len(main_dict)) + ' possible labels')
print('There are ' + str(len(feature_dic)) + ' possible features to describe the labels')
print('They are:\n')

number = 1
for label in labels:
    print(str(number) + '.\t' + label)
    number +=1

There are 85 possible labels
There are 115 possible features to describe the labels
They are:

1.	Fixed-wing Aircraft
2.	Small Aircraft
3.	Cargo Plane
4.	Helicopter
5.	Passenger Vehicle
6.	Small Car
7.	Bus
8.	Pickup Truck
9.	Utility Truck
10.	Truck
11.	Cargo Truck
12.	Truck w/Box
13.	Truck Tractor
14.	Trailer
15.	Truck w/Flatbed
16.	Truck w/Liquid
17.	Crane Truck
18.	Railway Vehicle
19.	Passenger Car
20.	Cargo Car
21.	Flat Car
22.	Tank car
23.	Locomotive
24.	Maritime Vessel
25.	Motorboat
26.	Sailboat
27.	Tugboat
28.	Barge
29.	Fishing Vessel
30.	Ferry
31.	Yacht
32.	Container Ship
33.	Oil Tanker
34.	Engineering Vehicle
35.	Tower crane
36.	Container Crane
37.	Reach Stacker
38.	Straddle Carrier
39.	Mobile Crane
40.	Dump Truck
41.	Haul Truck
42.	Scraper/Tractor
43.	Front loader/Bulldozer
44.	Excavator
45.	Cement Mixer
46.	Ground Grader
47.	Hut/Tent
48.	Shed
49.	Building
50.	Aircraft Hangar
51.	Damaged Building
52.	Facility
53.	Construction Site
54.	Vehicle Lot
55.	Helipad
56.	Storage Tank
5

In [28]:
import pandas as pd

features_df = pd.DataFrame(list(feature_dic.items()))

features_df.rename(columns = {0:'Feature', 1 : 'Count'}, inplace = True)

features_df = features_df.sort_values(by='Count', ascending=False)

features_df['Frequency'] = features_df['Count']/len(labels)

In [29]:
pd.set_option('display.max_rows', None)

print(features_df)

                Feature  Count  Frequency
26               wheels     38   0.447059
40                  cab     30   0.352941
103               green     29   0.341176
5               windows     28   0.329412
97                doors     27   0.317647
101           rectangle     24   0.282353
7                 white     23   0.270588
19                  tan     22   0.258824
113                 red     21   0.247059
46                 blue     19   0.223529
47               yellow     17   0.200000
18               orange     13   0.152941
94               tracks     12   0.141176
16               square     11   0.129412
52                water      9   0.105882
67                 hood      9   0.105882
70                 long      9   0.105882
31                 boat      9   0.105882
35                  bed      9   0.105882
12               engine      8   0.094118
53              trailer      8   0.094118
2                 crane      8   0.094118
79   Shipping Container      7   0

In [30]:
# consider making colors bright vs subdued instead of listing individual colors?

## 7. Create a Dictionary of tokens for Embedding

In [31]:
# Import needed packages

import nltk
import re

from nltk.tokenize import word_tokenize
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\jingr\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [32]:
# Create a helper function to tokenize the values in the dictionary

def tokenize_list(word_list):
    token_list = []
    for text in word_list:
        # Remove spaces
        text = re.sub(r'([^\s])\s([^\s])', r'\1_\2',text)
    
        # Remove case
        text = text.lower() 
        
        # Store the text in the token list
        token_list.append(text)
        
    return(token_list)

In [33]:
# Tokenize the Xview dataset for labeling

# Create an empty dictionary to store labels
xview_label_dict_embedded = {}


# Match embedded text to labels provided with the Xview data
for key in xview_label_dict.keys():
    xview_label_dict_embedded[key] = set(tokenize_list(xview_embed_dict[xview_label_dict[key]]))

In [34]:
# Print the embedded Xview Data
print("" + "\n".join("{!r}: {!r},".format(k, v) for k, v in xview_label_dict_embedded.items()) + "}")

11: set(),
12: {'wings', 'cockpit', 'single_engine', 'fuselage', 'elevators', 'blue', 'silver', 'red', 'tail', 'rudder', 'white'},
13: {'wings', 'cockpit', 'fuselage', 'elevators', 'multi_engine', 'blue', 'silver', 'red', 'tail', 'rudder', 'white'},
15: {'cockpit', 'fuselage', 'main_rotor', 'blue', 'red', 'tail', 'rudder', 'tail_rotor', 'white'},
17: set(),
18: {'black', 'doors', 'blue', 'trunk', 'wheels', 'hood', 'silver', 'windows', 'cab', 'red', 'small', 'white'},
19: {'long', 'doors', 'blue', 'wheels', 'silver', 'red', 'windows', 'cab', 'rectangle', 'white'},
20: {'black', 'doors', 'blue', 'wheels', 'hood', 'silver', 'windows', 'cab', 'red', 'bed', 'white'},
21: {'doors', 'crane', 'wheels', 'hood', 'windows', 'cab', 'bed', 'winch', 'white', 'box'},
23: set(),
24: {'long', 'doors', 'tractor', 'wheels', 'container', 'trailer', 'windows', 'cab', 'bed', 'rectangle', 'box'},
25: {'doors', 'wheels', 'windows', 'cab', 'bed', 'box'},
26: {'doors', 'wheels', 'trailer', 'windows', 'cab', 'be

## 8. Vectorize the Tokens to be Embedded in the Model