In [16]:
import pandas as pd
import numpy as np

In [17]:
def get_image_bird_relationship(file="images.txt"):
    """
    Build a mapping from image id to bird name from the images file.

    Each non-blank line is expected to hold an integer image id followed by
    a file name, separated by whitespace. The bird name is taken from the
    file name: it is split on "_" and the leading run of purely alphabetic
    pieces is joined with single spaces (so "Downy_Woodpecker_0001_1.jpg"
    becomes "Downy Woodpecker"). If the first piece is not alphabetic the
    name is the empty string.

    Args:
        file: path to the images file.

    Returns:
        dict mapping image id (int) to bird name (str).

    Raises:
        ValueError: if the first field of a line is not an integer.
        IndexError: if a non-blank line has fewer than two fields.
    """
    image_to_bird = dict()

    with open(file, "r") as f:
        for line in f:
            # split() drops the trailing newline and tolerates repeated
            # whitespace; a blank line yields no fields and is skipped.
            fields = line.split()
            if not fields:
                continue

            image_id = int(fields[0])
            file_name = fields[1]

            # Collect name pieces up to the first non-alphabetic piece
            # (typically the numeric suffix of the file name).
            name_parts = []
            for part in file_name.split("_"):
                if not part.isalpha():
                    break
                name_parts.append(part)

            image_to_bird[image_id] = " ".join(name_parts)

    return image_to_bird

In [18]:
def get_id_attr_relationship(file="attributes.txt"):
    """
    Build a mapping from attribute id to attribute name from the attributes file.

    Each non-blank line is expected to hold an integer attribute id followed
    by the attribute name, separated by whitespace. Only the first two
    fields of a line are used.

    Args:
        file: path to the attributes file.

    Returns:
        dict mapping attribute id (int) to attribute name (str).

    Raises:
        ValueError: if the first field of a line is not an integer.
        IndexError: if a non-blank line has fewer than two fields.
    """
    id_to_attr = dict()

    with open(file, "r") as f:
        for line in f:
            # split() already strips the trailing newline from the name;
            # blank lines produce no fields and are skipped.
            fields = line.split()
            if not fields:
                continue
            id_to_attr[int(fields[0])] = fields[1]

    return id_to_attr

In [19]:
def get_bird_attr_relationship(image_to_bird, file="labels.txt",
                               definitely_threshold=0.1,
                               probably_threshold=0.5,
                               guessing_threshold=0.8):
    """
    Derive, for every bird, the set of attribute ids it is considered to have.

    Each non-blank line of the labels file is read as whitespace-separated
    integers: image id, attribute id, is_present flag, certainty id. Any
    further columns (e.g. a worker id) are ignored. Only labels with
    is_present == 1 are counted.

    For each (bird, attribute) pair the share of its "present" labels that
    carry each certainty is computed, and the attribute is kept when any
    share strictly exceeds the matching threshold.

    Args:
        image_to_bird: dict mapping image id (int) to bird name.
        file: path to the labels file.
        definitely_threshold: minimum share (exclusive) of "definitely"
            labels for the attribute to be kept.
        probably_threshold: minimum share (exclusive) of "probably" labels.
        guessing_threshold: minimum share (exclusive) of "guessing" labels.

    Returns:
        dict mapping bird name to a set of attribute ids. Birds without any
        "present" label do not appear in the result.

    Raises:
        KeyError: if a label refers to an image id missing from image_to_bird.
        ValueError: if one of the first four fields of a line is not an integer.
        IndexError: if a non-blank line has fewer than four fields.
    """
    # Certainty ids as this function interprets them.
    # NOTE(review): confirm these against the dataset's certainty table.
    definitely_id, probably_id, guessing_id = 1, 0, 2

    # bird -> attribute id -> number of "present" labels (any certainty)
    present_count = dict()
    # certainty id -> bird -> attribute id -> number of "present" labels
    certainty_count = {definitely_id: dict(), probably_id: dict(), guessing_id: dict()}

    with open(file, "r") as f:
        for line in f:
            parts = line.split()
            if not parts:
                # tolerate blank / trailing empty lines
                continue

            image_id = int(parts[0])
            attribute_id = int(parts[1])
            is_present = int(parts[2])
            certainty_id = int(parts[3])

            # Looked up before the presence check so an unknown image id is
            # reported even when the label itself would be ignored.
            bird = image_to_bird[image_id]

            if is_present != 1:
                continue

            per_attr = present_count.setdefault(bird, dict())
            per_attr[attribute_id] = per_attr.get(attribute_id, 0) + 1

            # Labels with any other certainty id still count towards the
            # total above but have no bucket of their own.
            if certainty_id in certainty_count:
                per_attr = certainty_count[certainty_id].setdefault(bird, dict())
                per_attr[attribute_id] = per_attr.get(attribute_id, 0) + 1

    bird_to_attrs = dict()

    for bird, attr_totals in present_count.items():
        # A bird may have no label at all for a given certainty, so fall
        # back to an empty mapping instead of indexing directly.
        definitely = certainty_count[definitely_id].get(bird, {})
        probably = certainty_count[probably_id].get(bird, {})
        guessing = certainty_count[guessing_id].get(bird, {})

        bird_attrs = set()
        for attr, total in attr_totals.items():
            # total >= 1 here: the key only exists once a label was counted.
            if (definitely.get(attr, 0) / total > definitely_threshold
                    or probably.get(attr, 0) / total > probably_threshold
                    or guessing.get(attr, 0) / total > guessing_threshold):
                bird_attrs.add(attr)

        bird_to_attrs[bird] = bird_attrs

    return bird_to_attrs

In [20]:
def get_bird_attr_df(bird_to_attrs, id_to_attr):
    """
    Turn the bird -> attribute-id-set mapping into a 0/1 indicator DataFrame.

    The frame has one row per bird (in the iteration order of
    bird_to_attrs) and one column per attribute in id_to_attr, ordered by
    ascending attribute id, preceded by a "bird_name" column. A cell is 1
    when the attribute id is in the bird's set and 0 otherwise. Attribute
    ids in bird_to_attrs that are absent from id_to_attr are ignored.

    Args:
        bird_to_attrs: dict mapping bird name to a set of attribute ids.
        id_to_attr: dict mapping attribute id to attribute name.

    Returns:
        pandas.DataFrame with a default integer index.
    """
    # Derive the attribute ids from the mapping itself rather than assuming
    # a fixed count, so any attribute file works.
    attr_ids = sorted(id_to_attr)
    columns = ["bird_name"] + [id_to_attr[attr_id] for attr_id in attr_ids]

    # Collect all rows first and build the frame once; growing a frame row
    # by row re-allocates on every insert.
    rows = [
        [bird] + [1 if attr_id in attrs else 0 for attr_id in attr_ids]
        for bird, attrs in bird_to_attrs.items()
    ]

    return pd.DataFrame(rows, columns=columns)

In [21]:
# Image id -> bird name, parsed from the image file names.
image_to_bird = get_image_bird_relationship(file="images.txt")

In [22]:
# Attribute id -> attribute name.
id_to_attr = get_id_attr_relationship(file="attributes.txt")

In [23]:
# Bird name -> set of attribute ids, aggregated over all labels of that bird.
bird_to_attrs = get_bird_attr_relationship(image_to_bird, file="labels.txt")

In [24]:
# One row per bird, one 0/1 column per attribute.
df = get_bird_attr_df(bird_to_attrs=bird_to_attrs, id_to_attr=id_to_attr)

In [25]:
# Show the bird/attribute indicator table as the cell output.
df

Unnamed: 0,bird_name,has_crown_color::blue,has_crown_color::black,has_crown_color::orange,has_crown_color::buff,has_crown_color::brown,has_crown_color::grey,has_crown_color::white,has_crown_color::red,has_crown_color::pink,has_crown_color::rufous,has_crown_color::iridescent,has_crown_color::yellow,has_crown_color::olive,has_crown_color::purple,has_crown_color::green,has_nape_color::white,has_nape_color::black,has_nape_color::brown,has_nape_color::buff,has_nape_color::grey,has_nape_color::yellow,has_nape_color::red,has_nape_color::orange,has_nape_color::iridescent,has_nape_color::olive,has_nape_color::green,has_nape_color::blue,has_nape_color::rufous,has_nape_color::pink,has_nape_color::purple,has_bill_shape::cone,has_bill_shape::all-purpose,has_bill_shape::dagger,has_bill_shape::hooked,has_bill_shape::hooked.1,has_bill_shape::curved,has_bill_shape::spatulate,has_bill_shape::needle,has_bill_shape::specialized,...,has_under_tail_color::purple,has_upper_tail_color::brown,has_upper_tail_color::black,has_upper_tail_color::grey,has_upper_tail_color::buff,has_upper_tail_color::white,has_upper_tail_color::yellow,has_upper_tail_color::rufous,has_upper_tail_color::olive,has_upper_tail_color::blue,has_upper_tail_color::iridescent,has_upper_tail_color::orange,has_upper_tail_color::green,has_upper_tail_color::red,has_upper_tail_color::pink,has_upper_tail_color::purple,has_wing_pattern::striped,has_wing_pattern::spotted,has_wing_pattern::solid,has_wing_pattern::multi-colored,has_wing_color::black,has_wing_color::buff,has_wing_color::grey,has_wing_color::white,has_wing_color::brown,has_wing_color::yellow,has_wing_color::purple,has_wing_color::iridescent,has_wing_color::blue,has_wing_color::olive,has_wing_color::rufous,has_wing_color::orange,has_wing_color::red,has_wing_color::green,has_wing_color::pink,has_belly_color::purple,has_eye_color::purple,has_underparts_color::purple,has_under_tail_color::pink,has_eye_color::pink
0,Downy Woodpecker,1,1,1,0,0,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,0,...,0,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
1,Song Sparrow,0,1,1,1,1,1,1,1,0,1,0,1,0,0,0,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,0,...,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0
2,Northern Flicker,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,1,0,...,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0,0,0,0,0
3,Vesper Sparrow,0,1,0,1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,...,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0
4,Philadelphia Vireo,0,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,1,1,1,1,1,0,0,0,1,1,0,0,0,1,1,1,1,1,1,1,0,0,0,...,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Rose breasted Grosbeak,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,0,1,1,0,0,1,...,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,0,1,1,1,1,1,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0
196,Summer Tanager,0,0,0,1,1,1,0,1,0,1,0,0,0,0,0,1,1,0,1,1,1,1,1,0,0,0,0,1,0,0,1,1,1,0,1,1,0,0,1,...,0,0,1,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0
197,Tree Sparrow,0,1,0,1,1,1,1,0,0,1,1,0,0,0,1,1,1,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,0,...,0,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0
198,Whip poor Will,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,1,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,1,1,1,0,0,1,...,0,1,1,1,1,1,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1


In [26]:
# Persist the table; index=False keeps the row index out of the CSV.
df.to_csv(path_or_buf="bird_to_attrs.csv", index=False)