# init

In [1]:
import pandas, IPython.core.display

## open file

In [2]:
src = pandas.ExcelFile('data/real/2017/CAM data from iPads/2017 CAM data from iPads.xlsx')

In [3]:
src.sheet_names

['2017 CAM data Erl',
 'schema (WIP reverse engineer)',
 '2017 CAM iPad data Tyler',
 'Combined iPad 2017 CAM data']

## select sheets

In [4]:
sheets_to_process = {sheet_name.split(' ')[-1]: src.parse(sheet_name) for sheet_name in src.sheet_names
    if any([person_name in sheet_name for person_name in ['Erl', 'Tyler']])}

# rename columns

In [5]:
sheets_to_rename = [sheet for sheet in sheets_to_process.values()
                    if any([str(column).find(' ') for column in sheet.columns])]

In [6]:
for sheet_to_rename in sheets_to_rename:
    sheet_to_rename.rename(mapper=lambda x: str(x).split(' ')[0], axis='columns', inplace=True)

# concatenate sheets

In [7]:
df = pandas.concat(sheets_to_process, names=['Sheet','Row'])

# generate hierarchical index (Python)

In [8]:
import collections

In [9]:
class NestedDefaultdict(collections.defaultdict):
    data = None
    name = None
    parent_key = None

    def __init__(self):
        collections.defaultdict.__init__(self, NestedDefaultdict)

In [10]:
def get_column_tree(frame):
    coltree = NestedDefaultdict()
    for column in frame.columns:
        pointer = coltree
        pointer.parent_key = ''
        for word in str(column).split('__'):
            pointer = pointer[word]
            pointer.parent_key = word
        pointer.name = column
        pointer.data = frame[column]
    coltree.default_factory = None
    return coltree

In [11]:
coltree = get_column_tree(df)

## visualize

In [12]:
def visualize_tree(node):
    """Given a dictionary, display a list of keys, and all keys 
    of any contained dictionaries, recursively. Indent to show depth
    of recursion."""

    def visualize_node(node, depth=0):
        for key, value in node.items():
            output.append(f"{'.   ' * depth}{key}"
                          f"{'/' if value.keys() else ''}"
                          f"{' : ' + str(value.data.count()) if value.data is not None else ''}"
                         )
            visualize_node(value, depth + 1)

    output = []
    visualize_node(node)
    return '\n'.join(output)

In [13]:
print(visualize_tree(coltree))

clients/
.   company : 1
.   displayText : 1
.   fname : 1
.   lname : 1
.   name : 1
fields/
.   client/
.   .   company : 21
.   .   displayText : 21
.   .   fname : 21
.   .   lname : 21
.   .   name : 21
.   crop : 21
.   date : 21
.   desc : 15
.   image : 21
.   name : 21
.   oSets/
.   .   completeSets : 54
.   .   date : 54
.   .   dateCompare : 54
.   .   desc : 0
.   .   growthStage : 54
.   .   oPoints/
.   .   .   id : 335
.   .   .   location/
.   .   .   .   coords/
.   .   .   .   .   accuracy : 250
.   .   .   .   .   altitude : 250
.   .   .   .   .   altitudeAccuracy : 250
.   .   .   .   .   heading : 247
.   .   .   .   .   latitude : 250
.   .   .   .   .   longitude : 250
.   .   .   .   .   speed : 250
.   .   .   .   timestamp : 250
.   .   .   name : 335
.   .   .   observations/
.   .   .   .   a1/
.   .   .   .   .   number : 448
.   .   .   .   a2/
.   .   .   .   .   number : 142
.   .   .   .   a3/
.   .   .   .   .   number : 25
.   .   .   .   anum : 167

# define functions: get nodes, column names, DataFrames

In [14]:
def get_child_columns(node):
    return_value = []
    if node.data is not None:
        return_value.append(node)
    for child in node.values():
        if child.name is not None:
            return_value.append(child)
    return return_value

In [15]:
def get_descendant_columns(node):
    return_value = []
    if node.data is not None:
        return_value.append(node)
    for child in node.values():
        return_value.extend(get_descendant_columns(child))
    return return_value

In [16]:
def get_child_column_names(node):
    return [column.name for column in get_child_columns(node)]

In [17]:
def get_descendant_column_names(node):
    return [column.name for column in get_descendant_columns(node)]

In [18]:
def get_child_frame(node, frame=None):
    if frame is None:
        return pandas.DataFrame({column.name: column.data for column in get_child_columns(node)})
    return frame[get_child_column_names(node)]

In [19]:
def get_descendant_frame(node, frame=None):
    if frame is None:
        return pandas.DataFrame({column.name: column.data for column in get_descendant_columns(node)})
    return frame[get_descendant_column_names(node)]

# convert_datetime

In [20]:
def convert_datetime(frame):
    output_frame = frame.copy()
    
    foplt = 'fields__oSets__oPoints__location__timestamp'
    output_frame[foplt] = pandas.to_datetime(frame[foplt], unit='ms')
    
    for column in ['fields__oSets__date', 'fields__date']:
        output_frame[column] = pandas.to_datetime(output_frame[column], infer_datetime_format=True)
    
    return output_frame

In [21]:
df = convert_datetime(df)

# cleanup

In [22]:
df2 = df.copy().reset_index()

In [23]:
df2.columns

Index(['Sheet', 'Row', 'clients__company', 'clients__displayText',
       'clients__fname', 'clients__lname', 'clients__name',
       'fields__client__company', 'fields__client__displayText',
       'fields__client__fname', 'fields__client__lname',
       'fields__client__name', 'fields__crop', 'fields__date', 'fields__desc',
       'fields__image', 'fields__name', 'fields__oSets__completeSets',
       'fields__oSets__date', 'fields__oSets__dateCompare',
       'fields__oSets__desc', 'fields__oSets__growthStage',
       'fields__oSets__oPoints__id',
       'fields__oSets__oPoints__location__coords__accuracy',
       'fields__oSets__oPoints__location__coords__altitude',
       'fields__oSets__oPoints__location__coords__altitudeAccuracy',
       'fields__oSets__oPoints__location__coords__heading',
       'fields__oSets__oPoints__location__coords__latitude',
       'fields__oSets__oPoints__location__coords__longitude',
       'fields__oSets__oPoints__location__coords__speed',
       'fiel

In [24]:
for column in (
    'Sheet', 'Row',
    'clients__company',
    'clients__displayText',
    'clients__fname',
    'clients__lname',
    'clients__name',
    'fields__client__company',
#     'fields__client__displayText',
    'fields__client__fname',
    'fields__client__lname',
    'fields__client__name', 
#     'fields__crop',
    'fields__date',
#     'fields__desc',
    'fields__image',
#     'fields__name',
#     'fields__oSets__completeSets',
#     'fields__oSets__date', 
    'fields__oSets__dateCompare',
#     'fields__oSets__desc',
#     'fields__oSets__growthStage',
#     'fields__oSets__oPoints__id',
    'fields__oSets__oPoints__location__coords__accuracy',
    'fields__oSets__oPoints__location__coords__altitude',
    'fields__oSets__oPoints__location__coords__altitudeAccuracy',
    'fields__oSets__oPoints__location__coords__heading',
    'fields__oSets__oPoints__location__coords__latitude',
    'fields__oSets__oPoints__location__coords__longitude',
    'fields__oSets__oPoints__location__coords__speed',
    'fields__oSets__oPoints__location__timestamp',
    'fields__oSets__oPoints__name',
#     'fields__oSets__oPoints__observations__a1__number',
#     'fields__oSets__oPoints__observations__a2__number',
#     'fields__oSets__oPoints__observations__a3__number',
#     'fields__oSets__oPoints__observations__anum',
    'fields__oSets__oPoints__observations__complete',
#     'fields__oSets__oPoints__observations__disabled',
    'fields__oSets__oPoints__observations__eVnum',
#     'fields__oSets__oPoints__observations__enum',
    'fields__oSets__oPoints__observations__id',
#     'fields__oSets__oPoints__observations__name',
#     'fields__oSets__oPoints__observations__|',
#     'fields__oSets__oPoints__observations__|__number',
#     'fields__oSets__obsName',
    'fields__oSets__results',
#     'fields__oSets__totalA1', 'fields__oSets__totalA2',
#     'fields__oSets__totalA3', 'fields__oSets__totalA4',
#     'fields__oSets__totalSets',
    'observers'
):
    del df2[column]

In [25]:
df2.columns

Index(['fields__client__displayText', 'fields__crop', 'fields__desc',
       'fields__name', 'fields__oSets__completeSets', 'fields__oSets__date',
       'fields__oSets__desc', 'fields__oSets__growthStage',
       'fields__oSets__oPoints__id',
       'fields__oSets__oPoints__observations__a1__number',
       'fields__oSets__oPoints__observations__a2__number',
       'fields__oSets__oPoints__observations__a3__number',
       'fields__oSets__oPoints__observations__anum',
       'fields__oSets__oPoints__observations__disabled',
       'fields__oSets__oPoints__observations__enum',
       'fields__oSets__oPoints__observations__name',
       'fields__oSets__oPoints__observations__|',
       'fields__oSets__oPoints__observations__|__number',
       'fields__oSets__obsName', 'fields__oSets__totalA1',
       'fields__oSets__totalA2', 'fields__oSets__totalA3',
       'fields__oSets__totalA4', 'fields__oSets__totalSets'],
      dtype='object')

In [26]:
tree = get_column_tree(df2)

# fields

In [27]:
fields_node = tree['fields']
fields_columns = get_child_column_names(fields_node) + get_descendant_column_names(fields_node['client'])
fields = df2[fields_columns].dropna(how='all').sort_values('fields__name')
fields.set_index('fields__crop').sort_index()

Unnamed: 0_level_0,fields__desc,fields__name,fields__client__displayText
fields__crop,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CROPS.BARLEY,Right next to Alvena Canola,Alvena Barley,"AAFC SRDC, Tyler Wist"
CROPS.BARLEY,Right next to SEF wheat,SEF Barley,"AAFC SRDC, Tyler Wist"
CROPS.BARLEY,Next to SEF Wheat and near faba beans,SEF Barley,"AAFC SRDC, Tyler Wist"
CROPS.BARLEY,,Llewelyn Barley,"AAFC SRDC, Tyler Wist"
CROPS.OATS,Next to SEF peas and a wheat field,SEF Oats,"AAFC SRDC, Tyler Wist"
CROPS.OATS,"Next to SEF Peas, near the canola",SEF Oats,"AAFC SRDC, Tyler Wist"
CROPS.WHEAT,Next to soybeans and canola,Outlook wheat 1,"AAFC SRDC, Tyler Wist"
CROPS.WHEAT,Wheat 2 by canola and soybeans but on other si...,Outlook wheat,"AAFC SRDC, Tyler Wist"
CROPS.WHEAT,,Outlook Wheat-2,"AAFC SRDC, Tyler Wist"
CROPS.WHEAT,Wheat 1 is next to the faba beans and soybeans I,Outlook Wheat-1,"AAFC SRDC, Tyler Wist"


# sets

In [28]:
sets_node = fields_node['oSets']
sets_columns = get_child_column_names(sets_node)
sets = df2[sets_columns].dropna(how='all')
sets

Unnamed: 0,fields__oSets__completeSets,fields__oSets__date,fields__oSets__desc,fields__oSets__growthStage,fields__oSets__obsName,fields__oSets__totalA1,fields__oSets__totalA2,fields__oSets__totalA3,fields__oSets__totalA4,fields__oSets__totalSets
0,0.0,2017-08-02 13:12:09.542,,7.0,Tyler,,,,,1.0
70,1.0,2017-08-09 09:25:11.710,,8.0,Tyler,164.0,0.0,0.0,0.0,1.0
140,1.0,2017-08-09 10:06:25.480,,7.0,Tyler,66.0,0.0,0.0,0.0,1.0
210,2.0,2017-08-09 11:21:01.555,,9.0,Stean,0.0,0.0,0.0,0.0,2.0
350,2.0,2017-08-09 11:37:20.862,,8.0,Stean,5.0,5.0,0.0,0.0,2.0
490,1.0,2017-08-22 15:42:05.751,,8.0,Mikki,1.0,0.0,0.0,0.0,1.0
560,2.0,2017-08-17 11:12:02.820,,8.0,Gabrielle,169.0,96.0,0.0,0.0,2.0
700,2.0,2017-08-17 13:06:30.183,,9.0,Stean,78.0,102.0,0.0,0.0,2.0
840,1.0,2017-08-22 16:02:50.682,,8.0,Mikki,187.0,0.0,0.0,0.0,1.0
910,0.0,2017-07-14 12:31:24.194,,6.0,Tyler,,,,,1.0


# points

In [29]:
points_node = sets_node['oPoints']
print(visualize_tree(points_node))

id : 335
observations/
.   a1/
.   .   number : 448
.   a2/
.   .   number : 142
.   a3/
.   .   number : 25
.   anum : 1675
.   disabled : 0
.   enum : 335
.   name : 2010
.   |/ : 3015
.   .   number : 172


In [30]:
points_columns = get_child_column_names(points_node) + get_child_column_names(points_node['location'])
points = df2[points_columns].dropna(how='all')
points

Unnamed: 0,fields__oSets__oPoints__id
0,0.0
14,1.0
28,2.0
42,3.0
56,4.0
70,0.0
84,1.0
98,2.0
112,3.0
126,4.0


# observations

In [31]:
observation_node = points_node['observations']
print(visualize_tree(observation_node))

a1/
.   number : 448
a2/
.   number : 142
a3/
.   number : 25
anum : 1675
disabled : 0
enum : 335
name : 2010
|/ : 3015
.   number : 172


## cleanup

In [110]:
idxcols = {'fields__oSets__date':                               'datetime',
           'fields__oSets__oPoints__id':                        'point_id',
           'fields__oSets__oPoints__observations__name':        'observations_name',
           'fields__oSets__oPoints__observations__|':           'natural_enemy_name',
           'fields__oSets__oPoints__observations__|__number':   'natural_enemy_count',
           }
idf = df2.rename(columns=idxcols)
for column in ['datetime', 'point_id', 'observations_name']:
    idf[column] = idf[column].ffill()

### split string to columns

Information in the column could be used as an index, to separate the data we need to unstack (natural enemy counts) from the data that's already unstacked (aphid counts).

In [111]:
new_columns = idf.pop('observations_name').str.extract('(?P<observation_subject>Aphid|Natural Enemy) Observation ?(?P<observation_id>\d+)?', expand=True)
idf = idf.join(new_columns)

In [112]:
idf = idf.set_index(['datetime', 'point_id','observation_id','observation_subject'])

In [113]:
idf[['natural_enemy_name', 'natural_enemy_count']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,natural_enemy_name,natural_enemy_count
datetime,point_id,observation_id,observation_subject,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-08-02 13:12:09.542,0.0,1,Aphid,,
2017-08-02 13:12:09.542,0.0,2,Aphid,,
2017-08-02 13:12:09.542,0.0,3,Aphid,,
2017-08-02 13:12:09.542,0.0,4,Aphid,,
2017-08-02 13:12:09.542,0.0,5,Aphid,,
2017-08-02 13:12:09.542,0.0,,Natural Enemy,e1,
2017-08-02 13:12:09.542,0.0,,Natural Enemy,e2,
2017-08-02 13:12:09.542,0.0,,Natural Enemy,e3,
2017-08-02 13:12:09.542,0.0,,Natural Enemy,e4,
2017-08-02 13:12:09.542,0.0,,Natural Enemy,e5,


In [114]:
ne = idf.xs('Natural Enemy', level='observation_subject', drop_level=False)

In [115]:
ne.set_index('natural_enemy_name', append=True, inplace=True)

In [116]:
ne['natural_enemy_count'].dropna()

datetime                 point_id  observation_id  observation_subject  natural_enemy_name
2017-08-09 09:25:11.710  1.0       NaN             Natural Enemy        e2                     5.0
                                                                        e6                     1.0
                                                                        e8                     2.0
                         2.0       NaN             Natural Enemy        e2                     5.0
                                                                        e8                     1.0
                         3.0       NaN             Natural Enemy        e2                     4.0
                         4.0       NaN             Natural Enemy        e2                     2.0
                                                                        e6                     1.0
2017-08-09 10:06:25.480  0.0       NaN             Natural Enemy        e1                     1.0
                  

### fill NaN with 0, unstack enemy counts

In [117]:
ne2 = ne['natural_enemy_count'].fillna(value=0).unstack()

### Remove old column from "left" frame

In [121]:
natural_enemy_column_names = [
    column_name for column_name in idf.columns if 'natural_enemy_' in column_name]

In [123]:
for column_name in natural_enemy_column_names:
    print(column_name)
    del idf[column_name]

natural_enemy_name


KeyError: 'natural_enemy_name'

### remove duplicates created by old index

These won't be needed anymore, and won't be in the "right" frame we're about to merge.

In [124]:
index_columns = ['datetime', 'point_id', 'observation_id','observation_subject']
idf2 = idf.reset_index().drop_duplicates(subset=index_columns).set_index(index_columns)

### merge unstacked natural enemy counts into main frame

In [125]:
idf3 = pandas.merge(idf2, ne2, right_index=True, left_index=True, how='outer')

In [46]:
import itertools

def string_format_range(column_name, bounds):
    lower, upper = bounds
    return [column_name.format(i) for i in range(lower, upper + 1)]

In [129]:
column_arguments = (
    ('fields__oSets__oPoints__observations__a{0}__number', (1, 3)), ('e{0}', (1, 9)))
aphid_num_cols, enemy_num_cols = [
    list(string_format_range(*arguments)) for arguments in column_arguments]

idf3[get_descendant_column_names(get_column_tree(idf2)['fields']['oSets']['oPoints'])+enemy_num_cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,fields__oSets__oPoints__observations__a1__number,fields__oSets__oPoints__observations__a2__number,fields__oSets__oPoints__observations__a3__number,fields__oSets__oPoints__observations__anum,fields__oSets__oPoints__observations__disabled,fields__oSets__oPoints__observations__enum,e1,e2,e3,e4,e5,e6,e7,e8,e9
datetime,point_id,observation_id,observation_subject,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2017-08-02 13:12:09.542,0.0,1,Aphid,,,,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,2,Aphid,,,,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,3,Aphid,,,,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,4,Aphid,,,,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,5,Aphid,,,,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,,Natural Enemy,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-08-02 13:12:09.542,1.0,6,Aphid,,,,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,1.0,7,Aphid,,,,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,1.0,8,Aphid,,,,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,1.0,9,Aphid,,,,0.0,,,,,,,,,,,


In [130]:
sets_columns = get_child_column_names(get_column_tree(idf2)['fields']['oSets']['oPoints']['observations'])
idf3[sets_columns + enemy_num_cols]#.dropna(how='all')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,fields__oSets__oPoints__observations__anum,fields__oSets__oPoints__observations__disabled,fields__oSets__oPoints__observations__enum,e1,e2,e3,e4,e5,e6,e7,e8,e9
datetime,point_id,observation_id,observation_subject,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2017-08-02 13:12:09.542,0.0,1,Aphid,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,2,Aphid,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,3,Aphid,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,4,Aphid,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,5,Aphid,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,,Natural Enemy,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-08-02 13:12:09.542,1.0,6,Aphid,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,1.0,7,Aphid,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,1.0,8,Aphid,0.0,,,,,,,,,,,
2017-08-02 13:12:09.542,1.0,9,Aphid,0.0,,,,,,,,,,,


In [176]:
idf4 = idf3.copy() 
# idf4.index = idf4.index.droplevel(level='observation_id')

In [177]:
idf4#.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,fields__client__displayText,fields__crop,fields__desc,fields__name,fields__oSets__completeSets,fields__oSets__desc,fields__oSets__growthStage,fields__oSets__oPoints__observations__a1__number,fields__oSets__oPoints__observations__a2__number,fields__oSets__oPoints__observations__a3__number,...,fields__oSets__totalSets,e1,e2,e3,e4,e5,e6,e7,e8,e9
datetime,point_id,observation_id,observation_subject,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
2017-08-02 13:12:09.542,0.0,1,Aphid,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
2017-08-02 13:12:09.542,0.0,2,Aphid,,,,,,,,,,,...,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,3,Aphid,,,,,,,,,,,...,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,4,Aphid,,,,,,,,,,,...,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,5,Aphid,,,,,,,,,,,...,,,,,,,,,,
2017-08-02 13:12:09.542,0.0,,Natural Enemy,,,,,,,,,,,...,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-08-02 13:12:09.542,1.0,6,Aphid,,,,,,,,,,,...,,,,,,,,,,
2017-08-02 13:12:09.542,1.0,7,Aphid,,,,,,,,,,,...,,,,,,,,,,
2017-08-02 13:12:09.542,1.0,8,Aphid,,,,,,,,,,,...,,,,,,,,,,
2017-08-02 13:12:09.542,1.0,9,Aphid,,,,,,,,,,,...,,,,,,,,,,


In [200]:
idf5 = idf4.reorder_levels(('observation_subject', 'datetime', 'point_id', 'observation_id'))#.sort_index()

In [201]:
columns = list(set(idf5.columns) - set(aphid_num_cols) - set(enemy_num_cols))
idf5[columns] = idf5[columns].fillna(method='ffill')

In [202]:
idf5.loc['Aphid',aphid_num_cols].fillna(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fields__oSets__oPoints__observations__a1__number,fields__oSets__oPoints__observations__a2__number,fields__oSets__oPoints__observations__a3__number
datetime,point_id,observation_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-08-02 13:12:09.542,0.0,1,0.0,0.0,0.0
2017-08-02 13:12:09.542,0.0,2,0.0,0.0,0.0
2017-08-02 13:12:09.542,0.0,3,0.0,0.0,0.0
2017-08-02 13:12:09.542,0.0,4,0.0,0.0,0.0
2017-08-02 13:12:09.542,0.0,5,0.0,0.0,0.0
2017-08-02 13:12:09.542,1.0,6,0.0,0.0,0.0
2017-08-02 13:12:09.542,1.0,7,0.0,0.0,0.0
2017-08-02 13:12:09.542,1.0,8,0.0,0.0,0.0
2017-08-02 13:12:09.542,1.0,9,0.0,0.0,0.0
2017-08-02 13:12:09.542,1.0,10,0.0,0.0,0.0


In [203]:
idf5[aphid_num_cols] = idf5.loc['Aphid',aphid_num_cols].fillna(0)
idf5

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,fields__client__displayText,fields__crop,fields__desc,fields__name,fields__oSets__completeSets,fields__oSets__desc,fields__oSets__growthStage,fields__oSets__oPoints__observations__a1__number,fields__oSets__oPoints__observations__a2__number,fields__oSets__oPoints__observations__a3__number,...,fields__oSets__totalSets,e1,e2,e3,e4,e5,e6,e7,e8,e9
observation_subject,datetime,point_id,observation_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
Aphid,2017-08-02 13:12:09.542,0.0,1,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,0.0,2,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,0.0,3,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,0.0,4,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,0.0,5,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Natural Enemy,2017-08-02 13:12:09.542,0.0,,,,,,0.0,,7.0,,,,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Aphid,2017-08-02 13:12:09.542,1.0,6,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,1.0,7,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,1.0,8,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,1.0,9,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,


In [196]:
idf5.index = idf5.index.droplevel(level='observation_id')

In [197]:
idf5

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fields__client__displayText,fields__crop,fields__desc,fields__name,fields__oSets__completeSets,fields__oSets__desc,fields__oSets__growthStage,fields__oSets__oPoints__observations__a1__number,fields__oSets__oPoints__observations__a2__number,fields__oSets__oPoints__observations__a3__number,...,fields__oSets__totalSets,e1,e2,e3,e4,e5,e6,e7,e8,e9
observation_subject,datetime,point_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Aphid,2017-08-02 13:12:09.542,0.0,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,0.0,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,0.0,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,0.0,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,0.0,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Natural Enemy,2017-08-02 13:12:09.542,0.0,,,,,0.0,,7.0,,,,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Aphid,2017-08-02 13:12:09.542,1.0,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,1.0,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,1.0,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
Aphid,2017-08-02 13:12:09.542,1.0,,,,,0.0,,7.0,,,,...,1.0,,,,,,,,,
