# init

## import Python modules

In [1]:
import pandas, IPython.core.display

## open file

In [2]:
src = pandas.ExcelFile('2017 CAM data from iPads.xlsx')
src.sheet_names

['2017 CAM data Erl',
 'schema (WIP reverse engineer)',
 '2017 CAM iPad data Tyler',
 'Combined iPad 2017 CAM data']

## select sheets

In [3]:
sheets_to_process = {
    sheet_name.split(' ')[-1]: src.parse(sheet_name)
    for sheet_name in src.sheet_names
    if any(
        (
            person_name in sheet_name
            for person_name
            in ['Erl', 'Tyler']
        )
    )
}
sheets_to_process.keys()

dict_keys(['Erl', 'Tyler'])

# cleanup

## normalize column names

In [4]:
sheets_needing_column_rename = (
    (sheet_name, sheet)
    for sheet_name, sheet
    in sheets_to_process.items()
    if any(
        (str(column).find(' ')
         for column in sheet.columns)
    )
)
sheets_to_process = {
    sheet_name: sheet.rename(
        mapper=lambda x: str(x).split(' ')[0],
        axis='columns')
    for sheet_name, sheet in sheets_needing_column_rename
}

## concatenate sheets

In [5]:
df = pandas.concat(sheets_to_process,
                   names=['worksheet_name', 'index'])

## reindex columns

In [6]:
names = ['field', 'scope', 'variable']
columns = (
    ('fields__oSets__date',                              'set', 'date'),
    ('fields__oSets__oPoints__id',                       'point', 'id'),
    ('fields__oSets__oPoints__observations__id',         'observation', 'id'),
    ('fields__oSets__oPoints__observations__enum',       'point', 'natural_enemy_subtotal'),
    ('fields__oSets__oPoints__observations__|',          'natural_enemy_count', 'name'),
    ('fields__oSets__oPoints__observations__|__number',  'natural_enemy_count', 'count'),
    ('fields__oSets__oPoints__observations__complete',   'observation', 'complete'),
    ('fields__oSets__oPoints__observations__disabled',   'observation', 'disabled'),
    ('fields__oSets__oPoints__observations__a1__number', 'aphid_count', 'a1'),
    ('fields__oSets__oPoints__observations__a2__number', 'aphid_count', 'a2'),
    ('fields__oSets__oPoints__observations__a3__number', 'aphid_count', 'a3'),
    ('fields__oSets__oPoints__observations__anum',       'observation', 'aphid_subtotal'),
    ('fields__oSets__obsName',                           'set', 'obsName'),
    ('fields__oSets__desc',                              'set', 'desc'),
    ('fields__oSets__growthStage',                       'set', 'growthStage'),
    ('fields__oSets__results',                           'set_results', 'results'),
    ('fields__oSets__totalA1',                           'set_results', 'totalA1'),
    ('fields__oSets__totalA2',                           'set_results', 'totalA2'),
    ('fields__oSets__totalA3',                           'set_results', 'totalA3'),
    ('fields__oSets__totalA4',                           'set_results', 'totalA4'),
    ('fields__oSets__totalSets',                         'set_results', 'totalSets'),
    ('fields__oSets__completeSets',                      'set_results', 'completeSets'),
    ('fields__client__company',                          'field', 'client_company'),
    ('fields__client__displayText',                      'field', 'client_displayText'),
    ('fields__client__fname',                            'field', 'client_fname'),
    ('fields__client__lname',                            'field', 'client_lname'),
    ('fields__client__name',                             'field', 'client_name'),
    ('fields__crop',                                     'field', 'crop'),
    ('fields__date',                                     'field', 'date'),
    ('fields__desc',                                     'field', 'desc'),
    ('fields__image',                                    'field', 'image'),
    ('fields__name',                                     'field', 'name'),
)
columns = pandas.MultiIndex.from_tuples(columns, names=names)
df = df.reindex(columns=columns, level='field')
df.columns = df.columns.droplevel(level='field')

## convert_datetime

In [7]:
for column in [('set','date'), ('field','date')]:
    df[column] = pandas.to_datetime(df[column], infer_datetime_format=True)

## forward fill

In [8]:
fill_columns = [
    ('point', 'id'),
    ('observation', 'id'),
] + list(df[['field', 'set']].columns.values)
df.loc[:, fill_columns] = df.loc[:, fill_columns].ffill()

In [9]:
df = df.set_index(([('set', 'date'), ('point', 'id')])).sort_index().reset_index()
df

scope,set,point,observation,point,natural_enemy_count,natural_enemy_count,observation,observation,aphid_count,aphid_count,...,field,field,field,field,field,field,field,field,field,field
variable,date,id,id,natural_enemy_subtotal,name,count,complete,disabled,a1,a2,...,client_company,client_displayText,client_fname,client_lname,client_name,crop,date,desc,image,name
0,2017-07-14 12:31:24.194,0.0,0.0,,,,,,7.0,,...,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat
1,2017-07-14 12:31:24.194,0.0,1.0,,,,,,,,...,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat
2,2017-07-14 12:31:24.194,0.0,2.0,,,,,,,,...,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat
3,2017-07-14 12:31:24.194,0.0,3.0,,,,,,,,...,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat
4,2017-07-14 12:31:24.194,0.0,4.0,,,,,,,,...,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat
5,2017-07-14 12:31:24.194,0.0,5.0,2.0,e1,1.0,1.0,,,,...,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat
6,2017-07-14 12:31:24.194,0.0,5.0,,e2,1.0,,,,,...,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat
7,2017-07-14 12:31:24.194,0.0,5.0,,e3,,,,,,...,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat
8,2017-07-14 12:31:24.194,0.0,5.0,,e4,,,,,,...,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat
9,2017-07-14 12:31:24.194,0.0,5.0,,e5,,,,,,...,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat


## split frame into parts

### non-aggregate

#### index

In [10]:
index_columns = list(df[[('set', 'date'), ('point', 'id'), ('observation', 'id')]].columns)
index_columns

[('set', 'date'), ('point', 'id'), ('observation', 'id')]

In [11]:
index_df = df.loc[
    df[('observation', 'aphid_subtotal')].notna() | df[('point', 'natural_enemy_subtotal')].notna(),
    index_columns
]
index_df = index_df.set_index(index_columns)
index_df

"(set, date)","(point, id)","(observation, id)"
2017-07-14 12:31:24.194,0.0,0.0
2017-07-14 12:31:24.194,0.0,1.0
2017-07-14 12:31:24.194,0.0,2.0
2017-07-14 12:31:24.194,0.0,3.0
2017-07-14 12:31:24.194,0.0,4.0
2017-07-14 12:31:24.194,0.0,5.0
2017-07-14 12:31:24.194,1.0,0.0
2017-07-14 12:31:24.194,1.0,1.0
2017-07-14 12:31:24.194,1.0,2.0
2017-07-14 12:31:24.194,1.0,3.0


#### categorical

In [12]:
base_df_column_names = list(df[[
#     ('set', 'date'),
#     ('observation', 'complete'),
#     ('observation', 'disabled'),
    ('point', 'id'),
    ('observation', 'id'),
]].columns) + list(df[[
    'field',
    'set',
]].columns)
base_df = df.loc[
    df[('observation', 'aphid_subtotal')].notna() | df[('point', 'natural_enemy_subtotal')].notna(),
    base_df_column_names
]
base_df = base_df.set_index(index_columns)
base_df

Unnamed: 0_level_0,Unnamed: 1_level_0,scope,field,field,field,field,field,field,field,field,field,field,set,set,set
Unnamed: 0_level_1,Unnamed: 1_level_1,variable,client_company,client_displayText,client_fname,client_lname,client_name,crop,date,desc,image,name,obsName,desc,growthStage
"(set, date)","(point, id)","(observation, id)",Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2017-07-14 12:31:24.194,0.0,0.0,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2017-07-14 12:31:24.194,0.0,1.0,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2017-07-14 12:31:24.194,0.0,2.0,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2017-07-14 12:31:24.194,0.0,3.0,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2017-07-14 12:31:24.194,0.0,4.0,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2017-07-14 12:31:24.194,0.0,5.0,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2017-07-14 12:31:24.194,1.0,0.0,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2017-07-14 12:31:24.194,1.0,1.0,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2017-07-14 12:31:24.194,1.0,2.0,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2017-07-14 12:31:24.194,1.0,3.0,AAFC SRDC,"AAFC SRDC, Tyler Wist",Tyler,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0


### aphid

In [13]:
aphid_columns = index_columns + list(df[[
    ('observation', 'complete'),
    ('observation', 'disabled'),
]]) + list(df[[
    'aphid_count'
]].columns)
aphid_df = df.loc[df[('observation', 'id')] < 5, aphid_columns].set_index(index_columns).fillna(0)
aphid_df

Unnamed: 0_level_0,Unnamed: 1_level_0,scope,observation,observation,aphid_count,aphid_count,aphid_count
Unnamed: 0_level_1,Unnamed: 1_level_1,variable,complete,disabled,a1,a2,a3
"(set, date)","(point, id)","(observation, id)",Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2017-07-14 12:31:24.194,0.0,0.0,0.0,0.0,7.0,0.0,0.0
2017-07-14 12:31:24.194,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,0.0,2.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,0.0,3.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,0.0,4.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,1.0,1.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,1.0,2.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,1.0,3.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,1.0,4.0,0.0,0.0,0.0,0.0,0.0


### natural enemies

In [14]:
ne_columns = list(df[[('set', 'date'), ('point', 'id'), ('observation', 'complete'), ('observation', 'disabled')]].columns) + list(df[['natural_enemy_count']].columns)
ne = df.loc[:, ne_columns]
ne = ne.dropna(subset=df[[('natural_enemy_count', 'name')]].columns)
ne = ne.reset_index()
ne = ne.set_index([('set', 'date'), ('point', 'id'), ('natural_enemy_count', 'name')]).fillna(0)
ne

Unnamed: 0_level_0,Unnamed: 1_level_0,scope,index,observation,observation,natural_enemy_count
Unnamed: 0_level_1,Unnamed: 1_level_1,variable,Unnamed: 3_level_1,complete,disabled,count
"(set, date)","(point, id)","(natural_enemy_count, name)",Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2017-07-14 12:31:24.194,0.0,e1,5,1.0,0.0,1.0
2017-07-14 12:31:24.194,0.0,e2,6,0.0,0.0,1.0
2017-07-14 12:31:24.194,0.0,e3,7,0.0,0.0,0.0
2017-07-14 12:31:24.194,0.0,e4,8,0.0,0.0,0.0
2017-07-14 12:31:24.194,0.0,e5,9,0.0,0.0,0.0
2017-07-14 12:31:24.194,0.0,e6,10,0.0,0.0,0.0
2017-07-14 12:31:24.194,0.0,e7,11,0.0,0.0,0.0
2017-07-14 12:31:24.194,0.0,e8,12,0.0,0.0,0.0
2017-07-14 12:31:24.194,0.0,e9,13,0.0,0.0,0.0
2017-07-14 12:31:24.194,1.0,e1,19,1.0,0.0,0.0


#### natural enemy transpose

In [15]:
ne2 = ne.unstack().reset_index()
ne2['index_base'] = ne2.pop(('index','','e1'))
ne2[('observation', 'complete', '')] = ne2.pop(('observation', 'complete','e1'))
ne2[('observation', 'disabled', '')] = ne2.pop(('observation', 'disabled','e1'))

In [18]:
ne2.head()

scope,set,point,index,index,index,index,index,index,index,index,...,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,index_base,observation,observation
variable,date,id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,...,count,count,count,count,count,count,count,Unnamed: 19_level_1,complete,disabled
"(natural_enemy_count, name)",Unnamed: 1_level_2,Unnamed: 2_level_2,e2,e3,e4,e5,e6,e7,e8,e9,...,e3,e4,e5,e6,e7,e8,e9,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,2017-07-14 12:31:24.194,0.0,6,7,8,9,10,11,12,13,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,1.0,0.0
1,2017-07-14 12:31:24.194,1.0,20,21,22,23,24,25,26,27,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19,1.0,0.0
2,2017-07-14 12:31:24.194,2.0,34,35,36,37,38,39,40,41,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33,1.0,0.0
3,2017-07-14 12:31:24.194,3.0,48,49,50,51,52,53,54,55,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47,1.0,0.0
4,2017-07-14 12:31:24.194,4.0,62,63,64,65,66,67,68,69,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61,1.0,0.0


In [23]:
ne3 = ne2.copy()
ne3[('observation', 'id', '')] = 5
ne3 = ne3.set_index([('set', 'date', ''), ('point', 'id', ''), ('observation', 'id', '')])
ne3.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,scope,index,index,index,index,index,index,index,index,observation,observation,...,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,index_base,observation,observation
Unnamed: 0_level_1,Unnamed: 1_level_1,variable,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,complete,complete,...,count,count,count,count,count,count,count,Unnamed: 21_level_1,complete,disabled
Unnamed: 0_level_2,Unnamed: 1_level_2,"(natural_enemy_count, name)",e2,e3,e4,e5,e6,e7,e8,e9,e2,e3,...,e3,e4,e5,e6,e7,e8,e9,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
"(set, date, )","(point, id, )","(observation, id, )",Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3
2017-07-14 12:31:24.194,0.0,5,6,7,8,9,10,11,12,13,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5,1.0,0.0
2017-07-14 12:31:24.194,1.0,5,20,21,22,23,24,25,26,27,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19,1.0,0.0
2017-07-14 12:31:24.194,2.0,5,34,35,36,37,38,39,40,41,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33,1.0,0.0
2017-07-14 12:31:24.194,3.0,5,48,49,50,51,52,53,54,55,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47,1.0,0.0
2017-07-14 12:31:24.194,4.0,5,62,63,64,65,66,67,68,69,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61,1.0,0.0


In [24]:
ne3 = ne3[['natural_enemy_count']]
ne3.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,scope,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count
Unnamed: 0_level_1,Unnamed: 1_level_1,variable,count,count,count,count,count,count,count,count,count
Unnamed: 0_level_2,Unnamed: 1_level_2,"(natural_enemy_count, name)",e1,e2,e3,e4,e5,e6,e7,e8,e9
"(set, date, )","(point, id, )","(observation, id, )",Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3
2017-07-14 12:31:24.194,0.0,5,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,1.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,2.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,3.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,4.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
ne3.index = ne3.index.set_names([ ('set', 'date'), ('point', 'id'), ('observation', 'id')])
ne3.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,scope,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count
Unnamed: 0_level_1,Unnamed: 1_level_1,variable,count,count,count,count,count,count,count,count,count
Unnamed: 0_level_2,Unnamed: 1_level_2,"(natural_enemy_count, name)",e1,e2,e3,e4,e5,e6,e7,e8,e9
"(set, date)","(point, id)","(observation, id)",Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3
2017-07-14 12:31:24.194,0.0,5,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,1.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,2.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,3.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,4.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### flatten unstacked column names

In [17]:
new_names2 = [tuple([c[0], c[2] or c[1], None]) for c in ne3.columns.values]
ne3.columns = pandas.MultiIndex.from_tuples(new_names2).droplevel(2)
ne3

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,natural_enemy_count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,e1,e2,e3,e4,e5,e6,e7,e8,e9
"(set, date)","(point, id)","(observation, id)",Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
2017-07-14 12:31:24.194,0.0,5,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,1.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,2.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,3.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-14 12:31:24.194,4.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-18 10:31:22.263,0.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-18 10:31:22.263,1.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-18 10:31:22.263,2.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-18 10:31:22.263,3.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-07-18 10:31:22.263,4.0,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### remainder

In [18]:
remainder_column_names = list(set(df.columns) - set(base_df.columns) - set(aphid_columns) - set(ne_columns) | set(index_columns))
remainder_df = df.loc[
    df[('observation', 'aphid_subtotal')].notna() | df[('point', 'natural_enemy_subtotal')].notna(),
    remainder_column_names
].sort_index(axis='columns').set_index(index_columns)
remainder_df


Unnamed: 0_level_0,Unnamed: 1_level_0,scope,observation,point,set_results,set_results,set_results,set_results,set_results,set_results,set_results
Unnamed: 0_level_1,Unnamed: 1_level_1,variable,aphid_subtotal,natural_enemy_subtotal,completeSets,results,totalA1,totalA2,totalA3,totalA4,totalSets
"(set, date)","(point, id)","(observation, id)",Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
2017-07-14 12:31:24.194,0.0,0.0,0.0,,0.0,,,,,,1.0
2017-07-14 12:31:24.194,0.0,1.0,0.0,,,,,,,,
2017-07-14 12:31:24.194,0.0,2.0,0.0,,,,,,,,
2017-07-14 12:31:24.194,0.0,3.0,0.0,,,,,,,,
2017-07-14 12:31:24.194,0.0,4.0,0.0,,,,,,,,
2017-07-14 12:31:24.194,0.0,5.0,,2.0,,,,,,,
2017-07-14 12:31:24.194,1.0,0.0,0.0,,,,,,,,
2017-07-14 12:31:24.194,1.0,1.0,0.0,,,,,,,,
2017-07-14 12:31:24.194,1.0,2.0,0.0,,,,,,,,
2017-07-14 12:31:24.194,1.0,3.0,0.0,,,,,,,,


## merge parts

In [19]:
reindexed_base = base_df.reset_index(
    level=[
        ('observation', 'id'),
        ('point', 'id')
    ],
    drop=True
).drop_duplicates()

In [20]:
joined_sum = index_df.join(
    aphid_df
).join(
    ne3[['natural_enemy_count']]
).join(
    remainder_df
).sum(
    level=[('set', 'date'), ('point', 'id')]
).join(
    reindexed_base
).reset_index()
joined_sum

scope,set,point,observation,observation,aphid_count,aphid_count,aphid_count,natural_enemy_count,natural_enemy_count,natural_enemy_count,...,field,field,field,field,field,field,field,set,set,set
variable,date,id,complete,disabled,a1,a2,a3,e1,e2,e3,...,client_lname,client_name,crop,date,desc,image,name,obsName,desc,growthStage
0,2017-07-14 12:31:24.194,0.0,0.0,0.0,7.0,0.0,0.0,1.0,1.0,0.0,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
1,2017-07-14 12:31:24.194,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2,2017-07-14 12:31:24.194,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
3,2017-07-14 12:31:24.194,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
4,2017-07-14 12:31:24.194,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
5,2017-07-18 10:31:22.263,0.0,5.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,...,,,,NaT,,,,,,
6,2017-07-18 10:31:22.263,1.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,NaT,,,,,,
7,2017-07-18 10:31:22.263,2.0,5.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,...,,,,NaT,,,,,,
8,2017-07-18 10:31:22.263,3.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,NaT,,,,,,
9,2017-07-18 10:31:22.263,4.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,NaT,,,,,,


In [21]:
joined_df = index_df.join(
    aphid_df
).join(
    ne3[['natural_enemy_count']]
).join(
    remainder_df
).join(
    base_df
).reset_index()
joined_df

scope,set,point,observation,observation,observation,aphid_count,aphid_count,aphid_count,natural_enemy_count,natural_enemy_count,...,field,field,field,field,field,field,field,set,set,set
variable,date,id,id,complete,disabled,a1,a2,a3,e1,e2,...,client_lname,client_name,crop,date,desc,image,name,obsName,desc,growthStage
0,2017-07-14 12:31:24.194,0.0,0.0,0.0,0.0,7.0,0.0,0.0,,,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
1,2017-07-14 12:31:24.194,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
2,2017-07-14 12:31:24.194,0.0,2.0,0.0,0.0,0.0,0.0,0.0,,,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
3,2017-07-14 12:31:24.194,0.0,3.0,0.0,0.0,0.0,0.0,0.0,,,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
4,2017-07-14 12:31:24.194,0.0,4.0,0.0,0.0,0.0,0.0,0.0,,,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
5,2017-07-14 12:31:24.194,0.0,5.0,,,,,,1.0,1.0,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
6,2017-07-14 12:31:24.194,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
7,2017-07-14 12:31:24.194,1.0,1.0,0.0,0.0,0.0,0.0,0.0,,,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
8,2017-07-14 12:31:24.194,1.0,2.0,0.0,0.0,0.0,0.0,0.0,,,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0
9,2017-07-14 12:31:24.194,1.0,3.0,0.0,0.0,0.0,0.0,0.0,,,...,Wist,Tyler Wist,CROPS.WHEAT,2017-07-14 12:30:31.587,Next to soybeans and canola,img/wheat.png,Sef wheat,Tyler,,6.0


# save file

In [22]:
joined_sum.columns = joined_sum.columns.map('_'.join)
joined_df.columns = joined_df.columns.map('_'.join)
with pandas.ExcelWriter('data/real/2017/CAM data from iPads/out_df.xlsx') as file_writer:
    joined_sum.to_excel(
        file_writer,
        sheet_name='2017 - CAM - sum',
        freeze_panes=(1, 1),
        index_label='row'
    )
    joined_df.to_excel(
        file_writer,
        sheet_name='2017 - CAM - detailed',
        freeze_panes=(1, 1),
        index_label='row'
    )
    file_writer.save()