In [25]:
import pandas as pd
import numpy as np

In [26]:
#Returns a consolidated df with coincident rows rolled up. Coincident rows are consecutive
#rows that have the same date/time and are populated in various columns. If a row populates
#the same column as the previous, coincident row (a conflict), no consolidation occurs
#unless that column is in a separate, 'override' list.
#Version of 3/16/20 - uses .loc instead of .iloc; consolidates "downward"

def RollupCoincidentRows(df_in, dt_col, lst_cols, lst_override, IsFlagConflicts, IsDeleteCoinc):
    """Roll up consecutive rows that share the same dt_col value.

    Rows are compared pairwise from top to bottom.  When a row has the same
    dt_col value as the row directly above it, the pair is coincident: the
    lower row takes the upper row's value in every lst_cols column where the
    lower row is null, and the upper row is marked for deletion.  Consolidation
    therefore runs "downward" -- the last row of a coincident run ends up
    carrying the combined data.

    A column that is populated in both rows of a pair is a conflict.  If the
    column is listed in lst_override the lower row's value is retained and the
    pair is still consolidated; otherwise the conflict is irresolvable and both
    rows are kept with their data unchanged.

    Parameters
    ----------
    df_in : DataFrame
        Input data; it is not modified (all work is done on a copy).
        Assumes index labels are unique, since rows are addressed by label.
    dt_col : str
        Column whose values are compared to detect coincident rows.
    lst_cols : list of str
        Columns to check for conflicts and to consolidate.
    lst_override : list of str
        Columns in which a conflict does not prevent consolidation.
    IsFlagConflicts : bool
        If True, 'RowConflict' is set True on both rows of every conflicting
        pair, whether or not the conflict was overridden.
    IsDeleteCoinc : bool
        If True, rows marked for deletion are dropped together with the 'keep'
        and 'IsCoincident' helper columns ('RowConflict' is dropped as well
        unless IsFlagConflicts is True).  If False, every row is returned with
        all three helper columns.

    Returns
    -------
    DataFrame
    """
    df = df_in.copy()

    #Add flag columns and populate with defaults
    kp_col, confl_col, coinc_col = 'keep', 'RowConflict', 'IsCoincident'
    df[kp_col], df[confl_col], df[coinc_col] = True, False, False

    #Walk adjacent (previous, current) label pairs; an empty or one-row frame yields none
    for idxPrev, idx in zip(df.index[:-1], df.index[1:]):

        if df.loc[idx, dt_col] != df.loc[idxPrev, dt_col]:
            continue

        #Single-step .loc[row, col] writes: chained df[col].loc[row] = ... may write to a copy
        df.loc[idxPrev, coinc_col] = True
        df.loc[idx, coinc_col] = True

        #First pass: classify every column without touching the data
        fill_cols = []
        IsConflict, IsIrresolvable = False, False
        for col in lst_cols:
            if pd.isnull(df.loc[idxPrev, col]):
                continue                      #nothing to carry down
            if pd.isnull(df.loc[idx, col]):
                fill_cols.append(col)
            else:
                IsConflict = True
                if col not in lst_override:
                    IsIrresolvable = True

        #Flag conflict whether overridden or not
        if IsConflict and IsFlagConflicts:
            df.loc[idxPrev, confl_col] = True
            df.loc[idx, confl_col] = True

        #Unresolved conflict: keep both rows exactly as they were
        if IsIrresolvable:
            continue

        #Second pass: carry the previous row's values down, then retire it
        for col in fill_cols:
            df.loc[idx, col] = df.loc[idxPrev, col]
        df.loc[idxPrev, kp_col] = False

    if not IsDeleteCoinc:
        return df

    #Return after dropping flagged rows and Boolean columns
    drop_cols = [kp_col, coinc_col]
    if not IsFlagConflicts:
        drop_cols.append(confl_col)
    return df[df[kp_col]].drop(drop_cols, axis=1)
    
def IsRowConflict(df, idx, idxPrev, col):
    """Return True when col is non-null in both row idxPrev and row idx of df."""
    #The previous row is tested first; the current row is only examined if needed
    return not IsNullCell(df, idxPrev, col) and not IsNullCell(df, idx, col)

def IsNullCell(df, idx, col):
    """Return True if the cell of df at row label idx, column col is null (per pd.isnull)."""
    cell = df[col].loc[idx]
    return bool(pd.isnull(cell))

def IndexPrev(df, idx):
    """Return the index label of the row directly above the row labelled idx.

    Raises
    ------
    KeyError
        If idx is not a label in df.index (raised by Index.get_loc).
    IndexError
        If idx is the first row, which has no previous row.
    """
    pos = df.index.get_loc(idx)
    #Without this guard pos - 1 == -1 would silently wrap around to the last row
    if isinstance(pos, (int, np.integer)) and pos == 0:
        raise IndexError('row %r is the first row; it has no previous row' % (idx,))
    return df.index.values[pos - 1]
    

In [27]:
#Read the sample table and convert the timestamp strings to datetime64
df = pd.read_csv('Row_rollup.csv').assign(
    timestamp=lambda raw: pd.to_datetime(raw['timestamp'])
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
timestamp    10 non-null datetime64[ns]
Its_on       4 non-null float64
Its_off      2 non-null float64
Other        4 non-null float64
Comment      10 non-null object
dtypes: datetime64[ns](1), float64(3), object(1)
memory usage: 528.0+ bytes


In [28]:
#Display the full sample table; the Comment column states the expected outcome per row
df

Unnamed: 0,timestamp,Its_on,Its_off,Other,Comment
0,2020-01-08 02:00:00,1.0,,,Coincident - Should roll up
1,2020-01-08 02:00:00,,,300.0,Coincident - Should roll up
2,2020-01-08 03:00:00,,1.0,,Not coincident
3,2020-01-08 04:00:00,2.0,,,Conflict
4,2020-01-08 04:00:00,1.0,,,Conflict
5,2020-01-08 04:30:00,,,400.0,Not coincident
6,2020-01-08 16:00:00,,,100.0,Coincident - Should roll up
7,2020-01-08 16:00:00,,3.0,,Coincident - Should roll up
8,2020-01-08 16:00:00,1.0,,,Coincident - Should roll up
9,2020-01-08 20:00:00,,,800.0,Not coincident


In [29]:
#Data columns passed as lst_cols to RollupCoincidentRows in the examples below
lst_cols = ['Its_on','Its_off','Other']

Example 1:  Column values are not in conflict, so rows 0 and 1 get consolidated

In [30]:
#Keep every row (IsDeleteCoinc=False) so the helper flag columns can be inspected
RollupCoincidentRows(
    df.loc[0:2].copy(), 'timestamp', lst_cols, [],
    IsFlagConflicts=True, IsDeleteCoinc=False,
)

Unnamed: 0,timestamp,Its_on,Its_off,Other,Comment,keep,RowConflict,IsCoincident
0,2020-01-08 02:00:00,1.0,,,Coincident - Should roll up,False,False,True
1,2020-01-08 02:00:00,1.0,,300.0,Coincident - Should roll up,True,False,True
2,2020-01-08 03:00:00,,1.0,,Not coincident,True,False,False


In [31]:
#Same rows, now dropping the rolled-up row and the 'keep' / 'IsCoincident' columns
RollupCoincidentRows(
    df.loc[0:2], 'timestamp', lst_cols, [],
    IsFlagConflicts=True, IsDeleteCoinc=True,
)

Unnamed: 0,timestamp,Its_on,Its_off,Other,Comment,RowConflict
1,2020-01-08 02:00:00,1.0,,300.0,Coincident - Should roll up,False
2,2020-01-08 03:00:00,,1.0,,Not coincident,False


Example 2:  Rows 3 and 4 are in conflict; since lst_override is empty, the rows do not get consolidated

In [32]:
#Rows 3 and 4 both populate Its_on and no column may be overridden
df_test = df.loc[3:4].copy()
RollupCoincidentRows(
    df_test, 'timestamp', lst_cols, [],
    IsFlagConflicts=True, IsDeleteCoinc=False,
)

Unnamed: 0,timestamp,Its_on,Its_off,Other,Comment,keep,RowConflict,IsCoincident
3,2020-01-08 04:00:00,2.0,,,Conflict,True,True,True
4,2020-01-08 04:00:00,1.0,,,Conflict,True,True,True


Example 3: Same data rows as Example 2, but the `Its_on` column is listed as ok to override; this causes the rows to be consolidated and retains the value from the last coincident row.

In [33]:
#Its_on is passed as an override column for the same two rows
df_test = df.loc[3:4].copy()
RollupCoincidentRows(
    df_test, 'timestamp', lst_cols, ['Its_on'],
    IsFlagConflicts=True, IsDeleteCoinc=True,
)

Unnamed: 0,timestamp,Its_on,Its_off,Other,Comment,RowConflict
4,2020-01-08 04:00:00,1.0,,,Conflict,True


Example 4: Three coincident rows (6, 7 and 8 in original DataFrame)

In [34]:
#Three rows share one timestamp; keep all rows to inspect the flag columns
df_test = df.loc[6:8].copy()
RollupCoincidentRows(
    df_test, 'timestamp', lst_cols, [],
    IsFlagConflicts=True, IsDeleteCoinc=False,
)

Unnamed: 0,timestamp,Its_on,Its_off,Other,Comment,keep,RowConflict,IsCoincident
6,2020-01-08 16:00:00,,,100.0,Coincident - Should roll up,False,False,True
7,2020-01-08 16:00:00,,3.0,100.0,Coincident - Should roll up,False,False,True
8,2020-01-08 16:00:00,1.0,3.0,100.0,Coincident - Should roll up,True,False,True


In [35]:
#Same three rows with the rolled-up rows dropped; the original index labels are kept
df_test = df.loc[6:8].copy()
RollupCoincidentRows(
    df_test, 'timestamp', lst_cols, [],
    IsFlagConflicts=True, IsDeleteCoinc=True,
)

Unnamed: 0,timestamp,Its_on,Its_off,Other,Comment,RowConflict
8,2020-01-08 16:00:00,1.0,3.0,100.0,Coincident - Should roll up,False


Example 5: All Data

In [36]:
#Whole table, with every data column passed as an override column
RollupCoincidentRows(
    df, 'timestamp', lst_cols, lst_override=lst_cols,
    IsFlagConflicts=True, IsDeleteCoinc=True,
)

Unnamed: 0,timestamp,Its_on,Its_off,Other,Comment,RowConflict
1,2020-01-08 02:00:00,1.0,,300.0,Coincident - Should roll up,False
2,2020-01-08 03:00:00,,1.0,,Not coincident,False
4,2020-01-08 04:00:00,1.0,,,Conflict,True
5,2020-01-08 04:30:00,,,400.0,Not coincident,False
8,2020-01-08 16:00:00,1.0,3.0,100.0,Coincident - Should roll up,False
9,2020-01-08 20:00:00,,,800.0,Not coincident,False
