In [None]:
import pandas as pd

def read_tbl(
    path="../../../../data/MN/2024-8-28/Officer-with-Agency-Data-2024-08-27-15-32-44-1.xlsx",
    nrows=100,
):
    """Read the officer/agency Excel workbook into a DataFrame.

    Args:
        path: Location of the workbook. Defaults to the path this notebook
            has always read from, so ``read_tbl()`` behaves as before.
        nrows: Maximum number of rows to parse, forwarded to
            ``pd.read_excel``. Defaults to 100 (a small sample); pass
            ``None`` to let pandas parse every row.

    Returns:
        The DataFrame produced by ``pd.read_excel`` with its default header
        handling.
    """
    return pd.read_excel(path, nrows=nrows)

def remove_first_column(df):
    """Return ``df`` without its leftmost column.

    The column is chosen by position, not by label, so whatever happens to
    be first is the one discarded. The input frame is not modified.
    """
    return df.iloc[:, 1:]


def propagate_uids(df):
    """Fill missing ``uid`` values downward from the last row that had one.

    Rows are visited in order. A row whose ``last_name`` is a string
    containing ``'Subtotal'`` is a separator: it keeps its own ``uid``
    untouched and clears the remembered uid, so nothing is carried across
    it. Every other row either supplies a new uid (when its own ``uid`` is
    not NaN) or receives the remembered one; rows seen before any uid is
    known keep their NaN.

    Args:
        df: Frame with at least ``uid`` and ``last_name`` columns. It is
            not modified.

    Returns:
        A new DataFrame with the same columns and row order, a fresh
        0..n-1 index, and the ``uid`` column filled as described. The
        ``uid`` column is converted to a numeric dtype when every value
        allows it; otherwise it is left as-is.
    """
    # A fresh positional index is part of the contract: callers slice the
    # result by position and the displayed index is expected to be 0..n-1.
    result_df = df.copy().reset_index(drop=True)

    # Nothing to propagate; return the (empty) copy with its columns intact.
    if len(result_df) == 0:
        return result_df

    current_uid = None
    propagated = []

    # Only the two columns that drive the logic are walked; this avoids
    # materialising every row as a Series the way ``iterrows`` does.
    for uid, last_name in zip(result_df['uid'], result_df['last_name']):
        if isinstance(last_name, str) and 'Subtotal' in last_name:
            # Separator row: forget the current person, keep the row's uid.
            current_uid = None
            propagated.append(uid)
            continue

        if pd.notna(uid):
            current_uid = uid

        propagated.append(uid if current_uid is None else current_uid)

    result_df['uid'] = pd.Series(propagated, index=result_df.index)

    # Best-effort numeric conversion. ``errors='ignore'`` is deprecated in
    # pandas, so the "leave unchanged on failure" behaviour is spelled out.
    try:
        result_df['uid'] = pd.to_numeric(result_df['uid'])
    except (ValueError, TypeError):
        pass

    return result_df


# Load the sheet and give the positional "Unnamed: N" columns meaningful
# names in a single step. "Unnamed: 2" is deliberately left alone here.
df = read_tbl().rename(
    columns={
        "Unnamed: 1": "uid",
        "Unnamed: 3": "last_name",
        "Unnamed: 4": "first_name",
        "Unnamed: 5": "middle_name",
        "Unnamed: 6": "agency_name",
        "Unnamed: 7": "status",
        "Unnamed: 8": "start_date",
        "Unnamed: 9": "agency_status",
        "Unnamed: 10": "end_date",
    }
)

def remove_first_8_rows(df, n=9):
    """Drop the leading rows of ``df`` by position.

    NOTE: despite the function's name, the default discards the first
    *nine* rows (positions 0 through 8). The default is kept at 9 so that
    existing calls, including ``df.pipe(remove_first_8_rows)``, return
    exactly the same rows as before.

    Args:
        df: Frame to trim. It is not modified.
        n: Number of leading rows to discard. Defaults to 9.

    Returns:
        The rows of ``df`` from position ``n`` onward, with their original
        index labels.
    """
    return df.iloc[n:]

def drop_empty_rows(df):
    """Return the rows of ``df`` whose ``uid`` is not the string "Subtotal".

    Despite the name, no emptiness check is performed: rows with a missing
    ``uid`` are kept, and only exact matches of "Subtotal" are removed.
    """
    keep = df["uid"] != "Subtotal"
    return df[keep]

# Run the cleaning steps in order, then discard the one column that was
# never given a meaningful name.
df = (
    df.pipe(remove_first_column)
    .pipe(propagate_uids)
    .pipe(remove_first_8_rows)
    .pipe(drop_empty_rows)
    .drop(columns=["Unnamed: 2"])
)

df.head(20)

Unnamed: 0,uid,last_name,first_name,middle_name,agency_name,status,start_date,agency_status,end_date
9,1,Andersen,Steven,Peder,Verndale Police Dept.,Terminated,7/15/1973,Former,4/16/1996
10,1,Andersen,Steven,Peder,Beltrami Co. Sheriffs Office,Terminated,5/1/1994,Former,4/27/2007
11,1,Andersen,Steven,Peder,"DPS, Bureau Of Criminal Apprehension",Terminated,7/1/1978,Former,1/1/1980
12,1,Andersen,Steven,Peder,Menahga Police Dept.,Terminated,4/15/1994,Former,3/1/1995
14,10,Dahl,Gary,Frederick,"DPS, Bureau Of Criminal Apprehension",Terminated,7/1/1978,Former,6/30/1998
16,100,Barth,James,Stansberry,Minnesota State Patrol,Terminated,7/1/1978,Former,7/7/1998
18,1000,Salto,Norman,Dean,Douglas Co. Sheriffs Office,Terminated,9/15/2013,Former,9/16/2013
20,10000,Amendola,Thomas,Richard,Canosia Township Police Dept.,Terminated,3/9/1983,Former,1/1/2000
22,10006,Franckowiak,George,Isadore,Gnesen Township,Terminated,5/20/1988,Former,6/30/2004
24,10008,Graves,James,Stephen,Fredenberg Police Dept.,Terminated,5/20/1988,Former,1/31/2000


In [16]:
# Display the cleaned DataFrame built by the pipeline earlier in the notebook.
df

Unnamed: 0,uid,last_name,first_name,middle_name,agency_name,status,start_date,agency_status,end_date
9,1,Andersen,Steven,Peder,Verndale Police Dept.,Terminated,7/15/1973,Former,4/16/1996
10,1,Andersen,Steven,Peder,Beltrami Co. Sheriffs Office,Terminated,5/1/1994,Former,4/27/2007
11,1,Andersen,Steven,Peder,"DPS, Bureau Of Criminal Apprehension",Terminated,7/1/1978,Former,1/1/1980
12,1,Andersen,Steven,Peder,Menahga Police Dept.,Terminated,4/15/1994,Former,3/1/1995
14,10,Dahl,Gary,Frederick,"DPS, Bureau Of Criminal Apprehension",Terminated,7/1/1978,Former,6/30/1998
16,100,Barth,James,Stansberry,Minnesota State Patrol,Terminated,7/1/1978,Former,7/7/1998
18,1000,Salto,Norman,Dean,Douglas Co. Sheriffs Office,Terminated,9/15/2013,Former,9/16/2013
20,10000,Amendola,Thomas,Richard,Canosia Township Police Dept.,Terminated,3/9/1983,Former,1/1/2000
22,10006,Franckowiak,George,Isadore,Gnesen Township,Terminated,5/20/1988,Former,6/30/2004
24,10008,Graves,James,Stephen,Fredenberg Police Dept.,Terminated,5/20/1988,Former,1/31/2000
