In [None]:
# ============================================================
#  Rutgers Healthcare Dataset Cleaning — Full Colab Script
#  One file: setup → load → clean → audit → union → merge → dedup → save
# ============================================================

# -----------------------------
# 0) Install & imports
# -----------------------------
!pip -q install pandas numpy chardet openpyxl pyjanitor rapidfuzz unidecode seaborn

import pandas as pd
import numpy as np
import re, io, chardet
import janitor  # pyjanitor
from rapidfuzz import fuzz, process
from unidecode import unidecode
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_columns', None)
pd.set_option('display.width', 220)

print("✅ Libraries installed & imported.")


# -----------------------------
# 1) Helpers: encoding-safe reader, column cleanup, previews
# -----------------------------
def detect_encoding(path, nbytes=1_000_000):
    """Guess the text encoding of the file at `path`.

    Reads at most `nbytes` bytes from the start of the file in binary mode,
    hands them to `chardet.detect`, and returns the reported encoding name.
    Falls back to 'utf-8' when chardet reports no encoding.
    """
    with open(path, 'rb') as handle:
        sample = handle.read(nbytes)
    guess = chardet.detect(sample).get('encoding')
    return guess if guess else 'utf-8'

def safe_read_csv(path, **kwargs):
    """Read a CSV with pandas, retrying with several encodings.

    Encodings are tried in this order: an explicit `encoding` keyword (if
    given), the encoding guessed by `detect_encoding`, then 'utf-8', 'cp1252',
    'latin1' and 'ISO-8859-1'. Empty and already-tried names are skipped.
    All other keyword arguments are forwarded to `pd.read_csv`.

    Returns the DataFrame from the first attempt that succeeds; if every
    attempt fails, re-raises the exception from the last attempt.
    """
    candidates = [
        kwargs.pop('encoding', None),
        detect_encoding(path),
        'utf-8',
        'cp1252',
        'latin1',
        'ISO-8859-1',
    ]
    attempted = set()
    failure = None
    for codec in candidates:
        if codec and codec not in attempted:
            attempted.add(codec)
            try:
                return pd.read_csv(path, encoding=codec, **kwargs)
            except Exception as exc:
                # remember the failure and fall through to the next encoding
                failure = exc
    raise failure

def clean_column_names(df):
    """Return a copy of `df` whose column labels have been tidied.

    Each label is converted to a string, non-breaking spaces become regular
    spaces, leading/trailing whitespace is removed, runs of whitespace are
    collapsed to a single space, and en dashes are replaced by hyphens.
    The input frame is left untouched.
    """
    def _tidy(label):
        text = str(label).replace('\xa0', ' ').strip()
        text = re.sub(r'\s+', ' ', text)
        return text.replace('–', '-')

    renamed = df.copy()
    renamed.columns = list(map(_tidy, renamed.columns))
    return renamed

def preview(df, n=5, title=None):
    """Print a short summary of `df`: optional title banner, shape, the first
    `n` rows (rendered through the notebook's `display`), and the column list.
    """
    if title:
        print(f"\n=== {title} ===")
    print(f"Shape: {df.shape}")
    head_rows = df.head(n)
    display(head_rows)
    column_names = list(df.columns)
    print("Columns:", column_names)

def key_health(df, label):
    """Print how many rows of `df` have a non-null NetID, a non-null Email,
    and both at once. A count is reported as 0 when the column is absent.
    """
    def _non_null(col):
        # count of populated cells, or 0 when the frame lacks the column
        return df[col].notna().sum() if col in df.columns else 0

    n = len(df)
    has_netid = _non_null('NetID')
    has_email = _non_null('Email')
    if {'NetID', 'Email'}.issubset(df.columns):
        both = (df['NetID'].notna() & df['Email'].notna()).sum()
    else:
        both = 0
    print(f"{label} — rows: {n} | NetID present: {has_netid} | Email present: {has_email} | Both: {both}")

def normalize_text_series(s):
    """Return `s` as stripped, lower-cased text with missing values kept missing.

    Values that were null in the input (None, NaN, pd.NA) stay null instead of
    being stringified. Cells whose text is blank or one of the placeholder
    spellings 'nan', 'none' or '<na>' (case-insensitive) are also set to
    pd.NA, so they can never act as a join key.
    """
    # Remember real nulls before astype(str) turns them into 'nan'/'None'/'<NA>'.
    missing = s.isna()
    text = s.astype(str).str.strip().str.lower()
    placeholder = text.isin(['', 'nan', 'none', '<na>'])
    return text.mask(missing | placeholder, pd.NA)

def standardize_identity_cols(df, colmap):
    """
    Return a copy of `df` with canonical identity columns
    FirstName, LastName, NetID and Email.

    colmap = {'first':[...], 'last':[...], 'netid':[...], 'email':[...]}
    lists, per identity field, the source column names to look for in order;
    the first one present in the frame is copied into the canonical column.
    When none is present the canonical column is filled with pd.NA.

    Names are transliterated with `unidecode` and stripped; NetID and Email
    are passed through `normalize_text_series`.
    """
    out = df.copy()

    canonical = (
        ('FirstName', 'first'),
        ('LastName', 'last'),
        ('NetID', 'netid'),
        ('Email', 'email'),
    )
    for target, role in canonical:
        source = next((c for c in colmap.get(role, []) if c in out.columns), None)
        out[target] = pd.NA if source is None else out[source]

    def _ascii_name(value):
        # keep missing names missing; otherwise transliterate and trim
        return unidecode(str(value)).strip() if pd.notna(value) else pd.NA

    for name_col in ('FirstName', 'LastName'):
        out[name_col] = out[name_col].apply(_ascii_name)
    for key_col in ('NetID', 'Email'):
        out[key_col] = normalize_text_series(out[key_col])
    return out

def split_full_name(df):
    """
    Fill missing FirstName / LastName values from a 'Name' column.

    When `df` has a 'Name' column, each non-empty name is split on its first
    space: the part before it fills FirstName and the remainder fills
    LastName, but only in rows where that column is currently null. Rows
    whose Name is missing or blank are left untouched (a missing Name is not
    turned into the text 'nan').

    The frame is modified in place (FirstName / LastName are created when
    absent) and also returned. Frames without a 'Name' column are returned
    unchanged.
    """
    if 'Name' not in df.columns:
        return df

    for col in ('FirstName', 'LastName'):
        if col not in df.columns:
            df[col] = pd.NA

    raw = df['Name']
    cleaned = raw.astype(str).str.strip()
    # Null out rows that had no real name so they cannot produce 'nan' / ''.
    cleaned = cleaned.where(raw.notna() & (cleaned != ''))
    parts = cleaned.str.split(pat=' ', n=1, expand=True)

    # `parts` may have zero columns (empty frame) or one column (no spaces).
    if parts.shape[1] > 0:
        df.loc[df['FirstName'].isna(), 'FirstName'] = parts[0]
    if parts.shape[1] > 1:
        df.loc[df['LastName'].isna(), 'LastName'] = parts[1]
    return df


def core_view(df):
    """Return a copy of `df` restricted to the identity and core descriptive columns.

    The result always starts with FirstName, LastName, NetID, Email (in that
    order), followed by whichever of the known descriptive columns exist in
    `df`. An identity column that is absent from `df` is added as an all-NA
    column instead of raising KeyError. The input frame is not modified.
    """
    identity_cols = ['FirstName', 'LastName', 'NetID', 'Email']
    descriptive_cols = [
        'TITLE', 'Title', 'TRACK', 'Department/Division', 'DeptNm', 'Department',
        'PRIMARYSCHOOL', 'School/Div', 'NPI_NUMBER', 'CAMPUS', 'role', 'ETyp', 'Empl Class Name'
    ]
    present = [c for c in identity_cols + descriptive_cols if c in df.columns]
    view = df[present].copy()

    # Walk the keys in canonical order so each missing one lands at its own slot.
    for position, key in enumerate(identity_cols):
        if key not in view.columns:
            view.insert(position, key, pd.NA)
    return view

def add_clinical_placeholder(df):
    """
    Placeholder (non-final) clinical flag.

    Returns a copy of `df` with a 'Clinical_Status_placeholder' column set to
    'clinical_candidate' when the text 'clinical' (case-insensitive) appears
    in any of the TRACK, TITLE or Title columns that exist, and 'unknown'
    otherwise. Both title spellings are checked, so a clinical value in
    'Title' is not hidden by the presence of a 'TITLE' column.
    """
    out = df.copy()
    flagged = np.zeros(len(out), dtype=bool)
    for col in ('TRACK', 'TITLE', 'Title'):
        if col in out.columns:
            mentions = out[col].astype(str).str.lower().str.contains('clinical', na=False)
            flagged |= mentions.to_numpy(dtype=bool)
    out['Clinical_Status_placeholder'] = np.where(flagged, 'clinical_candidate', 'unknown')
    return out

def union_postdocs(df_all, df_no):
    """Stack the two postdoc frames (renumbering the index) and drop rows
    that are exact duplicates across every column, keeping the first copy.
    """
    stacked = pd.concat((df_all, df_no), ignore_index=True)
    return stacked.drop_duplicates()


# -----------------------------
# 2) Column maps (for standardization)
# -----------------------------
# Each map lists, per identity field, the source column names to try in
# order; standardize_identity_cols copies the first one found.
colmap_faculty = dict(
    first=['FIRSTNAME', 'First Name'],
    last=['LASTNAME', 'Last Name'],
    netid=['NETID', 'Net ID', 'Netid', 'NetID'],
    email=['EMAILADDRESS', 'Email'],
)
colmap_pwac = dict(
    first=['First Name', 'FirstName'],
    last=['Last Name', 'LastName'],
    netid=['Netid', 'Net ID', 'NetID'],
    email=['Organizational Email', 'Primary Email Address', 'Email'],
)
colmap_nick = dict(
    first=['FirstName', 'FIRSTNAME', 'First Name'],
    last=['LastName', 'LASTNAME', 'Last Name'],
    netid=['NetID', 'NETID', 'Net ID', 'Netid'],
    email=['Email', 'EMAILADDRESS'],
)
colmap_post = dict(
    first=['FIRSTNAME', 'First Name', 'First'],
    last=['LASTNAME', 'Last Name', 'Last'],
    netid=['Net ID', 'NETID', 'NetID', 'Netid'],
    email=['Email', 'EMAILADDRESS', 'Organizational Email'],
)


# -----------------------------
# 3) File paths (adjust if needed)
# -----------------------------
# Ensure these match the names in your Colab "Files" pane.
# Input CSV locations under /content. The file names are used verbatim, so
# they must match the uploaded files character for character.
# Faculty report (base table of the master build below).
path_faculty  = "/content/All Faculty Report for Wellness Survey_2024-06-28(ALL Faculy forWellness Survey).csv"
# PWAC resident info.
path_pwac     = "/content/PWAC Resident Info 2024(PWAC Emails).csv"
# RBHS faculty/staff file ("Nick's File").
path_nick     = "/content/RBHS_Fac_Staff_2024_Nick's File(Sheet2).csv"
# Two postdoc exports ("all" and "no PharmD"); they are unioned after loading.
path_post_all = "/content/RU Health Postdocs as of 8-7-24 all(Sheet2).csv"
path_post_no  = "/content/RU Health Postdocs as of 8-7-24 no PharmD(Sheet2).csv"


# -----------------------------
# 4) Load & clean
# -----------------------------
# Faculty has 5 metadata rows before the header row → header=5
# Read each CSV with encoding fallback, then tidy the column labels.
# Only the faculty file is read with header=5; the others use the first row.
df_faculty = clean_column_names(safe_read_csv(path_faculty, header=5))
df_pwac    = clean_column_names(safe_read_csv(path_pwac))
df_nick    = clean_column_names(safe_read_csv(path_nick))
df_postall = clean_column_names(safe_read_csv(path_post_all))
df_postno  = clean_column_names(safe_read_csv(path_post_no))

# Standardize canonical identity keys (FirstName, LastName, NetID, Email)
# using the per-source column maps. The postdoc frames additionally go
# through split_full_name, which only acts when a 'Name' column exists.
fac       = standardize_identity_cols(df_faculty, colmap_faculty)
pwac      = standardize_identity_cols(df_pwac,    colmap_pwac)
nick      = standardize_identity_cols(df_nick,    colmap_nick)
post_all  = split_full_name(standardize_identity_cols(df_postall, colmap_post))
post_no   = split_full_name(standardize_identity_cols(df_postno,  colmap_post))

# Quick previews (optional): first 5 rows of each source's core columns
preview(core_view(fac), 5, "Faculty — core view")
preview(core_view(pwac), 5, "PWAC — core view")
preview(core_view(nick), 5, "Nick — core view")
preview(core_view(post_all), 5, "Postdocs ALL — core view")
preview(core_view(post_no), 5, "Postdocs NO — core view")

# Union postdocs: stack both exports and drop exact duplicate rows
postdocs = union_postdocs(post_all, post_no)
preview(core_view(postdocs), 5, "Postdocs — Combined")

# Key health audit: how many rows carry a NetID, an Email, or both
key_health(fac, "Faculty")
key_health(pwac, "PWAC")
key_health(nick, "Nick")
key_health(postdocs, "Postdocs (Combined)")


# -----------------------------
# 5) Hierarchical merge utilities
# -----------------------------
def merge_unmatched_on_key(base, other, key_cols, suffix):
    """
    Attach columns from 'other' to the rows of 'base' that match on key_cols
    and have not already been matched to this source in an earlier step.

    Parameters
    ----------
    base : DataFrame
        Table being enriched. It is not modified; a copy is returned.
    other : DataFrame
        Source table. Rows with a null key are ignored, and when several
        rows share the same key only the first is used, so the join can
        never add rows to `base`.
    key_cols : list of str (a single column name is also accepted)
        Join columns, e.g. ['NetID'], ['Email'] or ['FirstName','LastName'].
        A key column missing from either table is treated as all-null.
    suffix : str
        Non-empty tag for this source, e.g. "_nick".

    Returns
    -------
    (merged, matched_now)
        merged has exactly the rows and index of `base`. Every non-key column
        `c` of `other` is stored in column `c + suffix`; calling the function
        again with the same suffix fills those same columns for newly matched
        rows instead of creating duplicate labels. A boolean column
        `'_matched' + suffix` records which rows have been matched to this
        source so far (it ends with the suffix, so suffix-based cleanup
        removes it as well).
        matched_now is a boolean Series, aligned with merged, that is True
        for the rows matched in this call.
    """
    key_cols = [key_cols] if isinstance(key_cols, str) else list(key_cols)
    marker = f"_matched{suffix}"
    hit_flag = "__hit__"

    original_index = base.index
    left = base.copy()
    # Work on positional labels so a non-unique base index cannot misalign rows.
    left.index = pd.RangeIndex(len(left))
    right = other.copy()

    # ensure join keys exist on both sides
    for k in key_cols:
        if k not in left.columns:
            left[k] = pd.NA
        if k not in right.columns:
            right[k] = pd.NA

    payload = [c for c in right.columns if c not in key_cols]
    dest_of = {c: f"{c}{suffix}" for c in payload}
    dest_cols = list(dest_of.values())

    # One row per non-null key → the join below is many-to-one.
    lookup = (
        right.dropna(subset=key_cols)
             .drop_duplicates(subset=key_cols, keep='first')
             .rename(columns=dest_of)[key_cols + dest_cols]
             .assign(**{hit_flag: True})
    )

    # Rows matched to this source by an earlier call keep what they have.
    if marker in left.columns:
        already = left[marker].fillna(False).astype(bool).to_numpy()
    else:
        already = np.zeros(len(left), dtype=bool)

    # Null keys never participate (pandas would otherwise pair NaN with NaN).
    has_key = left[key_cols].notna().all(axis=1).to_numpy()
    pos = np.flatnonzero(has_key & ~already)

    matched_now = np.zeros(len(left), dtype=bool)
    if len(pos) and len(lookup):
        found = left.loc[pos, key_cols].merge(
            lookup, how='left', on=key_cols, validate='many_to_one'
        )
        found.index = pos  # left-join keeps left order, so rows map back 1:1
        hit = found[hit_flag].notna().to_numpy()
        matched_now[pos[hit]] = True
        incoming = found.loc[hit, dest_cols].reindex(left.index)
    else:
        incoming = pd.DataFrame(pd.NA, index=left.index, columns=dest_cols, dtype=object)

    for dest in dest_cols:
        if dest in left.columns:
            # only rows matched in this call receive new values
            left[dest] = left[dest].where(~matched_now, incoming[dest])
        else:
            left[dest] = incoming[dest]

    left[marker] = already | matched_now
    left.index = original_index
    return left, pd.Series(matched_now, index=original_index)

def coalesce_columns(df, preferred_list, fallback_suffixes=("_pwac","_nick","_post")):
    """
    For each name in preferred_list, fill the base column from its suffixed
    variants and return the result as a copy of `df`.

    Example: 'Title' is combined with 'Title_pwac', 'Title_nick', 'Title_post'
    (whichever exist, in the order given by fallback_suffixes) using
    combine_first, so the base value wins and each fallback only fills gaps.
    If the base column does not exist it is created from the fallbacks. Names
    with neither a base nor a suffixed column are skipped.

    A label that occurs more than once in the frame (repeated merges can
    produce this) is first collapsed left-to-right to its first non-null
    value instead of raising AttributeError.
    """
    def _first_non_null(selection):
        # `frame[label]` is a DataFrame when the label is duplicated.
        if isinstance(selection, pd.Series):
            return selection
        collapsed = selection.iloc[:, 0]
        for i in range(1, selection.shape[1]):
            collapsed = collapsed.combine_first(selection.iloc[:, i])
        return collapsed

    out = df.copy()
    for col in preferred_list:
        fallbacks = [col + s for s in fallback_suffixes if (col + s) in out.columns and (col + s) != col]
        if col not in out.columns and not fallbacks:
            continue
        if col in out.columns:
            result = _first_non_null(out[col])
        else:
            result = pd.Series(pd.NA, index=out.index)
        for name in fallbacks:
            result = result.combine_first(_first_non_null(out[name]))
        out[col] = result
    return out

def drop_suffix_columns(df, suffixes=("_pwac","_nick","_post")):
    """Return a copy of `df` without the columns whose label ends with any of
    `suffixes`.

    Columns are selected by position, so a label that appears more than once
    is kept exactly as many times as it occurs (label-based selection would
    repeat it), and non-string labels are compared via their string form.
    """
    suffixes = tuple(suffixes)
    keep = np.array([not str(c).endswith(suffixes) for c in df.columns], dtype=bool)
    return df.loc[:, keep].copy()


# -----------------------------
# 6) Build RutgersMaster via hierarchical joins
#     Base: Faculty (richest: has NPI, rank, track, etc.)
#     Then: Nick → PWAC → Postdocs
#     Keys: NetID → Email → (FirstName + LastName)
# -----------------------------
# Start from the standardized faculty table; every later step adds columns to it.
master = fac.copy()

# First merge: RBHS Nick
# Each source is joined three times, on NetID, then Email, then FirstName+LastName.
# The returned matched_* masks are kept for inspection; nothing below reads them.
# 6.1 NetID
tmp, matched_nick_by_netid = merge_unmatched_on_key(master, nick, ['NetID'], suffix="_nick")
master = tmp
# 6.2 Email
tmp, matched_nick_by_email = merge_unmatched_on_key(master, nick, ['Email'], suffix="_nick")
master = tmp
# 6.3 Name
tmp, matched_nick_by_name = merge_unmatched_on_key(master, nick, ['FirstName','LastName'], suffix="_nick")
master = tmp

# Second merge: PWAC (same NetID → Email → Name sequence)
tmp, matched_pwac_by_netid = merge_unmatched_on_key(master, pwac, ['NetID'], suffix="_pwac")
master = tmp
tmp, matched_pwac_by_email = merge_unmatched_on_key(master, pwac, ['Email'], suffix="_pwac")
master = tmp
tmp, matched_pwac_by_name = merge_unmatched_on_key(master, pwac, ['FirstName','LastName'], suffix="_pwac")
master = tmp

# Third merge: Postdocs (Union) (same NetID → Email → Name sequence)
tmp, matched_post_by_netid = merge_unmatched_on_key(master, postdocs, ['NetID'], suffix="_post")
master = tmp
tmp, matched_post_by_email = merge_unmatched_on_key(master, postdocs, ['Email'], suffix="_post")
master = tmp
tmp, matched_post_by_name = merge_unmatched_on_key(master, postdocs, ['FirstName','LastName'], suffix="_post")
master = tmp

print("\n✅ Hierarchical merges complete.")

# Coalesce common descriptive columns where the other files might have richer values
# NOTE(review): the identity columns (NetID, Email, FirstName, LastName) are not
# in this list, so identity values held only in suffixed columns are discarded
# when the suffixed columns are dropped below — confirm that is intended.
preferred_cols = [
    'TITLE','Title','TRACK','Department/Division','DeptNm','Department',
    'PRIMARYSCHOOL','School/Div','NPI_NUMBER','CAMPUS','role','ETyp','Empl Class Name'
]
# Fallback priority follows the merge order: Nick, then PWAC, then Postdocs.
master = coalesce_columns(master, preferred_cols, fallback_suffixes=("_nick","_pwac","_post"))

# Optional: add placeholder clinical status (to be refined later with extra files)
master = add_clinical_placeholder(master)

# Remove the suffixed columns (keep only base + coalesced results)
master = drop_suffix_columns(master, suffixes=("_nick","_pwac","_post"))

# Core view for convenience
# Where a field has several spellings, pick the first one present in master;
# names that still do not exist are filtered out just below.
master_core_cols = [
    'FirstName','LastName','NetID','Email',
    'Title' if 'Title' in master.columns else 'TITLE',
    'TRACK',
    'Department/Division' if 'Department/Division' in master.columns else ('Department' if 'Department' in master.columns else 'DeptNm'),
    'PRIMARYSCHOOL' if 'PRIMARYSCHOOL' in master.columns else 'School/Div',
    'NPI_NUMBER','CAMPUS',
    'Clinical_Status_placeholder'
]
master_core_cols = [c for c in master_core_cols if c in master.columns]
master_core = master[master_core_cols].copy()

preview(master_core, 8, "RutgersMaster — CORE (preview)")
print(f"Master shape (full): {master.shape}")
print(f"Master shape (core): {master_core.shape}")


# -----------------------------
# 7) Deduplication strategy
#     1) Exact NetID duplicates → keep first
#     2) Else, duplicates by Email with Rutgers domain preference
#     3) Else, fallback to FirstName+LastName+Department matching
# -----------------------------
def is_rutgers_email(s):
    """Return a boolean Series that is True where the lower-cased text of `s`
    contains 'rutgers' or 'rbhs' somewhere after an '@'.
    """
    lowered = s.astype(str).str.lower()
    on_rutgers = lowered.str.contains(r'@.*rutgers', na=False)
    on_rbhs = lowered.str.contains(r'@.*rbhs', na=False)
    return on_rutgers | on_rbhs

def deduplicate_master(df):
    """Return a copy of `df` with duplicate people removed in three passes.

    1) Rows sharing the same non-null NetID → the first occurrence is kept.
    2) Rows sharing the same non-null Email → the first occurrence is kept.
    3) Rows sharing the same FirstName + LastName + department (compared
       case-insensitively after trimming; department is the first of
       'Department/Division', 'Department', 'DeptNm' present) → first kept.

    A row whose key is missing is never treated as a duplicate in that pass,
    so people without a NetID or Email are not collapsed into a single row.
    Surviving rows keep their original order and index labels, and no helper
    columns are left behind.

    No e-mail-domain preference is applied in pass 2: rows duplicated on
    Email carry the identical address, so such a preference could not change
    which row is kept.
    """
    def _drop_repeats(frame, keys):
        # `keys` is positionally aligned with `frame`; rows with any null key
        # component are exempt from removal.
        usable = keys.notna().all(axis=1).to_numpy()
        repeated = keys.duplicated(keep='first').to_numpy() & usable
        return frame.loc[~repeated]

    out = df.copy()

    # Step 1: Dedup by NetID (most reliable)
    if 'NetID' in out.columns:
        out = _drop_repeats(out, out[['NetID']])

    # Step 2: Dedup by Email
    if 'Email' in out.columns:
        out = _drop_repeats(out, out[['Email']])

    # Step 3: Dedup by First+Last+Department (if Department present)
    dept_col = next(
        (c for c in ['Department/Division', 'Department', 'DeptNm'] if c in out.columns),
        None,
    )
    if dept_col and {'FirstName', 'LastName'}.issubset(out.columns):
        name_keys = pd.DataFrame({
            col: out[col].astype(str).str.lower().str.strip().where(out[col].notna()).to_numpy()
            for col in ('FirstName', 'LastName', dept_col)
        })
        out = _drop_repeats(out, name_keys)

    return out.copy()

# Deduplicate the full master, then pick the same rows from the core view by
# index label so the two deduplicated outputs describe the same people.
master_dedup = deduplicate_master(master)
master_core_dedup = master_core.loc[master_dedup.index.intersection(master_core.index)].copy()  # align indices

print(f"\nAfter dedup → full: {master_dedup.shape} | core: {master_core_dedup.shape}")
preview(master_core_dedup, 8, "RutgersMaster — CORE (DEDUP) preview")


# -----------------------------
# 8) Save outputs
# -----------------------------
# Four CSVs are written under /content: full and core, each before and after dedup.
full_path  = "/content/RutgersMaster_full.csv"
core_path  = "/content/RutgersMaster_core.csv"
full_dpath = "/content/RutgersMaster_full_dedup.csv"
core_dpath = "/content/RutgersMaster_core_dedup.csv"

# index=False: the row index is not written to the files
master.to_csv(full_path, index=False)
master_core.to_csv(core_path, index=False)
master_dedup.to_csv(full_dpath, index=False)
master_core_dedup.to_csv(core_dpath, index=False)

print("\n✅ Saved:")
print(" - Full:", full_path)
print(" - Core:", core_path)
print(" - Full (dedup):", full_dpath)
print(" - Core (dedup):", core_dpath)


# -----------------------------
# 9) (Optional) Quick QA plots
# -----------------------------
# Best-effort: any failure while plotting is reported and skipped, so the
# CSVs saved above are unaffected.
try:
    plt.figure()
    master_core['Clinical_Status_placeholder'].value_counts(dropna=False).plot(kind='bar', rot=0, title='Clinical Status (placeholder)')
    plt.show()
except Exception as e:
    print("Plot skipped:", e)

print("\nAll done. You can now download the CSVs from the left Files pane.")


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return date

✅ Libraries installed & imported.


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return date


=== Faculty — core view ===
Shape: (2594, 7)


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,FirstName,LastName,NetID,Email,Department/Division,PRIMARYSCHOOL,NPI_NUMBER
0,Elizabeth,Abadiotakis,ea626,ea626@rutgers.edu,Psychiatric Rehabilitation & Counseling Profes...,School of Health Professions,
1,Daniel,Abazia,dabazia,dabazia@pharmacy.rutgers.edu,Pharmacy Practice & Administration,Ernest Mario School of Pharmacy,
2,Ali,Abbas,abbasal,abbasal@rutgers.edu,Diagnostic Sciences,Rutgers School of Dental Medicine,1194948604.0
3,Maha,Abdellatif,abdellma,abdellma@rutgers.edu,Cell Biology & Molecular Medicine,New Jersey Medical School,
4,Hammad,Abdelquader,habdel,habdel@rutgers.edu,Family Medicine and Community Health,Robert Wood Johnson Medical School,1386096865.0


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Columns: ['FirstName', 'LastName', 'NetID', 'Email', 'Department/Division', 'PRIMARYSCHOOL', 'NPI_NUMBER']

=== PWAC — core view ===
Shape: (1785, 4)


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,FirstName,LastName,NetID,Email
0,Rasha,Al Thawaher Halaseh,,rasha.althawaheralaseh@rwjbh.org
1,Murad,Aldarayseh,,murad.aldarayseh@rwjbh.org
2,Ahmad,Almelegy,aa2513,ahmad.almelegy@rwjbh.org
3,Gabriel,Botelho Bastos Zaverucha,,gabriel.botelhobastoszaverucha@rwjbh.org
4,Jae Hyuk Byun,Byun,jhb185,jae.byun@rwjbh.org


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Columns: ['FirstName', 'LastName', 'NetID', 'Email']

=== Nick — core view ===
Shape: (10825, 8)


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,FirstName,LastName,NetID,Email,Title,DeptNm,role,ETyp
0,Kyle S.,Aaronson,ksa53,ksa53@rbhs.rutgers.edu,ASSOC VICE CHANCELLOR FINANCE,RBHS - Finance & Admin,STAFF,1
1,Iyabo,Aasa,aasala,iyabo.aasa@rutgers.edu,LPN,UCHC-South Woods Psn-Ancillary,STAFF,1
2,Grace,Abad,abadgb,abadgb@rutgers.edu,ADVANCED PRACTICE NURSE PD,UBHC-Acute Psych Services,STAFF,4
3,Valori,Abad,abadva,valori.abad@rutgers.edu,REGIONAL NURSE ADMINISTRATOR,SN-FXB Center-Chp Program,STAFF,1
4,Elizabeth,Abadiotakis,ea626,ea626@rutgers.edu,SHP-PD,SHP-Psych Rehab & Counsel Pro,STAFF,4


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Columns: ['FirstName', 'LastName', 'NetID', 'Email', 'Title', 'DeptNm', 'role', 'ETyp']

=== Postdocs ALL — core view ===
Shape: (615, 8)


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,FirstName,LastName,NetID,Email,Title,Department,School/Div,Empl Class Name
0,"Adedeji,Anuoluwapo",J,aa1330,aa1330@rutgers.edu,POST DOCTORAL ASSOC,Pharm-Pharmacy Practice&Admin,Ernest Mario School - Pharmacy,Regular FT/PT
1,"Armstrong,Abigail",J,aa2253,aa2253@rutgers.edu,POST DOC FELLOW,CABM-RBHS,CABM,Regular FT/PT
2,"Abramyan,Arevik",,aa3051,aa3051@rutgers.edu,POST DOC FELLOW,RWJ-Neurosurgery,RWJ-Finance Office,Regular FT/PT
3,"Choe,Angel",A,aac312,aac312@rutgers.edu,POST DOCTORAL ASSOC,Pharm-Pharmacy Practice&Admin,Ernest Mario School - Pharmacy,Regular FT/PT
4,"Lette,Amie",A,aal204,aal204@rutgers.edu,POST DOCTORAL ASSOC,Pharm-Pharmacy Practice&Admin,Ernest Mario School - Pharmacy,Regular FT/PT


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Columns: ['FirstName', 'LastName', 'NetID', 'Email', 'Title', 'Department', 'School/Div', 'Empl Class Name']

=== Postdocs NO — core view ===
Shape: (246, 8)


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,FirstName,LastName,NetID,Email,Title,Department,School/Div,Empl Class Name
0,De,"Sa Nogueira,David",dd979,dd979@rutgers.edu,POST DOC FELLOW,Brain Health Institute,Brain Health Institute,Regular FT/PT
1,"Delcourte,Sarah",I,sd1249,sd1249@rutgers.edu,POST DOC FELLOW,Brain Health Institute,Brain Health Institute,Regular FT/PT
2,"Love,Cameron",,cl1439,cl1439@rutgers.edu,POST DOCTORAL ASSOC,CABM,CABM,Regular FT/PT
3,"Biswas,Iman",,ib270,ib270@rutgers.edu,POST DOCTORAL ASSOC,CABM,CABM,Regular FT/PT
4,"DAmico,Kevin",,kd652,kd652@rutgers.edu,POST DOCTORAL ASSOC,CABM,CABM,Regular FT/PT


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Columns: ['FirstName', 'LastName', 'NetID', 'Email', 'Title', 'Department', 'School/Div', 'Empl Class Name']

=== Postdocs — Combined ===
Shape: (591, 8)


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Unnamed: 0,FirstName,LastName,NetID,Email,Title,Department,School/Div,Empl Class Name
0,"Adedeji,Anuoluwapo",J,aa1330,aa1330@rutgers.edu,POST DOCTORAL ASSOC,Pharm-Pharmacy Practice&Admin,Ernest Mario School - Pharmacy,Regular FT/PT
1,"Armstrong,Abigail",J,aa2253,aa2253@rutgers.edu,POST DOC FELLOW,CABM-RBHS,CABM,Regular FT/PT
2,"Abramyan,Arevik",,aa3051,aa3051@rutgers.edu,POST DOC FELLOW,RWJ-Neurosurgery,RWJ-Finance Office,Regular FT/PT
3,"Choe,Angel",A,aac312,aac312@rutgers.edu,POST DOCTORAL ASSOC,Pharm-Pharmacy Practice&Admin,Ernest Mario School - Pharmacy,Regular FT/PT
4,"Lette,Amie",A,aal204,aal204@rutgers.edu,POST DOCTORAL ASSOC,Pharm-Pharmacy Practice&Admin,Ernest Mario School - Pharmacy,Regular FT/PT


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Columns: ['FirstName', 'LastName', 'NetID', 'Email', 'Title', 'Department', 'School/Div', 'Empl Class Name']
Faculty — rows: 2594 | NetID present: 2497 | Email present: 2579 | Both: 2490
PWAC — rows: 1785 | NetID present: 91 | Email present: 1776 | Both: 91
Nick — rows: 10825 | NetID present: 10825 | Email present: 10825 | Both: 10825
Postdocs (Combined) — rows: 591 | NetID present: 591 | Email present: 591 | Both: 591


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return date


✅ Hierarchical merges complete.


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return date

AttributeError: 'DataFrame' object has no attribute 'dtype'

  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)
