In [33]:
from event_table import Event_Table

In [34]:
loaded = Event_Table.from_path(file_path='fitres_10_11_2_rep1.parquet', ROI='Medium', Instrument='Refeyn OneMP', needs_calibration=True)

Read parquet succesfully!


TypeError: Event_Table.__init__() missing 2 required positional arguments: 'calibration_results' and 'calibrated'

In [23]:
import pandas as pd

def inspect_column_names(df, label="df"):
    print(f"Columns in {label}: ({len(df.columns)})")
    for i, c in enumerate(df.columns):
        s = str(c)
        cp = " ".join(f"U+{ord(ch):04X}" for ch in s)
        print(f"{i:02d}: {repr(s)}   --> {cp}")

# load both: the loaded parquet & your standard csv used previously
df_loaded = pd.read_parquet('fitres_10_11_2_rep1.parquet')
df_standard = pd.read_csv('standard_event_table_format.csv')

inspect_column_names(df_loaded, "loaded")
print()
inspect_column_names(df_standard, "standard")

Columns in loaded: (13)
00: 'frame'   --> U+0066 U+0072 U+0061 U+006D U+0065
01: 'y_det'   --> U+0079 U+005F U+0064 U+0065 U+0074
02: 'x_det'   --> U+0078 U+005F U+0064 U+0065 U+0074
03: 'contrasts_det'   --> U+0063 U+006F U+006E U+0074 U+0072 U+0061 U+0073 U+0074 U+0073 U+005F U+0064 U+0065 U+0074
04: 'contrasts'   --> U+0063 U+006F U+006E U+0074 U+0072 U+0061 U+0073 U+0074 U+0073
05: 'x_fit'   --> U+0078 U+005F U+0066 U+0069 U+0074
06: 'y_fit'   --> U+0079 U+005F U+0066 U+0069 U+0074
07: 'contrasts_se'   --> U+0063 U+006F U+006E U+0074 U+0072 U+0061 U+0073 U+0074 U+0073 U+005F U+0073 U+0065
08: 'r2_fit'   --> U+0072 U+0032 U+005F U+0066 U+0069 U+0074
09: 'res_fit'   --> U+0072 U+0065 U+0073 U+005F U+0066 U+0069 U+0074
10: 'x'   --> U+0078
11: 'y'   --> U+0079
12: 'masses_kDa'   --> U+006D U+0061 U+0073 U+0073 U+0065 U+0073 U+005F U+006B U+0044 U+0061

Columns in standard: (13)
00: 'frame'   --> U+0066 U+0072 U+0061 U+006D U+0065
01: 'y_det'   --> U+0079 U+005F U+0064 U+0065 U+0074
02

In [18]:
# fix_standard_header_commas.py
import pandas as pd
import re
from pathlib import Path
import shutil

STANDARD = Path("standard_event_table_format.csv")
BACKUP = STANDARD.with_suffix(".csv.bak")

if not STANDARD.exists():
    raise FileNotFoundError(f"{STANDARD} not found")

# 1) Backup
shutil.copy2(STANDARD, BACKUP)
print(f"Backup created at {BACKUP}")

# 2) Read file robustly (try csv, fallback to tsv if necessary)
def try_read(path):
    df = pd.read_csv(path, dtype=str)
    # if many column names start or end with a comma, try reading as tab-separated (maybe it was TSV)
    cols = list(df.columns)
    if sum(1 for c in cols if isinstance(c, str) and (c.startswith('"') or c.endswith(','))) > 0:
        try:
            df = pd.read_csv(path, sep="\t", dtype=str)
        except Exception:
            pass
    return df

df = try_read(STANDARD)

print("Header before cleaning:")
print(list(df.columns))

# 3) Clean routine for column names
def clean_col(c: str) -> str:
    if c is None:
        return ""
    s = str(c)
    # Remove surrounding double quotes if present
    if s.startswith('"') and s.endswith('"'):
        s = s[1:-1]
    # Remove trailing commas that are part of the header string
    s = re.sub(r',+$', '', s)
    # Remove stray leading/trailing whitespace and invisible chars
    s = re.sub(r'[\uFEFF\u200B-\u200D\r\n\t]', '', s).strip()
    return s

df.columns = [clean_col(c) for c in df.columns]

print("Header after cleaning:")
print(list(df.columns))

# 4) Write cleaned file back (no index, UTF-8 no BOM)
df.to_csv(STANDARD, index=False, encoding="utf-8")
print(f"Cleaned file written to {STANDARD} (original backed up at {BACKUP})")


Backup created at standard_event_table_format.csv.bak
Header before cleaning:
['frame,,"y_det,","x_det,","contrasts_det,","contrasts,","x_fit,","y_fit,","contrasts_se,","r2_fit,","res_fit,","x,","y,",masses_kDa']
Header after cleaning:
['frame,,"y_det,","x_det,","contrasts_det,","contrasts,","x_fit,","y_fit,","contrasts_se,","r2_fit,","res_fit,","x,","y,",masses_kDa']
Cleaned file written to standard_event_table_format.csv (original backed up at standard_event_table_format.csv.bak)


In [16]:
# clean_standard_columns.py
import pandas as pd
import re
from pathlib import Path
import shutil

STANDARD_PATH = Path("standard_event_table_format.csv")
BACKUP_PATH = STANDARD_PATH.with_suffix(".csv.bak")

def try_read(path: Path):
    """Try to read as CSV, if columns look tab-prefixed try reading as TSV."""
    df = pd.read_csv(path, dtype=str)
    # quick check: if many column names start with '\t', try sep='\t'
    cols = list(df.columns)
    if sum(1 for c in cols if isinstance(c, str) and c.startswith("\t")) > 0:
        df = pd.read_csv(path, sep="\t", dtype=str)
    return df

def clean_name(s: str) -> str:
    if s is None:
        return ""
    s = str(s)
    # remove BOM and common invisible characters including tabs/newlines/zero-width
    s = re.sub(r'[\uFEFF\u200B-\u200D\u0009\u000A\u000D]', '', s)
    s = s.strip()
    return s

def main():
    if not STANDARD_PATH.exists():
        raise FileNotFoundError(f"{STANDARD_PATH} not found in current directory.")

    # 1. Read (try csv, fall back to tsv)
    df = try_read(STANDARD_PATH)

    # 2. Show before
    print("Columns in standard (before):")
    for i, c in enumerate(df.columns):
        print(f"{i:02d}: {repr(c)}")

    # 3. Backup original
    shutil.copy2(STANDARD_PATH, BACKUP_PATH)
    print(f"\nBackup saved to: {BACKUP_PATH}")

    # 4. Clean column names
    cleaned = [clean_name(c) for c in df.columns]
    df.columns = cleaned

    # 5. Show after + diff
    print("\nColumns in standard (after):")
    for i, c in enumerate(df.columns):
        print(f"{i:02d}: {repr(c)}")

    before_set = set(repr(c) for c in pd.read_csv(BACKUP_PATH, dtype=str).columns)
    after_set = set(repr(c) for c in df.columns)
    removed = before_set - after_set
    added = after_set - before_set
    if removed or added:
        print("\nColumn differences (visual):")
        if removed:
            print("Removed (old representations):")
            for r in sorted(removed):
                print("  ", r)
        if added:
            print("Added (new representations):")
            for a in sorted(added):
                print("  ", a)
    else:
        print("\nNo set-difference detected (names normalized).")

    # 6. Overwrite standard file as CSV with UTF-8 (no BOM), no index
    df.to_csv(STANDARD_PATH, index=False, encoding="utf-8")
    print(f"\nCleaned standard file written back to: {STANDARD_PATH}")

if __name__ == "__main__":
    main()


Columns in standard (before):
00: 'frame,'
01: 'y_det,'
02: 'x_det,'
03: 'contrasts_det,'
04: 'contrasts,'
05: 'x_fit,'
06: 'y_fit,'
07: 'contrasts_se,'
08: 'r2_fit,'
09: 'res_fit,'
10: 'x,'
11: 'y,'
12: 'masses_kDa'

Backup saved to: standard_event_table_format.csv.bak

Columns in standard (after):
00: 'frame,'
01: 'y_det,'
02: 'x_det,'
03: 'contrasts_det,'
04: 'contrasts,'
05: 'x_fit,'
06: 'y_fit,'
07: 'contrasts_se,'
08: 'r2_fit,'
09: 'res_fit,'
10: 'x,'
11: 'y,'
12: 'masses_kDa'

Column differences (visual):
Removed (old representations):
   '\tcontrasts'
   '\tcontrasts_det'
   '\tcontrasts_se'
   '\tmasses_kDa'
   '\tr2_fit'
   '\tres_fit'
   '\tx'
   '\tx_det'
   '\tx_fit'
   '\ty'
   '\ty_det'
   '\ty_fit'
   'frame'
Added (new representations):
   'contrasts,'
   'contrasts_det,'
   'contrasts_se,'
   'frame,'
   'masses_kDa'
   'r2_fit,'
   'res_fit,'
   'x,'
   'x_det,'
   'x_fit,'
   'y,'
   'y_det,'
   'y_fit,'

Cleaned standard file written back to: standard_event_table_f