In [49]:
import numpy as np
import pandas as pd
from scipy.ndimage import find_objects, label

# Read the workbook as raw cells (adjust the file name/path as needed).
# header=None stops pandas from promoting the first row to column names, so
# every cell stays data and rows/columns get plain integer labels.
df = pd.read_excel(io="Test_Data.xlsx", header=None)

# Split the sheet into tables: each cluster of touching non-empty cells is
# treated as one table and cut out of `df` by its rectangular bounding box.

# Raw cell values as a NumPy array (same layout as df).
data = df.values

# Boolean mask: True for non-empty cells, False for NaN/empty ones.
mask = ~pd.isna(data)

# A 3x3 all-ones structuring element means 8-connectivity: cells that touch
# only diagonally are still merged into the same component.
structure = np.ones((3, 3), dtype=int)
labeled, ncomponents = label(mask, structure=structure)

# Maps "table_<label>" -> DataFrame holding that component's bounding box.
table_dict = {}

# find_objects yields one (row_slice, col_slice) bounding box per label in a
# single pass over `labeled`, instead of re-scanning the whole array once per
# component. Entry i belongs to label i + 1; labels produced by `label` are
# contiguous (1..ncomponents), so every entry is a real bounding box.
for component, (row_span, col_span) in enumerate(find_objects(labeled), start=1):
    # The slices are already end-exclusive, so they cover rmin..rmax and
    # cmin..cmax inclusive. Because the box is rectangular it may also contain
    # empty cells that lie inside it.
    # .copy() detaches the table from df so later edits to a table neither
    # touch the source frame nor trigger SettingWithCopyWarning.
    table_df = df.iloc[row_span, col_span].copy()

    # Original row/column labels of df are kept, so positions in the sheet
    # remain visible in each table's index and columns.
    table_dict[f"table_{component}"] = table_df

# Example: dump every extracted table as plain text, each one preceded by a
# blank line and its dictionary key.
for name, table in table_dict.items():
    print(f"\n{name}:", table, sep="\n")



table_1:
      11        12        13
0     ID  Lineage       OPER
1    555        T1     _mavd
2    222        T3  _hujrfwe
3    333        T4    _dfhhf
4    444        T5   _hfhfgd
5  22233        T6    _jkglg

table_2:
                             1
1  THE ECONOMICS OF THE WORLD 
2                 IT IS GREAT 

table_3:
         1         2                3           4            5             6   \
8   Mapiing  Lineage   Transofrmation           ID         Teat         gjFGY   
9         1      Bess       MacSkeagan      Female        Jasen       Kettles   
10        2    Wallis       Dinnington        Male       Claire         Paute   
11        3   Jeramie           Lowman        Male        Olwen      Augustus   
12        4   Corella       O'Glassane      Female       Tracey         Ellen   
13        5  Ricoriki        Gomersall        Male        Byrle       Bendare   
14        6    Tandie        Caseborne      Female     Broderic       Adamoli   
15        7       Pip     

In [50]:
# Look up the table stored under "table_1"; as the cell's last expression it is shown as the cell output.
table_dict["table_1"]

Unnamed: 0,11,12,13
0,ID,Lineage,OPER
1,555,T1,_mavd
2,222,T3,_hujrfwe
3,333,T4,_dfhhf
4,444,T5,_hfhfgd
5,22233,T6,_jkglg


In [51]:
# Look up the table stored under "table_2"; as the cell's last expression it is shown as the cell output.
table_dict["table_2"]

Unnamed: 0,1
1,THE ECONOMICS OF THE WORLD
2,IT IS GREAT


In [52]:
# Look up the table stored under "table_3"; as the cell's last expression it is shown as the cell output.
table_dict["table_3"]

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12
8,Mapiing,Lineage,Transofrmation,ID,Teat,gjFGY,GHJKM,HNJHJN,HH,HJKJM,EERt,rweer
9,1,Bess,MacSkeagan,Female,Jasen,Kettles,jkettles17@reference.com,Bigender,Nomi,Gracewood,ngracewood2a@cdbaby.com,T1.JG
10,2,Wallis,Dinnington,Male,Claire,Paute,cpaute18@pcworld.com,Polygender,Hersh,Boome,hboome2b@intel.com,T2.jhuy
11,3,Jeramie,Lowman,Male,Olwen,Augustus,oaugustus19@t-online.de,Female,Jennie,Roderick,jroderick2c@github.com,T3.terew
12,4,Corella,O'Glassane,Female,Tracey,Ellen,tellen1a@topsy.com,Female,Nolie,Laven,nlaven2d@fastcompany.com,T1.JG
13,5,Ricoriki,Gomersall,Male,Byrle,Bendare,bbendare1b@shareasale.com,Male,Malinda,Poulsum,mpoulsum2e@imgur.com,T2.jhuy
14,6,Tandie,Caseborne,Female,Broderic,Adamoli,badamoli1c@naver.com,Male,Hester,Key,hkey2f@chronoengine.com,T3.terew
15,7,Pip,Pragnell,Male,Christopher,McCrackan,cmccrackan1d@stanford.edu,Male,Conrado,Tothe,ctothe2g@wsj.com,T1.JG
16,8,Ingunna,Paddie,Female,Curran,Maud,cmaud1e@bandcamp.com,Male,Karla,Setterington,ksetterington2h@icio.us,T2.jhuy
17,9,Claudine,Iacomo,Female,Aguie,Kidde,akidde1f@gov.uk,Male,Conni,Gossipin,cgossipin2i@paginegialle.it,T3.terew
