In [5]:
import os
import pandas as pd

# Locate the annual LFS workbooks: every file in "assets/LFS" whose name
# contains "TS_AN" and ends with ".xlsx".
lfs_folder = "assets/LFS"
# os.listdir() returns entries in arbitrary order; sort so the mapping below is
# built in the same order on every run and on every platform.
annual_files = sorted(
    f for f in os.listdir(lfs_folder) if "TS_AN" in f and f.endswith(".xlsx")
)

# Map each workbook path to the list of its sheet names:
# {file_path: [sheet_name1, sheet_name2, ...], ...}
# Only sheet names are collected; no sheet is read into a DataFrame here.
annual_file_sheets = {}
for filename in annual_files:
    file_path = os.path.join(lfs_folder, filename)
    try:
        # The context manager closes the workbook handle as soon as the sheet
        # names have been read, instead of leaving it open in the kernel.
        with pd.ExcelFile(file_path) as xl:
            annual_file_sheets[file_path] = xl.sheet_names
    except Exception as e:
        # Best effort: report the unreadable workbook and carry on with the rest.
        print(f"Error reading {file_path}: {e}")

# Summarise what was actually indexed. The file count comes from the mapping
# (not from annual_files) so workbooks that failed to open are not counted.
total_sheets = sum(len(sheets) for sheets in annual_file_sheets.values())
print(f"Loaded {total_sheets} annual LFS sheets from {len(annual_file_sheets)} files.")


Loaded 22 annual LFS sheets from 3 files.


In [6]:
# Display the {file_path: [sheet names]} mapping to see which sheets each workbook contains.
annual_file_sheets

{'assets/LFS\\A0101_SJO03_TS_AN_00_1981_00_2024_01_F_EN.xlsx': ['Contents',
  'POPUL-Regio',
  'POPUL-Status',
  'EDUC-SexAge',
  'EDUC-Regio',
  'EDUC-Status',
  'STATUS-SexAge',
  'STATUS-Regio'],
 'assets/LFS\\A0101_SJO03_TS_AN_00_1981_00_2024_02_F_EN.xlsx': ['Contents',
  'JOB-SexAge',
  'JOB-Regio',
  'JOB-Occup',
  'JOB-Sector',
  'OCCUP-Demo',
  'OCCUP-2d',
  'SECTOR-Demo',
  'SECTOR-2d',
  'EMP-SexAge',
  'EMP-Regio'],
 'assets/LFS\\A0101_SJO03_TS_AN_00_1981_00_2024_03_F_EN.xlsx': ['Contents',
  'UNE-SexAge',
  'UNE-Regio']}

In [9]:
# Read the second sheet (index 1) of the first annual workbook.
# The path is built with os.path.join, the same way the keys of
# annual_file_sheets were built, so the lookup works with any path separator
# (a hard-coded 'assets/LFS\\...' key only exists on Windows).
first_annual_path = os.path.join(
    lfs_folder, "A0101_SJO03_TS_AN_00_1981_00_2024_01_F_EN.xlsx"
)
sh1 = annual_file_sheets[first_annual_path][1]
df1_a = pd.read_excel(first_annual_path, sheet_name=sh1)

# The first parsed row holds the real column labels: keep it aside as the
# header, then drop it from the data and renumber the rows from 0.
header_row = df1_a.iloc[0]
df1_a = df1_a.iloc[1:].reset_index(drop=True)

# Use the header row as column names; any missing or blank label gets a
# positional placeholder ("unnamed_<position>"). Assigning a plain list also
# leaves the columns index without a name.
df1_a.columns = [
    col if pd.notna(col) and str(col).strip() != "" else f"unnamed_{i}"
    for i, col in enumerate(header_row)
]
df1_a


Unnamed: 0,Year,Region - NUTS II,unnamed_2,0-14,15-19,20-24,25-29,30-44,45-64,65+,...,25-29.1,30-44.1,45-64.1,65+.1,Greek,EU country,Other,Single,Married,"Widowed, divorced or legally separated"
0,,,,,,,,,,,...,,,,,,,,,,
1,2024,Anatoliki Makedonia-Thraki,581.123160,77.131093,38.521068,23.959562,27.522799,104.773606,163.670587,145.544445,...,11.469888,50.249627,83.342897,81.560521,572.358515,1.231404,7.533242,...,...,...
2,2024,Kentriki Makedonia,1822.676233,243.768781,105.572367,89.546905,109.071146,306.4556,532.374987,435.886446,...,54.672215,148.335325,275.128395,244.574501,1784.794801,4.408964,33.250837,...,...,...
3,2024,Dytiki Makedonia,264.887793,33.597145,15.791495,12.528691,13.223801,41.641128,80.714678,67.390855,...,7.09287,20.065734,39.743738,36.475272,263.142739,0.156069,1.377237,...,...,...
4,2024,Ipeiros,327.688629,39.076368,17.182466,15.565287,17.619056,50.771891,93.712814,93.760747,...,7.762592,25.409761,46.234421,50.598356,318.625309,0.995919,7.978636,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
584,1981,Kriti,445.284270,107.957934,32.299668,19.113867,22.111866,79.92,108.424467,75.456468,...,11.722266,41.491467,56.410533,39.559401,...,...,...,...,...,...
585,1981,Ipeiros,416.518065,89.778132,33.033933,20.112867,24.709266,72.927,105.827067,70.1298,...,12.054933,36.5634,57.075867,39.96,...,...,...,...,...,...
586,1981,Thraki,306.493200,70.863066,23.242734,14.718267,18.7812,63.803133,79.453467,35.631333,...,8.7912,32.634333,39.5604,19.647333,...,...,...,...,...,...
587,1981,Nissia Anatolikou Aigaiou,306.023670,76.256667,19.5804,14.451534,21.777201,60.272667,62.003934,51.681267,...,11.521467,28.638333,31.301667,27.839133,...,...,...,...,...,...
