## 🟥 Imports

In [1]:
!pip list


Package                            Version
---------------------------------- -------------------
absl-py                            1.4.0
accelerate                         0.34.2
aiohappyeyeballs                   2.4.3
aiohttp                            3.10.10
aiosignal                          1.3.1
alabaster                          0.7.16
albucore                           0.0.19
albumentations                     1.4.20
altair                             4.2.2
annotated-types                    0.7.0
anyio                              3.7.1
argon2-cffi                        23.1.0
argon2-cffi-bindings               21.2.0
array_record                       0.5.1
arviz                              0.20.0
astropy                            6.1.4
astropy-iers-data                  0.2024.10.28.0.34.7
astunparse                         1.6.3
async-timeout                      4.0.3
atpublic                           4.1.0
attrs                              24.2.0
audioread        

In [None]:
import pandas as pd
import os
import numpy as np
import datetime
from datetime import datetime, timedelta
import re


!pip install pyxlsb
import pyxlsb
import pandas as pd
from openpyxl import load_workbook

# Suppress warnings
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Mount google drive
from google.colab import drive
drive.mount('/content/drive')

Collecting pyxlsb
  Downloading pyxlsb-1.0.10-py2.py3-none-any.whl (23 kB)
Installing collected packages: pyxlsb
Successfully installed pyxlsb-1.0.10
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 🟥 Functions

In [None]:
########################################################################################################################################################################################################################
########################################################################################################################################################################################################################

def extract_sheet_names(file_path):
    """Return the sheet names contained in an Excel workbook.

    Args:
        file_path (str): Path to the workbook. Paths ending in '.xlsb' (any letter case)
            are opened with pyxlsb; everything else is opened with pandas.

    Returns:
        list: The sheet names reported by the reader.
    """
    if file_path.lower().endswith('.xlsb'):
        # Handle XLSB files
        with pyxlsb.open_workbook(file_path) as xlsb:
            return list(xlsb.sheets)

    # Handle XLSX or XLS files. The context manager closes the underlying file handle,
    # which would otherwise stay open for every workbook inspected.
    with pd.ExcelFile(file_path) as xls:
        return list(xls.sheet_names)

########################################################################################################################################################################################################################
########################################################################################################################################################################################################################

def extract_garment_type(file_path, sheet_garment_mapping):
    """Return the first garment type that the workbook itself confirms.

    For every (sheet_name, garment_type) pair the sheet is read without a header row.
    The pair matches when the row whose column 1 equals "Garment type:" holds
    `garment_type` in column 2.

    Args:
        file_path (str): Path to the QC workbook.
        sheet_garment_mapping: Iterable of (sheet_name, garment_type) pairs, or a dict
            mapping sheet name to garment type.

    Returns:
        The matching garment type, or None when no sheet confirms its garment type.
    """
    if isinstance(sheet_garment_mapping, dict):
        pairs = sheet_garment_mapping.items()
    else:
        pairs = sheet_garment_mapping

    for sheet_name, garment_type in pairs:
        # Read the specific sheet; a missing or unreadable sheet just means "try the next one"
        try:
            df = pd.read_excel(file_path, sheet_name=sheet_name, header=None)
        except Exception:
            continue

        # Columns 1 (label) and 2 (value) must both exist before they can be inspected
        if df.shape[1] < 3:
            continue

        # Find the row where the second column (index 1) has "Garment type:"
        row = df[df[1] == "Garment type:"]

        # Check the third column (index 2) for the expected garment type
        if not row.empty and row.iloc[0, 2] == garment_type:
            return garment_type

    return None

########################################################################################################################################################################################################################
########################################################################################################################################################################################################################

def assess_main_info_column(file_path, sheet_names):
    """Find which column (0, 1 or 2) holds the summary labels such as 'Supplier:'.

    Each candidate sheet is read without a header row; the first column among 0, 1, 2
    that contains either 'Supplier:' or 'PO number:' wins.

    Args:
        file_path (str): Path to the QC workbook.
        sheet_names: Iterable of sheet names to try, in order.

    Returns:
        int or str: The column index, or the string 'Not found' when no candidate sheet
        contains either label in its first three columns.
    """
    for sheet_name in sheet_names:

        # Check if sheet name exists (an unreadable sheet is simply skipped)
        try:
            df = pd.read_excel(file_path, sheet_name=sheet_name, header=None)
        except Exception:
            continue

        # Check if main info column candidate has supplier/po descriptions. Try columns 0, 1, 2
        for main_info_column in (0, 1, 2):

            # Sheets narrower than three columns do not have every candidate column
            if main_info_column not in df.columns:
                continue

            labels = df[main_info_column]
            if (labels == 'Supplier:').any() or (labels == 'PO number:').any():
                return main_info_column

    print('🔴 Main info column not found (neither 0 nor 1 nor 2)')
    return 'Not found'

########################################################################################################################################################################################################################

def assess_sheet_name(file_path, sheet_names, main_info_column):
    """Find which sheet holds the filled-in data summary of a QC workbook.

    A sheet qualifies when the cell right of the 'Supplier:' label or right of the
    'PO number:' label (both searched in `main_info_column`) is not null.

    Args:
        file_path (str): Path to the QC workbook.
        sheet_names: Iterable of candidate sheet names, tried in order.
        main_info_column: Column index holding the labels, as returned by
            `assess_main_info_column` (which may be the string 'Not found').

    Returns:
        str: The qualifying sheet name, or 'Not found' when no sheet qualifies.
    """
    # Files that are always mapped to 'DATA TOPS' without inspecting their sheets
    # (presumably because automatic detection picks the wrong sheet — TODO confirm)
    data_tops_overrides = (
        'PTR.23.00700.4101_RESPOKE_VIPALTEX_21.09.2023_HEAVY T-SHIRT_SCARAB_VIPALTEX1MANU648_ QC (1).xlsx',
        'PTR.23.00700.4101_RESPOKE_VIPALTEX_21.09.2023_HEAVY T-SHIRT_SCARAB_VIPALTEX1MANU648_ QC.xlsx',
        'PTR23.00604_Respoke_Ritedu_2023_08_04_Cord Shirt_Dark Navy_RITEDU20MANU620_IR.xlsx',
        'PTR23.00605_Respoke_Ritedu_2023_08_04_Cord Shirt_Tawny Port_RITEDU20MANU620_IR.xlsx',
        'RITEDU22MANU659_IR_CHAMBRAY.xlsx',
        '2023_11_27_RITEDU22MANU659 DARK_CHAMBRAY.xlsx',
    )
    if file_path.endswith(data_tops_overrides):
        return 'DATA TOPS'

    def _value_next_to(df, label):
        # Value in the cell right of the first row carrying `label`; None if the label is absent
        rows = df[df[main_info_column] == label]
        if rows.empty:
            return None
        return rows.iloc[0, main_info_column + 1]

    # When the label column could not be determined there is nothing to look up
    if isinstance(main_info_column, int):
        for sheet_name in sheet_names:

            # Check if sheet name exists (an unreadable sheet is simply skipped)
            try:
                df = pd.read_excel(file_path, sheet_name=sheet_name, header=None)
            except Exception:
                continue

            # Both the label column and the value column to its right must exist
            if main_info_column not in df.columns or main_info_column + 1 not in df.columns:
                continue

            # Check whether it's the right sheet by looking at supplier and PO number values
            supplier_value = _value_next_to(df, 'Supplier:')
            po_value = _value_next_to(df, 'PO number:')

            if pd.notna(supplier_value) or pd.notna(po_value):
                return sheet_name

    print('🔴 Sheet name with data summary not found (neither DATA TROUSERS, nor DATA TOPS, nor DATA BOXERS)')
    return 'Not found'

########################################################################################################################################################################################################################

def extract_file_summary(file_path, qc_files_fields_to_extract, sheet_name, main_info_column):
    """Build a one-row DataFrame summarising a QC file.

    The summary sheet is read without a header row. For every requested field the first
    row whose `main_info_column` cell equals the field label is located, and the value
    is taken from the cell immediately to its right.

    Args:
        file_path (str): Path to the QC workbook.
        qc_files_fields_to_extract (list): Field labels to look up; also the output columns.
        sheet_name (str): Sheet holding the summary.
        main_info_column (int): Index of the column holding the field labels.

    Returns:
        pd.DataFrame: A single row with one column per requested field; fields that are
        absent from the sheet get '' as their value.
    """
    summary_sheet = pd.read_excel(file_path, sheet_name=sheet_name, header=None)
    value_column = main_info_column + 1

    def lookup(field_name):
        # First row carrying this label, if any
        matches = summary_sheet[summary_sheet[main_info_column] == field_name]
        if matches.empty:
            return ''
        return matches.iloc[0, value_column]

    summary_row = [lookup(field_name) for field_name in qc_files_fields_to_extract]
    return pd.DataFrame([summary_row], columns=qc_files_fields_to_extract)

########################################################################################################################################################################################################################
########################################################################################################################################################################################################################

def extract_qc_units(file_path, qc_units_sheet, garment_type):
    """Extract the per-unit measurement rows from a QC workbook.

    Rows from index 3 onwards and a fixed set of 18 columns are taken from the
    measurements sheet, renamed with the field list of the garment type, and rows
    without a `unit_size` are dropped.

    NOTE: relies on the module-level lists `qc_units_tops_fields` and
    `qc_units_trousers_fields`, which must be defined before this function is called.

    Args:
        file_path (str): Path to the QC workbook.
        qc_units_sheet (str): Name of the sheet holding the unit measurements.
        garment_type (str): 'Tops' or 'Trousers'.

    Returns:
        pd.DataFrame: One row per measured unit. For any other garment type a message
        is printed and an empty DataFrame is returned.
    """
    columns_to_extract = [1, 2, 3, 4, 8, 9, 13, 14, 18, 19, 23, 24, 28, 29, 33, 34, 38, 39]
    starting_row = 3

    # Pick the column names first so unsupported garment types never touch the workbook
    if garment_type == 'Tops':
        field_names = qc_units_tops_fields
    elif garment_type == 'Trousers':
        field_names = qc_units_trousers_fields
    else:
        print('Logic for this garment_type not yet added. Skipping unit extraction')
        return pd.DataFrame()

    df = pd.read_excel(file_path, sheet_name=qc_units_sheet, header=None)

    qc_units = df.iloc[starting_row:, columns_to_extract].copy()
    qc_units.columns = field_names

    # Rows without a unit size are treated as empty template rows
    return qc_units[qc_units['unit_size'].notna()].reset_index(drop=True)

########################################################################################################################################################################################################################
########################################################################################################################################################################################################################

def create_qc_files_table(folder_path, sheet_names, qc_files_fields_to_extract, max_files=500):
    """Build one table with the summary row of every QC file in a folder.

    Args:
        folder_path (str): Folder containing the QC workbooks.
        sheet_names: Candidate names of the sheet that holds the summary.
        qc_files_fields_to_extract (list): Field labels to extract from each summary.
        max_files (int): Maximum number of directory entries to process.

    Returns:
        pd.DataFrame: One row per successfully processed file, with the requested fields
        plus 'file_name' and 'folder_name'. Files that fail are reported and skipped; if
        nothing could be processed the table is empty but still has all columns.
    """
    # Step 1: List the folder once so the progress total and the loop agree
    file_names = os.listdir(folder_path)
    total_files = len(file_names)
    qc_files_list = []

    # Step 2: Loop through folder containing QC Files and collect their summaries
    for file_count, file_name in enumerate(file_names, start=1):
        if file_count > max_files:
            break

        # Get QC File path
        file_path = os.path.join(folder_path, file_name)

        # Display progress
        print(f'📁 {file_count} / {total_files} Processing File: {file_name}')

        # Any failure while locating or reading the summary only skips this file
        try:
            # Get main info column (in which column is summary held? 0, 1, or 2)
            main_info_column = assess_main_info_column(file_path, sheet_names)

            # Get main sheet name (in which sheet is summary held? Tops, Trousers, or Boxers?)
            sheet_name = assess_sheet_name(file_path, sheet_names, main_info_column)

            # Extract File Summary for QC File
            file_summary = extract_file_summary(file_path, qc_files_fields_to_extract, sheet_name, main_info_column)
            file_summary['file_name'] = file_name
            qc_files_list.append(file_summary)
        except Exception as e:
            print(f'🔴 Failed to process summary for this file: {e}.')

    # Step 3: Concatenate all QC File DFs (pd.concat rejects an empty list)
    if qc_files_list:
        qc_files = pd.concat(qc_files_list, ignore_index=True)
    else:
        qc_files = pd.DataFrame(columns=list(qc_files_fields_to_extract) + ['file_name'])

    # Step 4: Add folder_name field (normpath so a trailing slash does not yield '')
    qc_files['folder_name'] = os.path.basename(os.path.normpath(folder_path))

    # Step 5: Return QC Files table
    return qc_files.reset_index(drop=True)

########################################################################################################################################################################################################################
########################################################################################################################################################################################################################

def clean_date(date):
    """
    Cleans and standardizes the date to the 'YYYY-MM-DD' format, including additional formats like 'DD.MM.YYYY'
    and Excel date format (serial day numbers).

    Text is matched after trimming surrounding whitespace against 'DD/MM/YYYY',
    'YYYY-MM-DD HH:MM:SS' (with or without fractional seconds), 'YYYY-MM-DD' and 'DD.MM.YYYY'.
    Positive numbers (Python or NumPy ints/floats) are read as Excel serial dates.

    Args:
    date (str, int, float or datetime-like): The date in various formats.

    Returns:
    str: The standardized date string in 'YYYY-MM-DD' format, or the original value if not parsable.
    """
    # bool is a subclass of int but never represents a date
    if isinstance(date, bool):
        return date

    text = str(date).strip()
    for fmt in ("%d/%m/%Y", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d", "%d.%m.%Y"):
        try:
            return datetime.strptime(text, fmt).date().isoformat()
        except ValueError:
            continue

    # Handle Excel date format. Counting from 1899-12-30 absorbs both Excel's 1-based
    # numbering and its fictitious 1900-02-29 (same result as 1900-01-01 + (days - 2)).
    if isinstance(date, (int, float, np.integer, np.floating)) and date > 0:
        try:
            return (datetime(1899, 12, 30) + pd.Timedelta(days=float(date))).date().isoformat()
        except (OverflowError, ValueError):
            # Serial number too large to be represented as a date: leave it untouched
            pass

    # Return the original date if not parsable
    return date

# For entries in quantities like: 307 (22 cartons)
def extract_first_number(s):
    """Return the first run of digits found in `s` as a string, or '' if there is none."""
    for found in re.finditer(r'\d+', s):
        return found.group(0)
    return ''

def transform_qc_files(qc_files):
    """Rename, normalise and enrich the raw QC file summaries.

    Steps: rename the long field labels, snake_case every column name, trim and
    upper-case the text fields, build `file_id` / `batch_id`, standardise
    `date_of_inspection` with `clean_date`, and keep only the first number of the
    quantity fields.

    Args:
        qc_files (pd.DataFrame): Raw summaries whose columns are the extracted field labels.

    Returns:
        pd.DataFrame: A transformed copy; the input frame is not modified.
    """
    # Rename columns
    qc_files = qc_files.rename(columns={
                                        'QC PASS/FAIL': 'qc_result',
                                        'Inspection Type (to be completed by Spoke):': 'inspection_type',
                                        'Number of units rejected on measurement': 'units_rejected_measurement',
                                        'Number of units rejected on quality': 'units_rejected_quality',
                                        'Number of units rejected on quality - Major': 'units_rejected_quality_major',
                                        'Number of units rejected on quality - Minor': 'units_rejected_quality_minor',
                                        'Number of units rejected on quality - Critical': 'units_rejected_quality_critical',
                                        'Style/Material/Colour - Conform': 'conform_style',
                                        'Packing - Conform': 'conform_packing',
                                        'Marking/Label (barcodes readable and matching product) - Conform': 'conform_label',
                                        'Odour test - Conform': 'conform_odour',
                                        'Hem (Shorts only)': 'hem'
                                        })

    # Reformat column names
    qc_files.columns = qc_files.columns.str.rstrip(':').str.strip().str.lower().str.replace(' ', '_')

    # Reformat string fields (trim and capitalise)
    string_fields = ['inspection_type', 'qc_provider', 'supplier', 'supplier_country', 'po_number', 'garment_type', 'style', 'colour']

    def _normalise_text(value):
        # Only real strings are touched: numbers (e.g. a numeric PO) and NaN pass through.
        # The .str accessor would raise on non-object columns and null out non-string cells.
        return value.strip().upper() if isinstance(value, str) else value

    for string_field in string_fields:
        qc_files[string_field] = qc_files[string_field].map(_normalise_text)

    # Add file_id (Inspection type + PO + Style + Colour) and batch_id (PO + Style + Colour)
    qc_files['file_id'] = qc_files['inspection_type'].astype(str) + ' | ' + qc_files['po_number'].astype(str) + ' | ' + qc_files['style'].astype(str) + ' | ' + qc_files['colour'].astype(str)
    qc_files['batch_id'] = qc_files['po_number'].astype(str) + ' | ' + qc_files['style'].astype(str) + ' | ' + qc_files['colour'].astype(str)

    # Clean dates using "clean_date" function
    qc_files['date_of_inspection'] = qc_files['date_of_inspection'].apply(clean_date)

    # Clean entries in quantities like: 307 (22 cartons) by keeping the first number
    unclean_fields = ['qty_declared', 'qty_packed']

    for field in unclean_fields:
        qc_files[field] = qc_files[field].astype(str).apply(extract_first_number)

    return qc_files

## 🟥 Inputs

In [None]:


# Candidate sheets in which the summary data may live
sheet_names = (
    'DATA TROUSERS',
    'DATA TOPS',
    'DATA BOXERS',
)

# QC Files: labels looked up on the summary page, in output-column order
qc_files_fields_to_extract = [
    # Identification
    'Inspection Type (to be completed by Spoke):',
    'QC provider:',
    'Supplier:',
    'Supplier country:',
    'Date of inspection:',
    'PO number:',
    'Garment type:',
    'Style:',
    'Colour:',

    # Conformity checks
    'Style/Material/Colour - Conform',
    'Packing - Conform',
    'Marking/Label (barcodes readable and matching product) - Conform',
    'Odour test - Conform',

    # Quantities and rejections
    'Order QTY:',
    'QTY declared:',
    'QTY packed:',
    'Units inspected:',
    'Units measured:',
    'Number of units rejected on measurement',
    'Number of units rejected on quality',
    'Number of units rejected on quality - Major',
    'Number of units rejected on quality - Minor',
    'Number of units rejected on quality - Critical',

    # Trousers measurements
    'Waist',
    'Hip',
    'Front rise',
    'Back rise',
    'Thigh',
    'Knee',
    'Hem (Shorts only)',

    # Tops measurements. Tops also have a waist, but it is omitted here because 'Waist'
    # is already listed with the trousers fields; revisit if that ever changes.
    'Chest',
    'Front Length',
    'Sleeve Length',
    'Muscle',
    'Cuff Width',
    'Neck Circ',

    # Overall outcome
    'QC PASS/FAIL',
]


## 🟥 Run

In [None]:
# Google Drive folders holding the QC workbooks of each season
folder_path_aw23 = '/content/drive/MyDrive/QC_files_AW23'
folder_path_ss24 = '/content/drive/MyDrive/QC_files_SS24'

# Build one summary table per season (at most 500 files each)
qc_files_aw23 = create_qc_files_table(
    folder_path=folder_path_aw23,
    sheet_names=sheet_names,
    qc_files_fields_to_extract=qc_files_fields_to_extract,
    max_files=500,
)
qc_files_ss24 = create_qc_files_table(
    folder_path=folder_path_ss24,
    sheet_names=sheet_names,
    qc_files_fields_to_extract=qc_files_fields_to_extract,
    max_files=500,
)



📁 1 / 204 Processing File: KAYNAK13MANU640 New Rise BLACK  (BLACK ONYX) reg.xlsx
📁 2 / 204 Processing File: KAYNAK13MANU640 New Rise BLACK  (BLACK ONYX) taper.xlsx
📁 3 / 204 Processing File: KAYNAK13MANU640 New Rise FLINTSTONE (INFINITY) reg.xlsx
📁 4 / 204 Processing File: KAYNAK13MANU640 New Rise FLINTSTONE (INFINITY) taper.xlsx
📁 5 / 204 Processing File: KAYNAK13MANU640 New Rise ICEBERG (QUARRY) taper.xlsx
📁 6 / 204 Processing File: KAYNAK13MANU640 New Rise MAROON (RHUBARB) taper.xlsx
📁 7 / 204 Processing File: KAYNAK13MANU640 New Rise SAGE (ICEBERG GREEN) reg.xlsx
📁 8 / 204 Processing File: KAYNAK13MANU640 New Rise SAGE (ICEBERG GREEN) taper.xlsx
📁 9 / 204 Processing File: KAYNAK14MANU642 New Rise medieval blue taper.xlsx
📁 10 / 204 Processing File: KAYNAK14MANU642 New Rise medieval blue reg.xlsx
📁 11 / 204 Processing File: KAYNAK13MANU640 New Rise MAROON (RHUBARB) reg.xlsx
📁 12 / 204 Processing File: KAYNAK13MANU640 ICEBERG New Rise (QUARRY).xlsx
📁 13 / 204 Processing File: KAYNAK1

In [None]:
# Stack both seasons under a fresh 0..n-1 index, then normalise the combined table
qc_files = transform_qc_files(
    pd.concat([qc_files_aw23, qc_files_ss24]).reset_index(drop=True)
)

display(qc_files)

Unnamed: 0,inspection_type,qc_provider,supplier,supplier_country,date_of_inspection,po_number,garment_type,style,colour,conform_style,...,front_length,sleeve_length,muscle,cuff_width,neck_circ,qc_result,file_name,folder_name,file_id,batch_id
0,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES,BLACK (BLACK ONYX),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise BLACK (BLACK ONYX) r...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES | BLACK (BLA...,KAYNAK13MANU640 | HEROES | BLACK (BLACK ONYX)
1,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES TAPER,BLACK (BLACK ONYX),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise BLACK (BLACK ONYX) t...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES TAPER | BLACK...,KAYNAK13MANU640 | HEROES TAPER | BLACK (BLACK...
2,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES,FLINTSTONE (INFINITY),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise FLINTSTONE (INFINITY)...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES | FLINTSTONE ...,KAYNAK13MANU640 | HEROES | FLINTSTONE (INFINITY)
3,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES TAPER,FLINTSTONE (INFINITY),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise FLINTSTONE (INFINITY)...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES TAPER | FLINT...,KAYNAK13MANU640 | HEROES TAPER | FLINTSTONE (I...
4,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES,ICEBERG (QUARRY),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise ICEBERG (QUARRY) tape...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES | ICEBERG (QU...,KAYNAK13MANU640 | HEROES | ICEBERG (QUARRY)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240,FIRST,SGS,LAMOSA,PORTUGAL,2024-02-01,LAMOSA90MANU675,TROUSERS,FIVES,AMMONITE,Subject to clients evaluation,...,,,,,,PASS,PTR24.00072.4101_RESPOKE_LAMOSA_01.02.2024_FIV...,QC_files_SS24,FIRST | LAMOSA90MANU675 | FIVES | AMMONITE,LAMOSA90MANU675 | FIVES | AMMONITE
241,FIRST,BV,KAYNAK,TURKEY,2024-02-14,KAYNAK17MANU682,TROUSERS,HEROES,BLACK,Not Conform,...,,,,,,PASS,KAYNAK17MANU682-HEROES-BLACK.xlsx,QC_files_SS24,FIRST | KAYNAK17MANU682 | HEROES | BLACK,KAYNAK17MANU682 | HEROES | BLACK
242,FIRST,BV,KAYNAK,TURKEY,2024-02-14,KAYNAK17MANU682,TROUSERS,HEROES,MARS (BRICK RED),Not Conform,...,,,,,,FAIL,KAYNAK17MANU682-HEROES-MARS-(BRICK RED.xlsx,QC_files_SS24,FIRST | KAYNAK17MANU682 | HEROES | MARS (BRICK...,KAYNAK17MANU682 | HEROES | MARS (BRICK RED)
243,FIRST,BV,KAYNAK,TURKEY,2024-02-15,KAYNAK17MANU682,TROUSERS,HEROES,DUSTY ROSE,Not Conform,...,,,,,,FAIL,KAYNAK17MANU682 - HEROES - DUSTY ROSE.xlsx,QC_files_SS24,FIRST | KAYNAK17MANU682 | HEROES | DUSTY ROSE,KAYNAK17MANU682 | HEROES | DUSTY ROSE


In [None]:
# Data-quality checks: issue name -> field that must be filled in.
# A field counts as missing when it is NaN or the '' placeholder.
issues = {
    issue_key: qc_files[field_name].isna() | qc_files[field_name].eq('')
    for issue_key, field_name in (
        ("missing_supplier", 'supplier'),
        ("missing_po", 'po_number'),
        ("missing_date_of_inspection", 'date_of_inspection'),
    )
}

# Report how many files are affected by each issue
for issue_name, issue_logic in issues.items():
    affected_files = len(qc_files[issue_logic])
    print(f'{affected_files} files: {issue_name}')

# Export the full table next to the notebook
qc_files.to_excel('qc_file_summaries.xlsx', index=False)

20 files: missing_supplier
35 files: missing_po
50 files: missing_date_of_inspection


In [None]:
# Show, for every issue, its name followed by the affected rows
for issue, issue_mask in issues.items():
    display(issue, qc_files[issue_mask])


'missing_supplier'

Unnamed: 0,inspection_type,qc_provider,supplier,supplier_country,date_of_inspection,po_number,garment_type,style,colour,conform_style,...,front_length,sleeve_length,muscle,cuff_width,neck_circ,qc_result,file_name,folder_name,file_id,batch_id
124,FIRST,SGS,,VIETNAM,2023-07-28,SAITEX11MANU628,TROUSERS,12 OZ DENIM TAPER RINSE/BLACK,BLACK,Conform,...,,,,,,PASS,FRI Report - 40311719-04 - Respoke Limited - P...,QC_files_AW23,FIRST | SAITEX11MANU628 | 12 OZ DENIM TAPER RI...,SAITEX11MANU628 | 12 OZ DENIM TAPER RINSE/BLAC...
125,FIRST,SGS,,VIETNAM,2023-07-28,SAITEX11MANU628,TROUSERS,12 OZ DENIM RINSE/BLACK,BLACK,Conform,...,,,,,,PASS,FRI Report - 40311719-03 - Respoke Limited - P...,QC_files_AW23,FIRST | SAITEX11MANU628 | 12 OZ DENIM RINSE/BL...,SAITEX11MANU628 | 12 OZ DENIM RINSE/BLACK | BLACK
126,FIRST,SGS,,VIETNAM,2023-07-28,SAITEX11MANU628,TROUSERS,12 OZ DENIM TAPER RINSE/BLACK,RINSE,Conform,...,,,,,,PASS,FRI Report - 40311719-02 - Respoke Limited - P...,QC_files_AW23,FIRST | SAITEX11MANU628 | 12 OZ DENIM TAPER RI...,SAITEX11MANU628 | 12 OZ DENIM TAPER RINSE/BLAC...
127,FIRST,SGS,,VIETNAM,2023-07-28,SAITEX11MANU628,TROUSERS,12 OZ DENIM RINSE/BLACK,RINSE,Conform,...,,,,,,PASS,FRI Report - 40311719-01 - Respoke Limited - P...,QC_files_AW23,FIRST | SAITEX11MANU628 | 12 OZ DENIM RINSE/BL...,SAITEX11MANU628 | 12 OZ DENIM RINSE/BLACK | RINSE
143,FIRST,SGS,,VIETNAM,2023-07-28,SAITEX11MANU628,TROUSERS,12 OZ DENIM TAPER RINSE/BLACK,BLACK,Conform,...,,,,,,PASS,SAITEX11MANU628 12 OZ DENIM TAPER - BLACK.xlsx,QC_files_AW23,FIRST | SAITEX11MANU628 | 12 OZ DENIM TAPER RI...,SAITEX11MANU628 | 12 OZ DENIM TAPER RINSE/BLAC...
145,FIRST,SGS,,VIETNAM,2023-06-26,SAITEX10MANU627,TROUSERS,12 OZ DENIM BROKEN IN/CHARCOAL,BROKEN IN,Conform,...,,,,,,PASS,SAITEX10MANU627 12 OZ DENIM - BROKEN IN.xlsx,QC_files_AW23,FIRST | SAITEX10MANU627 | 12 OZ DENIM BROKEN I...,SAITEX10MANU627 | 12 OZ DENIM BROKEN IN/CHARCO...
151,FIRST,SGS,,VIETNAM,2023-07-29,SAITEX11MANU628,TROUSERS,12 OZ DENIM BROKEN IN/CHARCOAL,BROKEN IN,Conform,...,,,,,,PASS,SAITEX11MANU628 - 12 OZ DENIM - BROKEN IN.xlsx,QC_files_AW23,FIRST | SAITEX11MANU628 | 12 OZ DENIM BROKEN I...,SAITEX11MANU628 | 12 OZ DENIM BROKEN IN/CHARCO...
158,FIRST,SGS,,VIETNAM,2023-06-26,SAITEX10MANU627,TROUSERS,12 OZ DENIM TAPER RINSE/BLACK,RINSE,,...,,,,,,PASS,SAITEX10MANU627 - 12 OZ DENIM TAPER - RINSE.xlsx,QC_files_AW23,FIRST | SAITEX10MANU627 | 12 OZ DENIM TAPER RI...,SAITEX10MANU627 | 12 OZ DENIM TAPER RINSE/BLAC...
161,FIRST,SGS,,VIETNAM,2023-07-28,SAITEX11MANU628,TROUSERS,12 OZ DENIM RINSE/BLACK,BLACK,Conform,...,,,,,,PASS,SAITEX11MANU628 - 12 OZ DENIM - BLACK.xlsx,QC_files_AW23,FIRST | SAITEX11MANU628 | 12 OZ DENIM RINSE/BL...,SAITEX11MANU628 | 12 OZ DENIM RINSE/BLACK | BLACK
163,FIRST,SGS,,VIETNAM,2023-06-19,SAITEX9MANU610,TROUSERS,STAY BLACK DENIM,,Conform,...,,,,,,PASS,SAITEX9MANU610 - 12 OZ DENIM - STAY BLACK.xlsx,QC_files_AW23,FIRST | SAITEX9MANU610 | STAY BLACK DENIM | nan,SAITEX9MANU610 | STAY BLACK DENIM | nan


'missing_po'

Unnamed: 0,inspection_type,qc_provider,supplier,supplier_country,date_of_inspection,po_number,garment_type,style,colour,conform_style,...,front_length,sleeve_length,muscle,cuff_width,neck_circ,qc_result,file_name,folder_name,file_id,batch_id
24,FIRST,,VIPALTEX,,,,TROUSERS,MOLESKIN FIVES,,Subject to clients evaluation,...,,,,,,FAIL,PTR.23.00700.4101_RESPOKE_VIPALTEX_21.09.2023_...,QC_files_AW23,FIRST | nan | MOLESKIN FIVES | nan,nan | MOLESKIN FIVES | nan
60,FIRST,,KAYNAK,,,,TROUSERS,HEROES TAPER,ICEBERG (QUARRY),,...,,,,,,FAIL,HEROES Taper ICEBERG (QUARRY) BV_son ölçü.xlsx,QC_files_AW23,FIRST | nan | HEROES TAPER | ICEBERG (QUARRY),nan | HEROES TAPER | ICEBERG (QUARRY)
63,FIRST,,KAYNAK,,,,TROUSERS,HEROES TAPER,OLIVE,,...,,,,,,FAIL,HEROES Taper Olive BV 16.08.xlsx,QC_files_AW23,FIRST | nan | HEROES TAPER | OLIVE,nan | HEROES TAPER | OLIVE
65,FIRST,,KAYNAK,,,,TROUSERS,HEROES,OLIVE,,...,,,,,,FAIL,HEROES reg OliveBV 16.08.xlsx,QC_files_AW23,FIRST | nan | HEROES | OLIVE,nan | HEROES | OLIVE
69,FIRST,,KAYNAK,,,,TROUSERS,HEROES TAPER,NAVY,,...,,,,,,FAIL,HEROES Taper Navy BV.xlsx,QC_files_AW23,FIRST | nan | HEROES TAPER | NAVY,nan | HEROES TAPER | NAVY
71,FIRST,,KAYNAK,,,,TROUSERS,HEROES,HARVEST GOLD,,...,,,,,,FAIL,HEROES Reg Harvest Gold BV.xlsx,QC_files_AW23,FIRST | nan | HEROES | HARVEST GOLD,nan | HEROES | HARVEST GOLD
72,FIRST,,KAYNAK,,,,TROUSERS,HEROES TAPER,ONYX,,...,,,,,,PASS,HEROES Taper Onyx BV_son ölçü.xlsx,QC_files_AW23,FIRST | nan | HEROES TAPER | ONYX,nan | HEROES TAPER | ONYX
74,FIRST,,KAYNAK,,,,TROUSERS,HEROES,OLIVE,,...,,,,,,FAIL,HEROES reg OliveBV.xlsx,QC_files_AW23,FIRST | nan | HEROES | OLIVE,nan | HEROES | OLIVE
77,FIRST,,KAYNAK,,,,TROUSERS,HEROES TAPER,SMOKED NAVY (CROWN BLUE),,...,,,,,,FAIL,HEROES Taper Smoked Navy (Crown Blue) BV.xlsx,QC_files_AW23,FIRST | nan | HEROES TAPER | SMOKED NAVY (CROW...,nan | HEROES TAPER | SMOKED NAVY (CROWN BLUE)
78,FIRST,,KAYNAK,,,,TROUSERS,HEROES,ICEBERG (QUARRY),,...,,,,,,FAIL,HEROES Reg ICEBERG (QUARRY) BV_son ölçü.xlsx,QC_files_AW23,FIRST | nan | HEROES | ICEBERG (QUARRY),nan | HEROES | ICEBERG (QUARRY)


'missing_date_of_inspection'

Unnamed: 0,inspection_type,qc_provider,supplier,supplier_country,date_of_inspection,po_number,garment_type,style,colour,conform_style,...,front_length,sleeve_length,muscle,cuff_width,neck_circ,qc_result,file_name,folder_name,file_id,batch_id
0,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES,BLACK (BLACK ONYX),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise BLACK (BLACK ONYX) r...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES | BLACK (BLA...,KAYNAK13MANU640 | HEROES | BLACK (BLACK ONYX)
1,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES TAPER,BLACK (BLACK ONYX),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise BLACK (BLACK ONYX) t...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES TAPER | BLACK...,KAYNAK13MANU640 | HEROES TAPER | BLACK (BLACK...
2,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES,FLINTSTONE (INFINITY),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise FLINTSTONE (INFINITY)...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES | FLINTSTONE ...,KAYNAK13MANU640 | HEROES | FLINTSTONE (INFINITY)
3,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES TAPER,FLINTSTONE (INFINITY),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise FLINTSTONE (INFINITY)...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES TAPER | FLINT...,KAYNAK13MANU640 | HEROES TAPER | FLINTSTONE (I...
4,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES,ICEBERG (QUARRY),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise ICEBERG (QUARRY) tape...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES | ICEBERG (QU...,KAYNAK13MANU640 | HEROES | ICEBERG (QUARRY)
5,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES TAPER,MAROON (RHUBARB),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise MAROON (RHUBARB) tape...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES TAPER | MAROO...,KAYNAK13MANU640 | HEROES TAPER | MAROON (RHUBARB)
6,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES,SAGE (ICEBERG GREEN),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise SAGE (ICEBERG GREEN) ...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES | SAGE (ICEBE...,KAYNAK13MANU640 | HEROES | SAGE (ICEBERG GREEN)
7,FIRST,BV,KAYNAK,,,KAYNAK13MANU640,TROUSERS,HEROES TAPER,SAGE (ICEBERG GREEN),Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK13MANU640 New Rise SAGE (ICEBERG GREEN) ...,QC_files_AW23,FIRST | KAYNAK13MANU640 | HEROES TAPER | SAGE ...,KAYNAK13MANU640 | HEROES TAPER | SAGE (ICEBERG...
8,FIRST,BV,KAYNAK,,,KAYNAK14MANU642,TROUSERS,HEROES TAPER,MEDIEVAL BLUE,Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK14MANU642 New Rise medieval blue taper.xlsx,QC_files_AW23,FIRST | KAYNAK14MANU642 | HEROES TAPER | MEDIE...,KAYNAK14MANU642 | HEROES TAPER | MEDIEVAL BLUE
9,FIRST,BV,KAYNAK,,,KAYNAK14MANU642,TROUSERS,HEROES,MEDIEVAL BLUE,Subject to clients evaluation,...,,,,,,INCOMPLETE,KAYNAK14MANU642 New Rise medieval blue reg.xlsx,QC_files_AW23,FIRST | KAYNAK14MANU642 | HEROES | MEDIEVAL BLUE,KAYNAK14MANU642 | HEROES | MEDIEVAL BLUE


In [None]:
# Print the distinct values of every column to spot inconsistent spellings
for col in qc_files.columns:
    distinct_values = qc_files[col].unique()
    print(col, distinct_values)

inspection_type ['FIRST' '' nan]
qc_provider ['BV' 'SGS' nan '' 'TORQUE']
supplier ['KAYNAK' 'HKS' 'IMPULSE' 'VIPALTEX' 'ECOVEST' 'LAMOSA' 'SARTIUS' 'TAL'
 'VALERIUS' 'STAR GARMENTS' 'RITEDU' 'FAIRPLAY' 'SAITEX' nan]
supplier_country [nan 'TURKEY' 'CHINA' 'INDIA' 'PORTUGAL' 'VIETNAM' 'SRI LANKA' 'ROMANIA']
date_of_inspection [nan '2023-11-01' '2023-11-28' '2023-09-15' '2023-10-03' '2023-12-05'
 '2023-10-11' '2023-10-10' '2023-10-24' '2023-09-21' '2023-09-19'
 '2023-09-11' '2023-09-28' '2023-09-27' '2023-09-18' '2023-09-14'
 '2023-09-06' '2023-09-05' '2023-09-04' '2023-08-06' '2023-08-07'
 '2023-08-09' '2023-08-10' '2023-08-04' '2023-07-31' '2023-08-03'
 '2023-12-09' '2023-12-28' '2023-10-25' '2023-07-28' '2024-02-07'
 '2023-07-05' '2023-07-10' '2023-06-30' '2023-03-30' '2023-07-11'
 '2023-04-27' '2023-06-26' '2023-07-29' '2023-05-31' '2023-06-19'
 '2023-07-12' '2023-10-18' '2023-09-13' '2023-10-19' '2023-10-20'
 '2024-01-10' '2024-01-05' '2023-12-30' '2023-12-27' '2023-12-26'
 '2023-12

In [None]:
# # QC Units: Fields in QC Units tables
# qc_units_tops_fields = [
#                             'unit_size',
#                             'unit_fit',
#                             'unit_length',
#                             'approved',
#                             'm_chest', 'r_chest',
#                             'm_waist', 'r_waist',
#                             'm_front_length', 'r_front_length',
#                             'm_sleeve_length', 'r_sleeve_length',
#                             'm_muscle', 'r_muscle',
#                             'm_cuff_width', 'r_cuff_width',
#                             'm_neck_circ', 'r_neck_circ'
#                         ]

# qc_units_tops = pd.DataFrame(columns=qc_units_tops_fields)

# qc_units_trousers_fields = [
#                               'unit_size',
#                               'unit_fit',
#                               'unit_length',
#                               'approved',
#                               'm_half_waist', 'r_half_waist',
#                               'm_low_hip', 'r_low_hip',
#                               'm_front_rise', 'r_front_rise',
#                               'm_back_rise', 'r_back_rise',
#                               'm_thigh', 'r_thigh',
#                               'm_knee', 'r_knee',
#                               'm_hem', 'r_hem'
#                           ]

# qc_units_trousers = pd.DataFrame(columns=qc_units_trousers_fields)

# # QC Units: Garment type --> Sheet
# qc_unit_sheets_mapping = {'Tops': 'MEASUREMENTS TOPS',
#                  'Trousers': 'MEASUREMENTS TROUSERS',
#                  'Boxers': 'MEASUREMENTS BOXERS'}