In [277]:
import numpy as np
import pandas as pd
import re
import os
from dotenv import load_dotenv

In [201]:
# Load environment variables (the data folder paths read via os.getenv in later cells
# are expected to come from here — presumably a .env file; confirm against the project setup).
load_dotenv()

# Kept for convenience: restores pandas' default truncation of rows/columns.
# pd.reset_option('display.max_rows')
# pd.reset_option('display.max_columns')

# Disable display truncation so every row, every column and full cell text are rendered.
# NOTE: with these settings a bare DataFrame expression prints the entire frame.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [189]:
# Mapping of non-ASCII symbol -> ASCII placeholder.
# clean_text() substitutes each key with its value before it deletes every remaining
# non-ASCII character, so only symbols listed here survive (as placeholders) in cleaned text.
non_ascii_replacement_dict = {
    '€': '<euro>',
    '¢': '<cent>',
    '₹': '<rupee>',
    '×': 'X',
    '²': '<pow2>',
    '³': '<pow3>',
    '™': '<trademark>',
    '®': '<registered>',
    '–': '<dash>'
}

In [190]:
def clean_text(text, replacements=None):
    """Normalise quotes and symbols in ``text`` and reduce it to plain ASCII.

    Non-string values (numbers, NaN, None, ...) are returned unchanged, so the
    function can be applied to every cell of a mixed-type DataFrame.

    Processing order for strings:
      1. Curly double quotes and the double-prime sign become a straight double
         quote; curly single quotes become a straight single quote.
      2. A digit followed by at most one whitespace character and one or more
         double quotes collapses to the digit plus a single double quote.
      3. Two consecutive single quotes (with at most one leading whitespace
         character) become one double quote.
      4. A degree sign (with at most one leading whitespace character) becomes
         the placeholder ``<deg>``.
      5. Every key of ``replacements`` is replaced literally by its value.
      6. Any character that is still non-ASCII is removed.

    Args:
        text: Value to clean; only ``str`` instances are modified.
        replacements: Optional mapping of symbol -> placeholder used in step 5.
            Defaults to the module-level ``non_ascii_replacement_dict``.

    Returns:
        The cleaned string, or ``text`` itself when it is not a string.
    """
    if not isinstance(text, str):
        return text

    if replacements is None:
        replacements = non_ascii_replacement_dict

    text = text.replace('“', '"').replace('”', '"').replace('″', '"')
    text = text.replace('‘', "'").replace('’', "'")

    # digit + optional space + one or more double quotes -> digit + one double quote
    text = re.sub(r'(\d)\s?""*', r'\1"', text)

    # optional space + two single quotes -> one double quote
    text = re.sub(r"\s?''", '"', text)

    # replace degree sign with text
    text = re.sub(r'\s?°', '<deg>', text)

    # Literal substitution: the previous re.sub(..., re.escape(replace), ...) escaped the
    # replacement template, which leaks backslashes into the output for placeholders that
    # contain regex-special characters. str.replace has no template syntax to get wrong.
    for symbol, placeholder in replacements.items():
        text = text.replace(symbol, placeholder)

    # drop whatever non-ASCII characters are left
    return re.sub(r'[^\x00-\x7F]', '', text)

In [197]:
def to_uppercase(text):
    """Return ``text`` upper-cased when it is a string; any other value is passed through as-is."""
    return text.upper() if isinstance(text, str) else text

#### Code to find different patterns of double quotes to identify outliers

In [4]:
# not useful to find these patterns as we are not deriving relations
def find_double_quotes_pattern(text):
    """Report which double-quote patterns occur in the cleaned form of ``text``.

    The value is first passed through ``clean_text``. If the result is a string,
    three searches are run on it: a number enclosed in double quotes, a number
    followed by a double quote, and non-numeric text enclosed in double quotes.
    The original text, the cleaned text and every match found are printed.

    Returns:
        True when at least one of the three patterns matched, otherwise False
        (also False when the cleaned value is not a string).
    """
    cleaned_text = clean_text(text)
    if not isinstance(cleaned_text, str):
        return False

    # (label printed for the match, match object or None), in reporting order
    labelled_matches = (
        ("Number inside double quotes", re.search(r'"\d+(\.\d+)?"', cleaned_text)),
        ("Number suffix double quotes", re.search(r'\d+(\.\d+)?"', cleaned_text)),
        ("Text inside double quotes", re.search(r'"([^0-9.]+)"', cleaned_text)),
    )
    hits = [(label, match) for label, match in labelled_matches if match is not None]

    if hits:
        print("Text = ", text)
        print("Cleaned Text = ", cleaned_text)

    for label, match in hits:
        print(label)
        print(match)

    return bool(hits)

In [191]:
def find_non_ascii(text):
    """Tell whether ``text`` is a string holding at least one character outside the ASCII range.

    Non-string values always yield False.
    """
    if not isinstance(text, str):
        return False
    # A string is "non-ASCII" exactly when some code point lies above 0x7F.
    return not text.isascii()

In [192]:
def validate_non_ascii(df):
    """Return the rows of ``df`` in which at least one cell contains a non-ASCII character.

    Each cell is checked with ``find_non_ascii``; an empty result means every
    string cell of ``df`` is pure ASCII.
    """
    cell_flags = df.apply(lambda column: column.apply(find_non_ascii))
    row_has_non_ascii = cell_flags.any(axis=1)
    return df[row_has_non_ascii]

In [193]:
def contains_specific_signs(item_name, signs):
    """Return True when the string form of ``item_name`` contains any entry of ``signs``.

    ``item_name`` is converted with ``str()``, so non-string values are matched
    against their textual representation.
    """
    for sign in signs:
        if sign in str(item_name):
            return True
    return False

In [248]:
def grouped_unique_df(dataframe: pd.DataFrame, group_by_field: str | list[str], unique_field: str):
    if not isinstance(group_by_field, list):
        group_by = [group_by_field]
    else:
        group_by = group_by_field

    grouped = pd.DataFrame(dataframe.groupby(group_by)[unique_field].unique())
    grouped_df = grouped.reset_index()
    
    columns = []
    columns.extend(group_by)
    columns.append(unique_field)
    grouped_df.columns = columns

    unique_field_count = unique_field + '_COUNT'
    grouped_df[unique_field_count] = grouped_df[unique_field].apply(len)
    grouped_df = grouped_df.sort_values(by=unique_field_count, ascending=False)
    return grouped_df
    

In [194]:
# os.environ[...] (rather than os.getenv) makes a missing PURCHASE_ORDER_FOLDER_PATH fail with a
# KeyError that names the variable, instead of an opaque TypeError from os.path.join(None, ...).
df = pd.read_excel(os.path.join(os.environ['PURCHASE_ORDER_FOLDER_PATH'], 'Data 1.xlsx'))
# Normalise the headers: spaces become underscores and names are upper-cased.
df.columns = df.columns.str.replace(' ', '_').str.upper()

In [209]:
df[['PO_NUM', 'ITEM_CODE', 'ITEM_NAME']].head(20)

Unnamed: 0,PO_NUM,ITEM_CODE,ITEM_NAME
0,232431279,MC11560,Bracket no A-081-G MDR Locking Strip
1,232471206,4556800,Locking Plate
2,232472169,,Charges for embossing tool trial Proto
3,72616,,DR Solution @12
4,72616,,During The DR Drill Acrivity @2
5,72616,,"VM Installation & Configuration, Virtual Dedic..."
6,232420215,,DR Solution @1
7,52780,,PRODUCT IMPLEMENTATION & SERVICES FOR SEIMENS ...
8,52780,,ePlan TC side setup on Test server
9,73958,,NX CAD/CAM Total Machining NX 12490 for one y...


In [27]:
# check ITEM_NAME contains the specific sign/symbol and get count
df[df['ITEM_NAME'].apply(lambda x: contains_specific_signs(x, ['®']))][['ITEM_NAME']]

Unnamed: 0,ITEM_NAME
1492,EasyCAT spi_on_13_12_1 EtherCATÂ® 32 byte in i...
2239,conductor female connector CAGE CLAMP® 1.5 mm²...
3055,SOM-2569BN0C-S3A1 SMARC MODULE Intel® E3930 4G RA
5299,conductor female connector CAGE CLAMP® 0.5 mm²...
5300,FEMALE PLUG WITH CODIG FINGERS 6 POLE PIN SPAC...
...,...
43511,"SanDisk Ultra® microSDXC UHS-I Card, 64GB, 140..."
45426,LOCTITE® EA E-214HP™
45468,SRG0114 ScriptronicsÂ® UNO R3 ATmega328P SMD C...
46146,LOCTITE® 243 Blue Medium Strength Threadlocker...


In [196]:
validate_non_ascii(df[['ITEM_NAME']])

Unnamed: 0,ITEM_NAME
33,11879651Belt 6100 CC _Disa (shot blasting )
34,"5-1002-003Bucket DIN15232-C200x140X2,0GK _..."
35,4-2401-052Connection Elevator Belt _Disa (s...
36,10363906Screw M10x35-8.8 DIN15237 _Disa (sh...
37,10128231Nut M10-6 ZN DIN1587 _Disa (shot bl...
...,...
50289,Dynamo 500 – Side cover –Mould-Al
50290,Dynamo 500 – Side cover – Plug
50299,AMR 100 – Diffuser Mould
50419,ZIPPY 25 - TOP COVER – SEPERATOR - Mould-Al


In [198]:
# Clean every cell (quote/symbol normalisation, ASCII only), then upper-case every string cell.
df_cleaned = (
    df
    .apply(lambda col: col.apply(clean_text))
    .apply(lambda col: col.apply(to_uppercase))
)

In [210]:
df_cleaned[['PO_NUM', 'ITEM_CODE', 'ITEM_NAME']].head(20)

Unnamed: 0,PO_NUM,ITEM_CODE,ITEM_NAME
0,232431279,MC11560,BRACKET NO A-081-G MDR LOCKING STRIP
1,232471206,4556800,LOCKING PLATE
2,232472169,,CHARGES FOR EMBOSSING TOOL TRIAL PROTO
3,72616,,DR SOLUTION @12
4,72616,,DURING THE DR DRILL ACRIVITY @2
5,72616,,"VM INSTALLATION & CONFIGURATION, VIRTUAL DEDIC..."
6,232420215,,DR SOLUTION @1
7,52780,,PRODUCT IMPLEMENTATION & SERVICES FOR SEIMENS ...
8,52780,,EPLAN TC SIDE SETUP ON TEST SERVER
9,73958,,NX CAD/CAM TOTAL MACHINING NX 12490 FOR ONE Y...


In [211]:
validate_non_ascii(df_cleaned[['ITEM_NAME']])

Unnamed: 0,ITEM_NAME


In [213]:
cleaned_item_name = pd.DataFrame(df_cleaned[df_cleaned['ITEM_NAME'].apply(lambda x: contains_specific_signs(x, ['<TRADEMARK>']))]['ITEM_NAME'])
cleaned_item_name

Unnamed: 0,ITEM_NAME
26089,102548R0 SPROCKET 20 TEETH 0.75<EURO><TRADEMAR...
26126,102548R0 SPROCKET 20 TEETH 0.75<TRADEMARK><TRA...
26133,102548R0 SPROCKET 20 TEETH 0.75<TRADEMARK><TRA...
26173,102548R0 SPROCKET 20 TEETH 0.75<TRADEMARK><TRA...
33453,INTEL<REGISTERED> REALSENSE<TRADEMARK> DEPTH C...
43350,NVIDIA<REGISTERED> JETSON ORIN NANO<TRADEMARK>...
43432,INTEL<REGISTERED> REALSENSE<TRADEMARK> DEPTH C...
43435,NVIDIA<REGISTERED> JETSON ORIN NANO<TRADEMARK>...
45426,LOCTITE<REGISTERED> EA E-214HP<TRADEMARK>
45905,LAUNCHXL-F2800157 TMS320F2800157 LAUNCHPAD<TRA...


In [296]:
df_cleaned[df_cleaned['ITEM_NAME'].isin(['AUXILIARY OUTPUTS 2', 'CONSUMABLE COMPONENT'])]

Unnamed: 0,#,DOCTYPE,CANCELED,DOCCUR,DOCRATE,DOCSTATUS,PO_NUM,BASE_TYPE,BASE_NUMBER,BASE_LINE,...,MANUALLY_CLOSED_PO,PO_OWNER_NAME,PYMNTGROUP,HEADER,FOOTER,BRANCH_NAME,IMP_OR_EXP,COST_SAVING,COST_SAVING_CRITERIA,FORCE_CLOSED_PO_QTY


In [262]:
po_unique_locations_df = grouped_unique_df(df_cleaned, 'PO_NUM', 'LOCATION')
po_unique_locations_df[po_unique_locations_df['LOCATION_COUNT'] > 1] # count = 1231
# po_unique_locations_df # count = 15100

Unnamed: 0,PO_NUM,LOCATION,LOCATION_COUNT
12668,232471356,"[GUJRAT, TRICHY, KOLKATA, NOIDA, KARNATAKA, US...",9
12753,232471441,"[USA, SEA, MAHARASHTRA, KARNATAKA, EMEA, NOIDA...",8
5779,75332,"[VISHNUVAKKAM, MADHYA PRADESH, WEST BENGAL, MA...",7
13797,232472485,"[BANGALORE, RAJASTHAN, USA, DAMAN, HARYANA, MA...",7
3364,72917,"[KOLKATA, CHENNAI, BADLI, MADHYA PRADESH, PANC...",7
...,...,...,...
14068,232472756,"[DAMAN, KARNATAKA]",2
14525,242530061,"[EMEA, DAMAN]",2
14070,232472758,"[USA, DAMAN]",2
7086,232430487,"[NOIDA, RAJASTHAN]",2


In [214]:
# os.environ[...] (rather than os.getenv) makes a missing ELECTRICAL_PARTS_FOLDER_PATH fail with a
# KeyError that names the variable, instead of an opaque TypeError from os.path.join(None, ...).
electrical_parts = pd.read_csv(
    os.path.join(os.environ['ELECTRICAL_PARTS_FOLDER_PATH'], 'Electrical Parts Report Modified.csv'),
)
# Normalise the headers: spaces become underscores and names are upper-cased.
electrical_parts.columns = electrical_parts.columns.str.replace(' ', '_').str.upper()

In [215]:
electrical_parts.shape

(9359, 21)

In [216]:
pd.DataFrame(electrical_parts.nunique())

Unnamed: 0,0
PART_ID,9351
PART_NAME,5530
PART_DESCRIPTION,6846
PART_OWNER,3
PART_REVISION,2
REVISION_DATE,3205
PRODUCT_GROUP,64
PRODUCT_SUBGROUP,83
WIDTH_(MM),637
HEIGHT_(MM),738


In [217]:
validate_non_ascii(electrical_parts[['PART_NAME']])

Unnamed: 0,PART_NAME
622,250 µs/ch
650,"45° control valve with flange, heavy series"
651,"45° control valve with flange, light series"
652,"45° control valve with union nut, heavy series"
653,"45° control valve with union nut, light series"
...,...
9354,"ÖLFLEX® CLASSIC 100 5G1,5"
9355,"ÖLFLEX® CLASSIC 100 H 5G2,5"
9356,"ÖLFLEX® FD ROBUST 756 C 4 G 2,5+(2x1)"
9357,"ÖLFLEX® SERVO 700 4G1,5+(2x0,75)FDF"


In [219]:
# Clean every cell (quote/symbol normalisation, ASCII only), then upper-case every string cell.
electrical_parts_cleaned = (
    electrical_parts
    .apply(lambda col: col.apply(clean_text))
    .apply(lambda col: col.apply(to_uppercase))
)
electrical_parts_cleaned.head(20)

Unnamed: 0,PART_ID,PART_NAME,PART_DESCRIPTION,PART_OWNER,PART_REVISION,REVISION_DATE,PRODUCT_GROUP,PRODUCT_SUBGROUP,WIDTH_(MM),HEIGHT_(MM),...,MOUNTING_CLEARANCES_(MM),WEIGHT_(KG),MANUFACTURER,MANUFACTURER_PART_NUMBER,EPLAN_PART_NUMBER,OLD_PLM_ID,OLD_SAP_ID,RELEASED_STATUS,RELEASED_DATE,PART_CATEGORY
0,41237,"CABLE GLAND- DOUBLE COMPRESSION CABLE, M90","CABLE GLAND DOUBLE COMPRESSION, SS304, OD:72-8...",KUMARI MONIKA (KUMARIMONIKA),0,31-AUG-2023 16:47,0.0,0.0,0.0,0.0,...,,0.0,CGS,CGW 014,CGS.CGW 014,,,PRODUCTION,18-SEP-2023 10:46,ELECTRICAL PART
1,52897,"E-STOP- LED, SELF MONTORING, 2CH",EMERGENCY PUSH BUTTON,KUMARI MONIKA (KUMARIMONIKA),0,01-MAR-2024 16:47,12.0,1.0,0.0,0.0,...,,2.7,ROCKWELL,800F-1YMD81,A-B.800F-1YMD81,,,PRODUCTION,08-MAR-2024 11:36,ELECTRICAL PART
2,54675,"INTERFACE MODULE-PLC,RS485",CPU - CENTRAL PROCESSING UNITS NX1P RS-422/485...,KUMARI MONIKA (KUMARIMONIKA),0,11-APR-2024 12:38,26.0,1.0,0.0,0.0,...,,0.0,OMRON,NX1W-CIF11,OMR.NX1W-CIF11,,,PRODUCTION,12-APR-2024 10:46,ELECTRICAL PART
3,57680,"BRAKE CONNECTOR-FOR HF-SE/SN/SP/JP,HG-SN/SR,HK...","BRAKE CONNECTOR FOR HF-SE/SN/SP/JP,HG-SN/SR,HK...",KUMARI MONIKA (KUMARIMONIKA),0,31-MAY-2024 12:11,129.0,1.0,90.0,60.0,...,,0.05,MITSUBISHI,MR-BKCNS1-2M-L,MIT.MR-BKCNS1-2M-L,,,PRODUCTION,31-MAY-2024 16:43,ELECTRICAL PART
4,64912,CONTACT-1NO,WITHOUT HOLDER - NON-ILLUMINATED - SINGLE CONT...,KUMARI MONIKA (KUMARIMONIKA),0,21-SEP-2024 14:05,6.0,194.0,10.0,44.0,...,,0.013,ABB,1SFA611610R1001,ABB.1SFA611610R1001,,,PRODUCTION,23-SEP-2024 13:59,ELECTRICAL PART
5,58580,HANDLE - SAFETY DOOR SWITCH,"HANDLE - SAFETY DOOR SWITCH,SIGNAL RED DOOR HA...",KUMARI MONIKA (KUMARIMONIKA),0,12-JUN-2024 16:48,1.0,0.0,0.0,0.0,...,,0.81,EUCHNER,100465,EUC.MGB-E-A-100465,,,PRODUCTION,14-JUN-2024 16:52,ELECTRICAL PART
6,18809,"WIRE 1.5 SQMM YELLOW, 1X1.5 FR-LSH, LAPP","PN: 4620111U LAPP, WIRE 1.5 SQMM YELLOW, 1X1.5...",KUMARI MONIKA (KUMARIMONIKA),0,21-OCT-2022 19:15,29.0,1.0,0.0,0.0,...,,0.0,LAPP,4620111U,LAPP.4620111U,,,PRODUCTION,13-DEC-2022 15:25,ELECTRICAL PART
7,18375,"""""RUN"""" OUTPUT RELAY""\tNOS\t0.47\t90\t70\t90\t...",,KUMARI MONIKA (KUMARIMONIKA),0,21-OCT-2022 11:52,1.0,1.0,,,...,,,,30 W,"OMR.NJ-PA3001\t\tNJ-PA3001\tNJ-PA3001""MACHINE ...",,,,,ELECTRICAL PART
8,57591,0.14 MM PIN LUGS,LUGS PRE-INSULATED END FERRULE 0.14SQMM/L-8MM,KUMARI MONIKA (KUMARIMONIKA),0,30-MAY-2024 11:03,3.0,1.0,0.0,0.0,...,,0.0,ELPRESS,"A0,14-8ETD","ELP.A0,14-8ETD",,,PRODUCTION,30-MAY-2024 14:34,ELECTRICAL PART
9,17799,0.25 SQ/MM WIRE WHITE/BLUE,WIRE; LIY; STRANDED; CU; 0.25MM2; PVC; WHITE-B...,KUMARI MONIKA (KUMARIMONIKA),0,19-OCT-2022 16:18,29.0,1.0,,,...,,,LAPP,4502442S,LAPP.4502442S,,,,,ELECTRICAL PART


In [222]:
electrical_parts_cleaned[['PART_NAME']]

Unnamed: 0,PART_NAME
0,"CABLE GLAND- DOUBLE COMPRESSION CABLE, M90"
1,"E-STOP- LED, SELF MONTORING, 2CH"
2,"INTERFACE MODULE-PLC,RS485"
3,"BRAKE CONNECTOR-FOR HF-SE/SN/SP/JP,HG-SN/SR,HK..."
4,CONTACT-1NO
...,...
9354,"LFLEX<REGISTERED> CLASSIC 100 5G1,5"
9355,"LFLEX<REGISTERED> CLASSIC 100 H 5G2,5"
9356,"LFLEX<REGISTERED> FD ROBUST 756 C 4 G 2,5+(2X1)"
9357,"LFLEX<REGISTERED> SERVO 700 4G1,5+(2X0,75)FDF"


In [223]:
validate_non_ascii(electrical_parts_cleaned[['PART_NAME']])

Unnamed: 0,PART_NAME


In [272]:
grouped_unique_df(electrical_parts_cleaned, ['PART_NAME', 'PART_DESCRIPTION'], 'PART_ID')

Unnamed: 0,PART_NAME,PART_DESCRIPTION,PART_ID,PART_ID_COUNT
212,AUXILIARY OUTPUTS 2,SCREW TERMINALS,"[20786, 21746, 21816, 19303, 18458, 20353, 185...",555
1649,"COLOR: GREEN-YELLOW;""\tNOS\t0.0247\t50.192303\...","DE);""\t1\t3\t4\t100\t8\t3","[19309, 22361, 20317, 18469, 18542, 22193, 192...",555
3345,HYDRAULIC TUBE,HYDRAULIC TUBE,"[46720, 46343, 46535, 46542, 46543, 46544, 465...",13
2381,CROSS-CONNECTOR FOR TERMINAL BLOCK,"PLUG-IN BRIDGE, PITCH: 5.2 MM, COLOR: RED","[50737, 50703, 50659, 50660, 50738, 50742, 50739]",7
2779,ENCLOSURE-JUNCTION BOX,JUNCTION BOX,"[42200, 42209, 42207, 42201, 42199]",5
...,...,...,...,...
2330,COVER,HEAVY DUTY POWER CONNECTORS SB120 DUST COVER/R...,[24533],1
2329,COVER,FUSE HOLDER COVER,[18069],1
2328,COVER,COVER FOR POWER CONNECTOR,[29061],1
2327,"COUPLING CONTACTOR RELAY, 2NO+2NC","COUPLING CONTACTOR RELAY, 2NO+2NC DC 24V, 0.7....",[46467],1


In [273]:
electric_parts_grouped_df = grouped_unique_df(electrical_parts_cleaned, ['PART_NAME', 'PART_DESCRIPTION', 'WIDTH_(MM)', 'HEIGHT_(MM)', 'DEPTH_(MM)', 'WEIGHT_(KG)'], 'PART_ID')
electric_parts_grouped_df
# electric_parts_grouped_df[electric_parts_grouped_df['PART_NAME'] == 'CABLE']

Unnamed: 0,PART_NAME,PART_DESCRIPTION,WIDTH_(MM),HEIGHT_(MM),DEPTH_(MM),WEIGHT_(KG),PART_ID,PART_ID_COUNT
2273,HYDRAULIC TUBE,HYDRAULIC TUBE,0.0,0.0,0.0,0.00,"[46720, 46343, 46535, 46542, 46543, 46544, 465...",13
261,BMS TO CANET,BMS TO CANET AND KEY SWITCH,0.0,0.0,0.0,0.00,"[56476, 48128, 56358]",3
992,CB TO CANNET POWER,CANET POWER WHS,0.0,0.0,0.0,0.00,"[56383, 48164, 56472]",3
4449,TB2 TO WLAN,WLAN POWER TO TB AND CB,0.0,0.0,0.0,0.00,"[56372, 56474, 48133]",3
4448,TB2 TO WIFI HUB,TB TO EHERNET HUB,0.0,0.0,0.0,0.00,"[56371, 48134, 56473]",3
...,...,...,...,...,...,...,...,...
1647,DIN RAIL- 15,DIN RAIL PERFORATED-15MM/M4 SCREW WIDTH,15.0,1000.0,5.5,0.00,[48320],1
1646,"DIN RAIL, UNPERFORATED","DIN RAIL, UNPERFORATED, WIDTH:35 MM, HEIGHT:7....",35.0,7.5,2000.0,0.23,[53311],1
1645,"DIN RAIL, UNPERFORATED","DIN RAIL, UNPERFORATED, WIDTH:35 MM, HEIGHT:15...",35.0,15.0,2000.0,1.79,[53306],1
1644,"DIN RAIL, UNPERFORATED","DIN RAIL, UNPERFORATED, STANDARD PROFILE, WIDT...",35.0,7.5,2000.0,0.69,[53302],1


In [288]:
# Parts for which width, height and depth are all missing (independent copy of those rows).
dimension_nan_df = electrical_parts_cleaned.loc[
    electrical_parts_cleaned[['WIDTH_(MM)', 'HEIGHT_(MM)', 'DEPTH_(MM)']].isna().all(axis=1)
].copy()
dimension_nan_df

Unnamed: 0,PART_ID,PART_NAME,PART_DESCRIPTION,PART_OWNER,PART_REVISION,REVISION_DATE,PRODUCT_GROUP,PRODUCT_SUBGROUP,WIDTH_(MM),HEIGHT_(MM),...,MOUNTING_CLEARANCES_(MM),WEIGHT_(KG),MANUFACTURER,MANUFACTURER_PART_NUMBER,EPLAN_PART_NUMBER,OLD_PLM_ID,OLD_SAP_ID,RELEASED_STATUS,RELEASED_DATE,PART_CATEGORY
7,18375,"""""RUN"""" OUTPUT RELAY""\tNOS\t0.47\t90\t70\t90\t...",,KUMARI MONIKA (KUMARIMONIKA),0,21-OCT-2022 11:52,1.0,1.0,,,...,,,,30 W,"OMR.NJ-PA3001\t\tNJ-PA3001\tNJ-PA3001""MACHINE ...",,,,,ELECTRICAL PART
9,17799,0.25 SQ/MM WIRE WHITE/BLUE,WIRE; LIY; STRANDED; CU; 0.25MM2; PVC; WHITE-B...,KUMARI MONIKA (KUMARIMONIKA),0,19-OCT-2022 16:18,29.0,1.0,,,...,,,LAPP,4502442S,LAPP.4502442S,,,,,ELECTRICAL PART
12,17798,0.5 SQ/MM WIRE WHITE/BLUE,"LAPP KABEL<REGISTERED> MULTI-STANDARD SC 2.1, ...",KUMARI MONIKA (KUMARIMONIKA),0,19-OCT-2022 16:18,29.0,1.0,,,...,,,LAPP,4160144K,LAPP.4160144K,,,,,ELECTRICAL PART
13,17802,0.75 SQ/MM WIRE WHITE/BLUE,"MULTI-STANDARD SC 2.1 0,75_",KUMARI MONIKA (KUMARIMONIKA),0,19-OCT-2022 16:18,29.0,1.0,,,...,,,LAPP,4160244K,LAPP.4160244K,,,,,ELECTRICAL PART
14,10071,10071,,HIMANSHU MEHTA (HIMANSHUMEHTA),0,16-APR-2024 15:45,,,,,...,,,,,,,,,,ELECTRICAL PART
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9330,37858,"WIRELESS ESTOP-WOL,SELF M,RX,1CH",DC WIRELESS E-STOP RECEIVER,KUMARI MONIKA (KUMARIMONIKA),0,29-JUN-2023 18:43,12.0,208.0,,,...,,,KAR-TECH,3A5483A,KAR.3A5483A,,,PRODUCTION,05-JUL-2023 18:42,ELECTRICAL PART
9331,37857,"WIRELESS ESTOP-WOL,SELF M,TX,1CH",WIRELESS E-STOP WALL MOUNT TRANSMITTER,KUMARI MONIKA (KUMARIMONIKA),0,29-JUN-2023 18:43,12.0,208.0,,,...,,,KAR-TECH,3A548NA,KAR.3A548NA,,,PRODUCTION,05-JUL-2023 18:42,ELECTRICAL PART
9335,28316,Y CONNECTOR,Y DISTRIBUTOR SYL-1208-G02MASCO,KUMARI MONIKA (KUMARIMONIKA),0,30-DEC-2022 11:31,4.0,215.0,,,...,,,SICK,6048329,SICK.6048329,,,PRODUCTION,02-JAN-2023 13:39,ELECTRICAL PART
9345,46983,LFLEX 540 P 5G4,POWER AND CONTROL CABLES / HARSH USE CONDITION...,KUMARI MONIKA (KUMARIMONIKA),0,28-NOV-2023 11:10,29.0,1.0,,,...,,,LAPP,124763,LAPP.00124763,,,,,ELECTRICAL PART


In [292]:
grouped_unique_df(dimension_nan_df, ['PART_NAME', 'PART_DESCRIPTION'], 'PART_ID')

Unnamed: 0,PART_NAME,PART_DESCRIPTION,PART_ID,PART_ID_COUNT
502,"COLOR: GREEN-YELLOW;""\tNOS\t0.0247\t50.192303\...","DE);""\t1\t3\t4\t100\t8\t3","[19309, 22361, 20317, 18469, 18542, 22193, 192...",555
83,AUXILIARY OUTPUTS 2,SCREW TERMINALS,"[20786, 21746, 21816, 19303, 18458, 20353, 185...",555
638,CONSUMABLE COMPONENT,FABRICATION OF WALL MOUNTING RACK,"[28799, 29306]",2
1356,STICKER,PANEL WARNING SIGN: 400VAC/230VAC,"[33921, 33920]",2
772,END PLATE,END AND INTERMEDIATE PLATE; 1 MM THICK; GRAY,"[34770, 33024]",2
...,...,...,...,...
524,"CONDUIT GLAND-POLYAMIDE CABLE,PG21","CONDUIT GLAND-POLYAMIDE CABLE,PG21,BLACK",[42252],1
523,"CONDUIT GLAND-POLYAMIDE CABLE,PG13.5","CONDUIT GLAND-POLYAMIDE CABLE,PG13.5,BLACK",[42250],1
522,"CONDUIT GLAND-POLYAMIDE CABLE,M20","CONDUIT GLAND-POLYAMIDE CABLE,M20,BLACK",[42243],1
521,CONDUIT GLAND,POLYAMIDE CONDUIT GLANDS BLACK - PG 13.5 THREAD,[29177],1


In [390]:
# Inner join of purchase-order line items to catalogue parts on the cleaned, upper-cased name.
# NOTE(review): neither ITEM_NAME nor PART_NAME is unique in its frame, so this is a
# many-to-many join and each line item is repeated once per catalogue part sharing its name;
# the row count of merged_data therefore does not represent distinct line items — confirm
# this is intended before relying on counts derived from it.
merged_data = pd.merge(df_cleaned, electrical_parts_cleaned, how="inner", left_on="ITEM_NAME", right_on="PART_NAME")

In [391]:
merged_data.shape

(328606, 80)

In [363]:
merged_data[merged_data['ITEM_CODE'] == merged_data['PART_ID']]['PART_ID'].count()

1907

In [364]:
merged_data[merged_data['ITEM_CODE'] == merged_data['PART_ID']][['ITEM_CODE', 'ITEM_NAME', 'PART_ID', 'PART_NAME']].nunique()

ITEM_CODE    507
ITEM_NAME    340
PART_ID      507
PART_NAME    340
dtype: int64

In [396]:
merged_data[merged_data['PO_NUM'] == 232433828][['ITEM_NAME', 'ITEM_CODE']]
# [['ITEM_CODE', 'ITEM_NAME', 'PART_ID', 'PART_NAME']].nunique()

Unnamed: 0,ITEM_NAME,ITEM_CODE
38191,WIRE CLAMP - 18MM,4312600


In [305]:
min_po = merged_data['PO_NUM'].min()
max_po = merged_data['PO_NUM'].max()
mean_po = merged_data['PO_NUM'].mean()
median_po = merged_data['PO_NUM'].median()
unique_count_po = merged_data['PO_NUM'].nunique()

print(min_po, max_po, mean_po, median_po, unique_count_po)

53586 242530642 156955410.28671417 232431665.0 2951


In [372]:
unique_pos = merged_data['PO_NUM'].unique()
electric_line_items = df_cleaned[df_cleaned['PO_NUM'].isin(unique_pos)]
# electric_line_items # count = 12437

# electric_line_items['ITEM_CODE'].nunique() # count = 4854
# electric_line_items[electric_line_items['ITEM_CODE'].isnull()]['#']

# unique_item_codes = electric_line_items['ITEM_CODE'].unique()
# unique_item_codes

grouped_unique_df(electric_line_items, 'PO_NUM', 'ITEM_NAME')

# unique id of line items that has item code defined as NaN
# electric_line_items[electric_line_items['#'].isin([2849, 30742, 30743, 30744, 30745, 30746, 30747, 30748])][['ITEM_CODE', 'ITEM_NAME', 'FG_ITEM_DESCRIPTION']]

Unnamed: 0,PO_NUM,ITEM_NAME,ITEM_NAME_COUNT
1717,232433828,"[CAP CER 1UF 50V X5R 0603: CL10A105KB8NNNC, PR...",148
986,75452,[RES SMD 100K OHM 1% 1/10W 0603: RC0603FR-0710...,107
1444,232431786,"[RPS-120-12-C AC/DC CONVERTER 12V 84W, TCMD100...",87
1744,232434204,"[FUSE, RES SMD 10K OHM 1% 1/10W 0603: RC0603FR...",81
100,57961,"[POWER AND CONTROL CABLE 12X0.5 12.4 MM, CONDU...",78
...,...,...,...
1458,232431853,[DRIVE],1
1457,232431850,"[MCB-2P,40A]",1
1455,232431831,[DRIVE],1
1453,232431817,[CABLE TRAY],1


In [387]:
electric_line_items[electric_line_items['PO_NUM'] == 232433828][['ITEM_CODE', 'ITEM_NAME']].nunique()

ITEM_CODE    148
ITEM_NAME    148
dtype: int64

In [371]:
electric_line_items['ITEM_NAME'].nunique()

3669

In [350]:
electrical_parts_cleaned[electrical_parts_cleaned['PART_NAME'].isin(['STICKER', 'COMMISSIONING & INSTALLATION'])]

Unnamed: 0,PART_ID,PART_NAME,PART_DESCRIPTION,PART_OWNER,PART_REVISION,REVISION_DATE,PRODUCT_GROUP,PRODUCT_SUBGROUP,WIDTH_(MM),HEIGHT_(MM),...,MOUNTING_CLEARANCES_(MM),WEIGHT_(KG),MANUFACTURER,MANUFACTURER_PART_NUMBER,EPLAN_PART_NUMBER,OLD_PLM_ID,OLD_SAP_ID,RELEASED_STATUS,RELEASED_DATE,PART_CATEGORY
3457,26241,COMMISSIONING & INSTALLATION,COMMISSIONING & INSTALLATION,KUMARI MONIKA (KUMARIMONIKA),0,01-DEC-2022 11:25,1.0,1.0,,,...,,,TECHVISION,CCTVINSTA,TV.CCTVINSTA,,,PRODUCTION,01-DEC-2022 12:18,ELECTRICAL PART
8188,31152,STICKER,PANEL WARNING SIGN: 440VAC/230VAC,KUMARI MONIKA (KUMARIMONIKA),0,13-FEB-2023 12:17,17.0,1.0,,,...,,,SONI,PWS-440/230-CA,SONI.PWS-440/230-CA,,,PRODUCTION,13-FEB-2023 12:31,ELECTRICAL PART
8189,31153,STICKER,PANEL WARNING SIGN: 230VAC,KUMARI MONIKA (KUMARIMONIKA),0,13-FEB-2023 12:17,17.0,1.0,,,...,,,SONI,PWS-230-CA,SONI.PWS-230-CA,,,PRODUCTION,13-FEB-2023 12:31,ELECTRICAL PART
8190,34794,STICKER,PANEL WARNING SIGN: 230VAC,KUMARI MONIKA (KUMARIMONIKA),0,04-MAY-2023 16:28,17.0,1.0,,,...,,,SAHARA LABELS,PWS-230-CA,SAHARA.PWS-230-CA,,,PRODUCTION,09-MAY-2023 17:53,ELECTRICAL PART
8191,34793,STICKER,PANEL WARNING SIGN: AUTHORIZED ACCESS,KUMARI MONIKA (KUMARIMONIKA),0,04-MAY-2023 16:28,17.0,1.0,,,...,,,SAHARA LABELS,PWS-AS-CA,SAHARA.PWS-AS-CA,,,PRODUCTION,09-MAY-2023 17:53,ELECTRICAL PART
8192,31151,STICKER,PANEL WARNING SIGN: DANGER/ELECTROCUTION,KUMARI MONIKA (KUMARIMONIKA),0,13-FEB-2023 12:17,17.0,1.0,,,...,,,SONI,PWS-D/S-CA,SONI.PWS-D/S-CA,,,PRODUCTION,13-FEB-2023 12:31,ELECTRICAL PART
8193,31159,STICKER,PANEL WARNING SIGN: 480VAC,KUMARI MONIKA (KUMARIMONIKA),0,13-FEB-2023 12:17,17.0,1.0,,,...,,,SONI,PWS-480-CA,SONI.PWS-480-CA,,,PRODUCTION,13-FEB-2023 12:31,ELECTRICAL PART
8194,31158,STICKER,PANEL WARNING SIGN: 480VAC/110VAC,KUMARI MONIKA (KUMARIMONIKA),0,13-FEB-2023 12:17,17.0,1.0,,,...,,,SONI,PWS-480/110-CA,SONI.PWS-480/110-CA,,,PRODUCTION,13-FEB-2023 12:31,ELECTRICAL PART
8195,31155,STICKER,PANEL WARNING SIGN: AUTHORIZED ACCESS,KUMARI MONIKA (KUMARIMONIKA),0,13-FEB-2023 12:17,17.0,1.0,,,...,,,SONI,PWS-AS-CA,SONI.PWS-AS-CA,,,PRODUCTION,13-FEB-2023 12:31,ELECTRICAL PART
8196,33921,STICKER,PANEL WARNING SIGN: 400VAC/230VAC,KUMARI MONIKA (KUMARIMONIKA),0,17-APR-2023 10:40,17.0,1.0,,,...,,,SONI,PWS-400-CA,SONI.PWS-400-CA,,,PRODUCTION,17-APR-2023 10:50,ELECTRICAL PART


In [366]:
# The only assignment of `unique_item_codes` in this notebook is commented out, so referencing it
# raises NameError on a fresh kernel; derive the item codes directly from electric_line_items.
electrical_parts_cleaned[
    electrical_parts_cleaned['PART_ID'].isin(electric_line_items['ITEM_CODE'].unique())
]

Unnamed: 0,PART_ID,PART_NAME,PART_DESCRIPTION,PART_OWNER,PART_REVISION,REVISION_DATE,PRODUCT_GROUP,PRODUCT_SUBGROUP,WIDTH_(MM),HEIGHT_(MM),...,MOUNTING_CLEARANCES_(MM),WEIGHT_(KG),MANUFACTURER,MANUFACTURER_PART_NUMBER,EPLAN_PART_NUMBER,OLD_PLM_ID,OLD_SAP_ID,RELEASED_STATUS,RELEASED_DATE,PART_CATEGORY
6,18809,"WIRE 1.5 SQMM YELLOW, 1X1.5 FR-LSH, LAPP","PN: 4620111U LAPP, WIRE 1.5 SQMM YELLOW, 1X1.5...",KUMARI MONIKA (KUMARIMONIKA),0,21-OCT-2022 19:15,29.0,1.0,0.0,0.0,...,,0.0,LAPP,4620111U,LAPP.4620111U,,,PRODUCTION,13-DEC-2022 15:25,ELECTRICAL PART
9,17799,0.25 SQ/MM WIRE WHITE/BLUE,WIRE; LIY; STRANDED; CU; 0.25MM2; PVC; WHITE-B...,KUMARI MONIKA (KUMARIMONIKA),0,19-OCT-2022 16:18,29.0,1.0,,,...,,,LAPP,4502442S,LAPP.4502442S,,,,,ELECTRICAL PART
10,18776,"0.5 SQ.MM SINGLE CORE CABLE - BLUE, LAPP",0.5 SQ.MM WIRE - BLUE MAKE 4610021U LAPP,KUMARI MONIKA (KUMARIMONIKA),0,21-OCT-2022 19:15,29.0,1.0,0.0,0.0,...,,0.0,LAPP,4610021U,LAPP.4610021U,,,PRODUCTION,28-NOV-2022 23:13,ELECTRICAL PART
11,18780,0.5 SQ.MM WIRE - WHITE (FRLS),0.5 SQ.MM WIRE - WHITE (FRLS) 4610051U LAPP,KUMARI MONIKA (KUMARIMONIKA),0,21-OCT-2022 19:15,29.0,1.0,0.0,0.0,...,,0.0,LAPP,4610051U,LAPP.4610051U,,,PRODUCTION,28-NOV-2022 23:13,ELECTRICAL PART
12,17798,0.5 SQ/MM WIRE WHITE/BLUE,"LAPP KABEL<REGISTERED> MULTI-STANDARD SC 2.1, ...",KUMARI MONIKA (KUMARIMONIKA),0,19-OCT-2022 16:18,29.0,1.0,,,...,,,LAPP,4160144K,LAPP.4160144K,,,,,ELECTRICAL PART
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9282,17574,WIRE 4G 0.5 SQMM,LFLEX<REGISTERED> 100 I 4 G 0.50 GY POWER AND ...,KUMARI MONIKA (KUMARIMONIKA),0,19-OCT-2022 16:16,29.0,1.0,0.0,0.0,...,,0.0,LAPP,38007036,LAPP.38007036,,,PRODUCTION,16-AUG-2024 14:11,ELECTRICAL PART
9286,18074,WIRE ARMOURED CABLES,"1.1 KV, 1 CORES AL/ CU CONDUCTOR, XLPE INSULAT...",KUMARI MONIKA (KUMARIMONIKA),0,20-OCT-2022 16:52,29.0,1.0,,,...,,,RR KABEL LIMITED,382001580,RRK.382001580,,,,,ELECTRICAL PART
9297,18117,WIRE HOUSING,HEADERS & WIRE HOUSINGS WR-PHD 2.54MM HDR 6P D...,KUMARI MONIKA (KUMARIMONIKA),0,20-OCT-2022 16:52,3.0,15.0,,,...,,,WRTH ELEKTRONIK EISOS GMBH &,61300621121,WE.61300621121,,,,,ELECTRICAL PART
9299,18767,"WIRE RED 4 SQMM, LAPP","PN: 4520043U LAPP, WIRE RED 4 SQMM",KUMARI MONIKA (KUMARIMONIKA),0,21-OCT-2022 19:14,29.0,1.0,0.0,0.0,...,,0.0,LAPP,4520043U,LAPP.4520043U,,,PRODUCTION,19-JAN-2024 18:02,ELECTRICAL PART


In [301]:
# , 'WIDTH_(MM)', 'HEIGHT_(MM)', 'DEPTH_(MM)', 'WEIGHT_(KG)'
grouped_unique_df(merged_data, ['PO_NUM', 'ITEM_NAME'], 'SUPPLIER_CODE')

Unnamed: 0,PO_NUM,ITEM_NAME,SUPPLIER_CODE,SUPPLIER_CODE_COUNT
0,53586,SINGLE BOARD COMPUTER,[VD001614],1
3645,232470512,SPLIT GLAND,[VD004084],1
3653,232470521,Y-LINK FOR CONNECTION OF SINGLE-CHANNEL DP SLA...,[VD001966],1
3652,232470521,"VFD-1.5KW,3 PHASE,480V",[VD001966],1
3651,232470521,RS485 REPEATER FOR CONNECTION OF PROFIBUS/MPI ...,[VD001966],1
...,...,...,...,...
1822,75030,REFLECTION LIGHT BARRIER,[VD000068],1
1821,75030,CONNECTOR,[VD000068],1
1820,75030,CABLE,[VD000068],1
1819,75025,CAT 6 UTP CABLE,[VD002270],1


In [229]:
# Keep only the first merged row for each combination of PO number, supplier name,
# part name, dimensions (width/height/depth) and weight.
filtered_data = merged_data.drop_duplicates(subset=['PO_NUM', 'SUPPLIER_NAME', 'PART_NAME', 'WIDTH_(MM)', 'HEIGHT_(MM)', 'DEPTH_(MM)', 'WEIGHT_(KG)'], keep='first')
filtered_data.shape

(29703, 80)

In [232]:
pd.DataFrame(filtered_data[['PO_NUM', 'SUPPLIER_NAME', 'LOCATION', 'BUYER', 'ITEM_NAME', 'PART_NAME', 'WIDTH_(MM)', 'HEIGHT_(MM)', 'DEPTH_(MM)', 'WEIGHT_(KG)']].nunique())

Unnamed: 0,0
PO_NUM,2951
SUPPLIER_NAME,242
LOCATION,49
BUYER,33
ITEM_NAME,978
PART_NAME,978
WIDTH_(MM),268
HEIGHT_(MM),331
DEPTH_(MM),287
WEIGHT_(KG),349


In [256]:
filtered_parts_grouped_df = grouped_unique_df(filtered_data, ['PART_NAME', 'WIDTH_(MM)', 'HEIGHT_(MM)', 'DEPTH_(MM)', 'WEIGHT_(KG)'], 'PART_ID')
filtered_parts_grouped_df

Unnamed: 0,PART_NAME,PART_DESCRIPTION,WIDTH_(MM),HEIGHT_(MM),DEPTH_(MM),WEIGHT_(KG),PART_ID,PART_ID_COUNT
0,"0.5 SQ.MM SINGLE CORE CABLE - BLUE, LAPP",0.5 SQ.MM WIRE - BLUE MAKE 4610021U LAPP,0.0,0.0000,0.0,0.0000,[18776],1
770,"RCCB-4P,63A","RESIDUAL CURRENT CIRCUIT BREAKER (RCCB), 63A, ...",70.0,71.0000,80.0,0.3200,[61310],1
676,PATCH CORD COPPER,PATCH CORD COPPER (TWISTED PAIR) INDUSTRY\nMAN...,0.0,0.0000,0.0,0.0000,[25128],1
677,PCB-5G MODULE,"5G SUB-6GHZ AND MMWAVE MODULE, MAX. DOWNLINK 3...",0.0,0.0000,0.0,0.0000,[52901],1
678,PCB-5G MODULE,"5G SUB-6GHZ AND MMWAVE MODULE, MAX. DOWNLINK 4...",30.0,52.0000,2.3,0.0100,[42686],1
...,...,...,...,...,...,...,...,...
347,ENCODER MODULE,HTL ENCODER INTERFACE MODULE,128.0,67.0000,190.0,150.0000,[22788],1
348,END BRACKET FOR TERMINAL BLOCK,"END CLAMP, MATERIAL:PA, COLOR:GRAY",9.5,48.6000,32.8,0.0141,[50735],1
349,END BRACKET FOR TERMINAL BLOCK,"END CLAMP, MATERIAL:PA, COLOR:GRAY",10.0,55.4863,50.0,0.0102,[50736],1
350,END CAP,"BB-EV-EC/2-3P - END CAP, 2+3-PHASE",0.0,0.0000,0.0,0.0000,[22796],1


In [259]:
po_number_counts_df = grouped_unique_df(filtered_data, 'PO_NUM', 'LOCATION')
# po_number_counts_df[po_number_counts_df['LOCATION_COUNT'] > 1] # count = 254
po_number_counts_df # count = 2951

Unnamed: 0,PO_NUM,LOCATION,LOCATION_COUNT
2295,232471441,"[USA, SEA, MAHARASHTRA, EMEA, NOIDA, KARNATAKA...",8
2914,242530451,"[USA, MAHARASHTRA, KARNATAKA, EMEA, DAMAN, NOIDA]",6
934,75119,"[MADHYA PRADESH, NOIDA, MAHARASHTRA, WEST BENGAL]",4
1686,232433356,"[KOLKATA, HARYANA, USA, WEST BENGAL]",4
2319,232471532,"[DAMAN, MAHARASHTRA, AUSTRALIA, GUJRAT]",4
...,...,...,...
1015,75609,[WEST BENGAL],1
1016,75633,[NOIDA],1
1017,75639,[NOIDA],1
1018,75668,[UP],1


In [113]:
# electrical_parts_cleaned['PART_NAME'].nunique() # 5526
# electrical_parts_cleaned['PART_NAME'].count() # 9359

# Count how many catalogue rows share each PART_NAME ...
part_name_counts = electrical_parts_cleaned['PART_NAME'].value_counts()
# ... and keep only the names that occur more than once, most frequent first.
part_names_more_than_one = part_name_counts[part_name_counts > 1].sort_values(ascending=False)
part_names_more_than_one_df = part_names_more_than_one.reset_index()
# NOTE(review): the first column holds PART_NAME values but is labelled 'ITEM_NAME' —
# confirm the label is intentional.
part_names_more_than_one_df.columns = ['ITEM_NAME', 'COUNT']
display(part_names_more_than_one_df)

Unnamed: 0,ITEM_NAME,COUNT
0,Auxiliary outputs 2,555
1,"color: green-yellow;""\tNos\t0.0247\t50.192303\...",555
2,CABLE,175
3,Cable,127
4,Connector,93
...,...,...
814,RJ45 Female number of positions: RJ45,2
815,"PANEL LIGHT - WHITE, 18 WATT, 230 V AC",2
816,"SAFETY DOOR SWITCH-2NC , 24V DC",2
817,SAFETY ROPE-PULLEY,2


In [161]:
part_id_unique = electrical_parts_cleaned.groupby(['PART_NAME', 'WIDTH_(MM)', 'HEIGHT_(MM)', 'DEPTH_(MM)', 'WEIGHT_(KG)'])['PART_ID'].nunique()
part_id_unique.count()

4651

In [263]:
electrical_parts_cleaned.groupby(['PART_NAME', 'WIDTH_(MM)', 'HEIGHT_(MM)', 'DEPTH_(MM)', 'WEIGHT_(KG)']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,PART_ID,PART_DESCRIPTION,PART_OWNER,PART_REVISION,REVISION_DATE,PRODUCT_GROUP,PRODUCT_SUBGROUP,MOUNTING_CLEARANCES_(MM),MANUFACTURER,MANUFACTURER_PART_NUMBER,EPLAN_PART_NUMBER,OLD_PLM_ID,OLD_SAP_ID,RELEASED_STATUS,RELEASED_DATE,PART_CATEGORY
PART_NAME,WIDTH_(MM),HEIGHT_(MM),DEPTH_(MM),WEIGHT_(KG),Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0.14 MM PIN LUGS,0.0,0.0,0.0,0.0,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1
"0.5 SQ.MM SINGLE CORE CABLE - BLUE, LAPP",0.0,0.0,0.0,0.0,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1
0.5 SQ.MM WIRE - WHITE (FRLS),0.0,0.0,0.0,0.0,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1
10.0 MM PIN LUGS MAKE : LAPP,0.0,0.0,0.0,0.0,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1
16 DIDO CARD,0.0,0.0,0.0,0.0,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YELLOW GREEN WIRE/CABLE 2.5 SQMM,0.0,0.0,0.0,0.0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1
YELLOW GREEN WIRE/CABLE 6 SQMM,0.0,0.0,0.0,0.0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1
YELLOW WIRE/CABLE 0.75 SQMM,0.0,0.0,0.0,0.0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1
YELLOW WIRE/CABLE 10 SQMM,0.0,0.0,0.0,0.0,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1


In [264]:
electrical_parts_cleaned.groupby(['PART_NAME', 'PART_DESCRIPTION', 'WIDTH_(MM)', 'HEIGHT_(MM)', 'DEPTH_(MM)', 'WEIGHT_(KG)']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,PART_ID,PART_OWNER,PART_REVISION,REVISION_DATE,PRODUCT_GROUP,PRODUCT_SUBGROUP,MOUNTING_CLEARANCES_(MM),MANUFACTURER,MANUFACTURER_PART_NUMBER,EPLAN_PART_NUMBER,OLD_PLM_ID,OLD_SAP_ID,RELEASED_STATUS,RELEASED_DATE,PART_CATEGORY
PART_NAME,PART_DESCRIPTION,WIDTH_(MM),HEIGHT_(MM),DEPTH_(MM),WEIGHT_(KG),Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0.14 MM PIN LUGS,LUGS PRE-INSULATED END FERRULE 0.14SQMM/L-8MM,0.0,0.0,0.0,0.0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1
"0.5 SQ.MM SINGLE CORE CABLE - BLUE, LAPP",0.5 SQ.MM WIRE - BLUE MAKE 4610021U LAPP,0.0,0.0,0.0,0.0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1
0.5 SQ.MM WIRE - WHITE (FRLS),0.5 SQ.MM WIRE - WHITE (FRLS) 4610051U LAPP,0.0,0.0,0.0,0.0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1
10.0 MM PIN LUGS MAKE : LAPP,10.0 MM PIN LUGS MAKE : LAPP -61721960,0.0,0.0,0.0,0.0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1
16 DIDO CARD,"SIMATIC S7-1200, DIGITAL INPUT SM 1221, 16 DI, 24VDC, SINK/SOURCE INPUT",0.0,0.0,0.0,0.0,1,1,1,1,1,1,0,1,1,1,0,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YELLOW GREEN WIRE/CABLE 2.5 SQMM,"WIRE, STRANDED, HOOK UP, PVC, GREEN, YELLOW, 12 AWG, 4 MM<POW2>, 328 FT, 100 M",0.0,0.0,0.0,0.0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1
YELLOW GREEN WIRE/CABLE 6 SQMM,"WIRE, STRANDED, HOOK UP, PVC, GREEN, YELLOW, 10 AWG, 6 MM<POW2>, 328 FT, 100 M",0.0,0.0,0.0,0.0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1
YELLOW WIRE/CABLE 0.75 SQMM,"WIRE, STRANDED, HOOK UP, H05V-K, HAR, POWER/CONTROL, PVC, YELLOW, 0.75 MM<POW2>, 328 FT, 100 M",0.0,0.0,0.0,0.0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1
YELLOW WIRE/CABLE 10 SQMM,LAPP LFLEX<REGISTERED> HARMONIZED <HAR> HOOK-UP WIRE - TYPE H07V-K - 8 AWG - YELLOW,0.0,0.0,0.0,0.0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,1


In [265]:
# Per-group non-null counts keyed on part name, description, the four physical
# measurements and MANUFACTURER; every listed column moves into the result's
# index and only the remaining columns are counted.
electrical_parts_cleaned.groupby(
    by=[
        'PART_NAME',
        'PART_DESCRIPTION',
        'WIDTH_(MM)',
        'HEIGHT_(MM)',
        'DEPTH_(MM)',
        'WEIGHT_(KG)',
        'MANUFACTURER',
    ]
).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,PART_ID,PART_OWNER,PART_REVISION,REVISION_DATE,PRODUCT_GROUP,PRODUCT_SUBGROUP,MOUNTING_CLEARANCES_(MM),MANUFACTURER_PART_NUMBER,EPLAN_PART_NUMBER,OLD_PLM_ID,OLD_SAP_ID,RELEASED_STATUS,RELEASED_DATE,PART_CATEGORY
PART_NAME,PART_DESCRIPTION,WIDTH_(MM),HEIGHT_(MM),DEPTH_(MM),WEIGHT_(KG),MANUFACTURER,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0.14 MM PIN LUGS,LUGS PRE-INSULATED END FERRULE 0.14SQMM/L-8MM,0.0,0.0,0.0,0.0,ELPRESS,1,1,1,1,1,1,0,1,1,0,0,1,1,1
"0.5 SQ.MM SINGLE CORE CABLE - BLUE, LAPP",0.5 SQ.MM WIRE - BLUE MAKE 4610021U LAPP,0.0,0.0,0.0,0.0,LAPP,1,1,1,1,1,1,0,1,1,0,0,1,1,1
0.5 SQ.MM WIRE - WHITE (FRLS),0.5 SQ.MM WIRE - WHITE (FRLS) 4610051U LAPP,0.0,0.0,0.0,0.0,LAPP,1,1,1,1,1,1,0,1,1,0,0,1,1,1
10.0 MM PIN LUGS MAKE : LAPP,10.0 MM PIN LUGS MAKE : LAPP -61721960,0.0,0.0,0.0,0.0,LAPP,1,1,1,1,1,1,0,1,1,0,0,1,1,1
16 DIDO CARD,"SIMATIC S7-1200, DIGITAL INPUT SM 1221, 16 DI, 24VDC, SINK/SOURCE INPUT",0.0,0.0,0.0,0.0,SIEMENS,1,1,1,1,1,1,0,1,1,0,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YELLOW GREEN WIRE/CABLE 2.5 SQMM,"WIRE, STRANDED, HOOK UP, PVC, GREEN, YELLOW, 12 AWG, 4 MM<POW2>, 328 FT, 100 M",0.0,0.0,0.0,0.0,LAPP,1,1,1,1,1,1,0,1,1,0,0,0,0,1
YELLOW GREEN WIRE/CABLE 6 SQMM,"WIRE, STRANDED, HOOK UP, PVC, GREEN, YELLOW, 10 AWG, 6 MM<POW2>, 328 FT, 100 M",0.0,0.0,0.0,0.0,LAPP,1,1,1,1,1,1,0,1,1,0,0,0,0,1
YELLOW WIRE/CABLE 0.75 SQMM,"WIRE, STRANDED, HOOK UP, H05V-K, HAR, POWER/CONTROL, PVC, YELLOW, 0.75 MM<POW2>, 328 FT, 100 M",0.0,0.0,0.0,0.0,LAPP,1,1,1,1,1,1,0,1,1,0,0,0,0,1
YELLOW WIRE/CABLE 10 SQMM,LAPP LFLEX<REGISTERED> HARMONIZED <HAR> HOOK-UP WIRE - TYPE H07V-K - 8 AWG - YELLOW,0.0,0.0,0.0,0.0,LAPP,1,1,1,1,1,1,0,1,1,0,0,0,0,1


In [76]:
# Keep the rows of electrical_parts_cleaned whose PART_NAME does not occur
# anywhere in filtered_data['PART_NAME'].
not_included_electrical_parts = electrical_parts_cleaned.loc[
    ~electrical_parts_cleaned['PART_NAME'].isin(filtered_data['PART_NAME'])
]

# Number of distinct part names among those excluded rows.
not_included_electrical_parts['PART_NAME'].nunique()

4436

In [71]:
# Keep the rows of df_cleaned whose SUPPLIER_NAME does not occur anywhere in
# filtered_data['SUPPLIER_NAME'].
not_included_supplier_name = df_cleaned.loc[
    ~df_cleaned['SUPPLIER_NAME'].isin(filtered_data['SUPPLIER_NAME'])
]

# Number of distinct supplier names among those excluded rows.
not_included_supplier_name['SUPPLIER_NAME'].nunique()

1391

In [72]:
# Keep the rows of df_cleaned whose LOCATION does not occur anywhere in
# filtered_data['LOCATION'].
not_included_location = df_cleaned.loc[
    ~df_cleaned['LOCATION'].isin(filtered_data['LOCATION'])
]

# Number of distinct locations among those excluded rows.
not_included_location['LOCATION'].nunique()

10

In [73]:
# Keep the rows of df_cleaned whose BUYER does not occur anywhere in
# filtered_data['BUYER'].
not_included_buyer = df_cleaned.loc[
    ~df_cleaned['BUYER'].isin(filtered_data['BUYER'])
]

# Number of distinct buyers among those excluded rows.
not_included_buyer['BUYER'].nunique()

69