# Fuzzy Match Example Notebook
Created 9/28/2022 by

Timothy Del Green<br />
1-256-335-0378<br />
tdgreen@outlook.com<br />
https://www.linkedin.com/in/timothy-del-green

## Imports

In [1]:
import difflib

## Fuzzy Match Example

In [None]:
%%sql location_one_query <<

SELECT
    A.IFITEM    AS ITEM_NO
    ,A.IFLOC    AS LOCATION_NO
    ,A.IFQOH    AS QTY_ON_HAND
    /*
       Join the three description fields with single spaces. Each field is
       trimmed first; the outer TRIM then removes the separator spaces that
       would otherwise be left dangling when ICDSC2 and/or ICDSC3 is blank,
       so the fuzzy matcher is not comparing against trailing whitespace.
    */
    ,TRIM(TRIM(B.ICDSC1)||' '||TRIM(B.ICDSC2)||' '||TRIM(B.ICDSC3))
                AS ITEM_DESC

FROM R50FILES.VINITMB AS A
    JOIN R50FILES.VINITEM AS B
        ON B.ICCMP = 1
        AND B.ICITEM = A.IFITEM

/* Only items at location 301 that currently have stock on hand */
WHERE
        A.IFLOC = '301'
    AND A.IFQOH > 0

FOR READ ONLY


In [10]:
# Schema / null-count summary first, then a five-row sample of the location 301 result.
location_one_query.info()
location_one_query.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2030 entries, 0 to 2029
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   item_no      2030 non-null   object
 1   location_no  2030 non-null   object
 2   qty_on_hand  2030 non-null   object
 3   item_desc    2030 non-null   object
dtypes: object(4)
memory usage: 63.6+ KB


Unnamed: 0,item_no,location_no,qty_on_hand,item_desc
0,ABBLHS4X08025,301,100,LHSC M8 X 25 DIN 7984 A2 ST ST
1,ABBSBS006020,301,150,BLT SHLDR M6X8X20 7379-12.9 VMI
2,ABBSC2806012,301,500,SHCS M6 X 12 912 8.8 BLK OX VMI
3,ABBSC2808020,301,300,SHCS M8 X 20 912 8.8 BLK OX
4,ABBSC2810045,301,650,SHCS M10 X 45 912 8.8 BLK OX


In [None]:
%%sql location_two_query <<

SELECT
    A.IFITEM AS ITEM_NO,
    A.IFLOC  AS LOCATION_NO,
    A.IFQOH  AS QTY_ON_HAND,
    /*
       Strings were split across the three description fields without
       spaces, so the trimmed parts are joined with an empty separator.
       NOTE(review): TRIM also strips a genuine space that happens to sit at
       a field boundary (e.g. "SCREWFLAT POINT" in the sample output) --
       confirm whether that matters for matching.
    */
    TRIM(B.ICDSC1) || '' || TRIM(B.ICDSC2) || '' || TRIM(B.ICDSC3) AS ITEM_DESC

FROM R50FILES.VINITMB AS A
INNER JOIN R50FILES.VINITEM AS B
    ON  B.ICITEM = A.IFITEM
    AND B.ICCMP = 1

/* Only items at location 310 that currently have stock on hand */
WHERE A.IFQOH > 0
  AND A.IFLOC = '310'

FOR READ ONLY


In [11]:
# Schema / null-count summary first, then a five-row sample of the location 310 result.
location_two_query.info()
location_two_query.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7264 entries, 0 to 7263
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   item_no      7264 non-null   object
 1   location_no  7264 non-null   object
 2   qty_on_hand  7264 non-null   object
 3   item_desc    7264 non-null   object
dtypes: object(4)
memory usage: 227.1+ KB


Unnamed: 0,item_no,location_no,qty_on_hand,item_desc
0,.10C100H11Z,310,25,M10-1.5 X 100 HEX CAP SCREW GR10.9 DIN 931 ZINC
1,.10C10H81Z,310,976,M10-1.50 X 10 HEX C/S 8.8 DIN931 ZINC
2,.10C10KSF,310,18,M10-1.5 X 10 SOCKET SET SCREWFLAT POINT
3,.10C120H11Z,310,50,M10-1.5 X 120 HEX CAP SCREW GR10.9 DIN 931 ZINC
4,.10C12H81Z,310,327,M10-1.50 x 12 HEX CAP GR. 8.8ZN


In [8]:
def fuzzy_match_items(df1, df2, n=1, cutoff=0.6, suffixes=('_L1', '_L2')):
    """Pair each row of ``df2`` with the ``df1`` rows whose description is closest.

    Every ``df2['item_desc']`` is compared against the distinct values of
    ``df1['item_desc']`` with ``difflib.get_close_matches``. Rows of ``df2``
    that have no match at or above ``cutoff`` (or whose description is null)
    are dropped; the rest are joined to the matching ``df1`` rows.

    Parameters
    ----------
    df1 : DataFrame
        Candidate frame; must have an ``item_desc`` column.
    df2 : DataFrame
        Frame whose ``item_desc`` values are looked up in ``df1``.
    n : int, default 1
        Maximum number of matches kept per ``df2`` row (passed to difflib).
    cutoff : float, default 0.6
        Minimum similarity ratio in [0, 1] for a match (passed to difflib).
    suffixes : pair of str, default ('_L1', '_L2')
        Suffixes for overlapping column names. ``df2`` is the LEFT frame of
        the merge, so with the defaults the ``_L1`` columns come from ``df2``
        and the ``_L2`` columns from ``df1``.
        NOTE(review): that labelling is counter-intuitive; pass
        ``suffixes=('_L2', '_L1')`` if the suffix should follow the argument
        number. The default is kept for backward compatibility.

    Returns
    -------
    DataFrame
        ``df2`` columns, an ``item_match`` column holding the matched
        description, then ``df1`` columns; sorted by ``item_match`` with a
        fresh 0..N-1 index.
    """
    # Materialise the candidates once. Nulls are removed because difflib can
    # only compare sequences, and duplicates add work without changing which
    # string is the best match.
    candidates = df1['item_desc'].dropna().unique().tolist()

    # Match each distinct description once instead of once per row.
    matches_by_desc = {
        desc: difflib.get_close_matches(desc, candidates, n=n, cutoff=cutoff)
        for desc in df2['item_desc'].dropna().unique()
    }

    return (
        df2
            .assign(
                # Null descriptions are not in the lookup and get no match.
                item_match=df2['item_desc'].map(
                    lambda desc: matches_by_desc.get(desc, [])
                )
            )
            # One row per match; an empty match list becomes a single NaN row.
            .explode('item_match')
            # Drop unmatched rows explicitly so a NaN key can never be joined
            # to a null description in df1.
            .dropna(subset=['item_match'])
            .merge(
                df1,
                left_on='item_match',
                right_on='item_desc',
                suffixes=suffixes
            )
            .sort_values(by='item_match')
            .reset_index(drop=True)
    )

# Location 301 descriptions are the candidates; every location 310 item is matched against them.
test = fuzzy_match_items(df1=location_one_query, df2=location_two_query)

test

Unnamed: 0,item_no_L1,location_no_L1,qty_on_hand_L1,item_desc_L1,item_match,item_no_L2,location_no_L2,qty_on_hand_L2,item_desc_L2
0,/CAL2-1032-130-T,310,1650,10-32 LARGE FLANGE NUTSERT TRIVALENT ZINC,5/16-18 LRG SERR FLANGE NUT GRADE 2 ZINC,DOMHLN0313FLG55273,301,100800,5/16-18 LRG SERR FLANGE NUT GRADE 2 ZINC
1,/14NSFL,310,1818,1/4-20 NYLON INSERT FLANGE L/NUT ZINC,5/16-18 LRG SERR FLANGE NUT GRADE 2 ZINC,DOMHLN0313FLG55273,301,100800,5/16-18 LRG SERR FLANGE NUT GRADE 2 ZINC
2,/NFNL6808000,310,1500,M8-1.25 NYLON INSERT FLANGE NUT CR3 ZINC,5/16-18 LRG SERR FLANGE NUT GRADE 2 ZINC,DOMHLN0313FLG55273,301,100800,5/16-18 LRG SERR FLANGE NUT GRADE 2 ZINC
3,25FNWFZ,310,4650,1/4-28 WHIZ FLANGE NUT GR 2 CR3 ZINC,5/16-18 LRG SERR FLANGE NUT GRADE 2 ZINC,DOMHLN0313FLG55273,301,100800,5/16-18 LRG SERR FLANGE NUT GRADE 2 ZINC
4,31C150BCG5Z,310,60,5/16-18 x 1-1/2 CARRIAGE BOLTGR. 5 ZINC,5/16-18 LRG SERR FLANGE NUT GRADE 2 ZINC,DOMHLN0313FLG55273,301,100800,5/16-18 LRG SERR FLANGE NUT GRADE 2 ZINC
...,...,...,...,...,...,...,...,...,...
470,50NWSAZ/10BAG,310,150,WASHER-FLAT 1/2 TPA-N ZP,WASHER FLAT 13/16 VMI,IHP121690-01,301,140,WASHER FLAT 13/16 VMI
471,/17W07401B,310,894,.340 ID X .740 OD X .062 THK NYLON FLAT WASHER...,WSHR .443 X .75 X .06 NYLON NYLON WASHER,ERM20007601,301,76505,WSHR .443 X .75 X .06 NYLON NYLON WASHER
472,37NLIEZ,310,500,3/8 INT/EXT TOOTH L/W ZINC,WSHR INT TOOTH LOCK 1/4 ZNC,WLI02500,301,375,WSHR INT TOOTH LOCK 1/4 ZNC
473,10NLIEZ,310,300,#10 INT/EXT TOOTH L/W ZINC,WSHR INT TOOTH LOCK 1/4 ZNC,WLI02500,301,375,WSHR INT TOOTH LOCK 1/4 ZNC
