In [1]:
import pandas as pd
from fuzzywuzzy import fuzz



In [2]:
# Read the paired room-type names and preview the first five rows.
df = pd.read_csv("room_type.csv")
df.head(n=5)

Unnamed: 0,Expedia,Booking.com
0,"Deluxe Room, 1 King Bed",Deluxe King Room
1,"Standard Room, 1 King Bed, Accessible",Standard King Roll-in Shower Accessible
2,"Grand Corner King Room, 1 King Bed",Grand Corner King Room
3,"Suite, 1 King Bed (Parlor)",King Parlor Suite
4,"High-Floor Premium Room, 1 King Bed",High-Floor Premium King Room


There are several ways to compare two strings in Fuzzywuzzy.

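* ratio: compares the two strings in full, character by character and in order.
* partial_ratio: compares the shorter string against the best-matching substring of the longer one.
* token_sort_ratio: sorts the words (tokens) before comparing, so word order is ignored.
* token_set_ratio: compares the sets of words, so duplicated words are ignored (similar to token sort, but more flexible).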

In [3]:
# Plain ratio: a full room name versus just its leading "Deluxe Room" part.
fuzz.ratio(
    "Deluxe Room, 1 King Bed",
    "Deluxe Room",
)

65

In [4]:
# Plain ratio: two differently ordered descriptions of a two-double-bed room.
fuzz.ratio(
    "Room, 2 Double Beds (19th to 25th Floors)",
    "Two Double Beds - Location Room (19th to 25th Floors)",
)

74

In [5]:
# Partial ratio: the shorter string appears verbatim at the start of the longer one.
fuzz.partial_ratio(
    "Deluxe Room, 1 King Bed",
    "Deluxe Room",
)

100

In [6]:
# Partial ratio: same two-double-bed pair, where neither string contains the other.
fuzz.partial_ratio(
    "Room, 2 Double Beds (19th to 25th Floors)",
    "Two Double Beds - Location Room (19th to 25th Floors)",
)

63

In [7]:
# Token-sort ratio: the shared words appear in a different order in each name.
fuzz.token_sort_ratio(
    "Deluxe Room, 1 King Bed",
    "Deluxe King Room",
)

84

In [8]:
# Token-sort ratio: "2" versus "Two" and extra filler words separate these names.
fuzz.token_sort_ratio(
    "Traditional Double Room, 2 Double Beds",
    "Double Room with Two Double Beds",
)

78

In [9]:
# Token-sort ratio: the two-double-bed pair with the floor range in both names.
fuzz.token_sort_ratio(
    "Room, 2 Double Beds (19th to 25th Floors)",
    "Two Double Beds - Location Room (19th to 25th Floors)",
)

83

In [10]:
# Token-set ratio: every word of the shorter name also occurs in the longer one.
fuzz.token_set_ratio(
    "Deluxe Room, 1 King Bed",
    "Deluxe King Room",
)

100

In [11]:
# Token-set ratio: same pair as the token-sort example, for direct comparison.
fuzz.token_set_ratio(
    "Traditional Double Room, 2 Double Beds",
    "Double Room with Two Double Beds",
)

78

In [12]:
# Token-set ratio: the two-double-bed pair with the floor range in both names.
fuzz.token_set_ratio(
    "Room, 2 Double Beds (19th to 25th Floors)",
    "Two Double Beds - Location Room (19th to 25th Floors)",
)

97

In [13]:
def get_ratio(row):
    """Score how closely a row's two room descriptions match.

    Reads the ``'Expedia'`` and ``'Booking.com'`` entries of ``row`` and
    returns whatever ``fuzz.token_set_ratio`` yields for that pair.
    """
    expedia_name, booking_name = row['Expedia'], row['Booking.com']
    return fuzz.token_set_ratio(expedia_name, booking_name)
# Fraction of rows whose token-set score is above 70.
df[df.apply(get_ratio, axis=1).gt(70)].shape[0] / df.shape[0]

0.9029126213592233

In [14]:
# Preview the first ten rows whose token-set score is above 70.
df[df.apply(get_ratio, axis=1).gt(70)].head(n=10)

Unnamed: 0,Expedia,Booking.com
0,"Deluxe Room, 1 King Bed",Deluxe King Room
1,"Standard Room, 1 King Bed, Accessible",Standard King Roll-in Shower Accessible
2,"Grand Corner King Room, 1 King Bed",Grand Corner King Room
3,"Suite, 1 King Bed (Parlor)",King Parlor Suite
4,"High-Floor Premium Room, 1 King Bed",High-Floor Premium King Room
5,"Traditional Double Room, 2 Double Beds",Double Room with Two Double Beds
6,"Room, 1 King Bed, Accessible",King Room - Disability Access
7,"Deluxe Room, 1 King Bed",Deluxe King Room
8,Deluxe Room,Deluxe Room (Non Refundable)
9,"Room, 2 Double Beds (19th to 25th Floors)",Two Double Beds - Location Room (19th to 25th ...


In [15]:
# Rows whose token-set score is above 60. Reuses get_ratio (defined in an
# earlier cell) rather than repeating the same scoring logic in a lambda.
df[df.apply(get_ratio, axis=1) > 60]

Unnamed: 0,Expedia,Booking.com
0,"Deluxe Room, 1 King Bed",Deluxe King Room
1,"Standard Room, 1 King Bed, Accessible",Standard King Roll-in Shower Accessible
2,"Grand Corner King Room, 1 King Bed",Grand Corner King Room
3,"Suite, 1 King Bed (Parlor)",King Parlor Suite
4,"High-Floor Premium Room, 1 King Bed",High-Floor Premium King Room
...,...,...
98,"Room, 1 King Bed, Accessible, Resort View (Ali...",Alii Tower Resort View With King Bed - Mobilit...
99,"Room, 1 King Bed, Accessible, View (Rainbow, B...",Rainbow Tower Ocean View With King Bed - Mobil...
100,"Room, 1 King Bed, Ocean View (Alii)",Alii Tower Ocean View With King Bed
101,"Room, 1 King Bed, Oceanfront (Rainbow)",Rainbow Tower Ocean Front with King Bed


In [16]:
# Fraction of rows whose token-set score is above 60. Reuses get_ratio
# (defined in an earlier cell) rather than repeating its logic in a lambda.
len(df[df.apply(get_ratio, axis=1) > 60]) / len(df)

0.9805825242718447