### Text Cleaning/Preprocessing using clean-text

In [1]:
# install the clean-text package
# ! pip install clean-text

In [2]:
# Import the library
from cleantext import clean

## Fix Unicode

In [3]:
# Sample string containing accented characters.
text = 'Yóù àré rïght'
# NOTE(review): the accents (and the upper-case 'Y') disappear in the output
# even though only fix_unicode is passed -- presumably clean()'s defaults
# (ASCII transliteration / lower-casing) are responsible; confirm in the docs.
clean(text, fix_unicode=True)

'you are right'

## Lower Casing

In [5]:
# Upper-case sample; lower=True asks clean() to lower-case it.
caps = "KEEP LEARNING"
clean(caps, lower=True)

'keep learning'

## Remove & Replace URLs

In [6]:
# Sample sentence that starts with a URL (reused by the next cell).
url = 'www.bing.com is the best search website'

# With no_urls=True and no replacement string given, the URL is not deleted
# outright: it is swapped for a placeholder token (shown as '<url>' in the
# output of this cell).
clean(url, no_urls=True)

'<url> is the best search website'

In [7]:
# Replace the URL with a custom string instead of a placeholder token.
clean(url, no_urls=True, replace_with_url='google')

'google is the best search website'

## Remove & Replace Punctuation

In [8]:
# Sample sentence with punctuation and a currency amount (reused by the next cell).
punct = "wow ! you have won $10,000. "

# Strip punctuation. Per the output of this cell, '!', ',' and '.' are dropped
# while '$' is kept.
clean(punct, no_punct=True)

'wow you have won $10000'

In [9]:
# Substitute each punctuation character with a custom string ("1" here).
clean(punct, no_punct=True, replace_with_punct="1")

'wow 1 you have won $1010001'

## Remove & Replace Currency Symbol

In [10]:
# Sample sentence with a currency symbol (reused by the next cell).
msg = "I have $ 100"

# With no replacement string given, the currency symbol becomes a placeholder
# token (shown as '<cur>' in the output of this cell) rather than being deleted.
clean(msg, no_currency_symbols=True)

'i have <cur> 100'

In [11]:
# Replace the currency symbol with a custom string.
clean(msg, no_currency_symbols=True, replace_with_currency_symbol='dollars')

'i have dollars 100'

## Remove & Replace Numbers

In [12]:
# Sample sentence containing a digit (reused by the next cell).
num = "city is 5 kilometers away "

# With no replacement string given, the digit is swapped for "0" (see the
# output of this cell) rather than being removed.
clean(num, no_digits=True)

'city is 0 kilometers away'

In [13]:
# Replace digits with a custom string.
# NOTE(review): "five" is a fixed replacement string, not the digit spelled
# out -- presumably any digit would be rendered as "five"; confirm.
clean(num, no_digits=True, replace_with_digit="five")

'city is five kilometers away'

## Combining all together

In [14]:
# Sample combining accents, a URL, digits, a currency symbol and punctuation.
texts = "'Yóù can visit www.amazon.com, and pay 100 ₹ for the book."

# Apply every option from the cells above in a single call. Digits and
# punctuation are replaced with the empty string, i.e. dropped from the result.
clean(
    texts,
    fix_unicode=True,
    lower=True,
    no_urls=True,
    no_digits=True,
    no_currency_symbols=True,
    no_punct=True,
    replace_with_url="website",
    replace_with_digit="",
    replace_with_punct="",
    replace_with_currency_symbol=" Rupees",
)

'you can visit website and pay rupees for the book'

#### Source: https://pypi.org/project/clean-text/