In [1]:
# Import necessary libraries.
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
import pandas as pd

In [2]:
# Define subscription endpoint and key.
# key = "PLEASE-ENTER-YOUR-OWN-KEY"
# endpoint = "https://PLEASE-ENTER-YOUR-OWN-ENDPOINT.cognitiveservices.azure.com/"
key = "PLEASE-ENTER-YOUR-OWN-KEY"
endpoint = "https://PLEASE-ENTER-YOUR-OWN-ENDPOINT.cognitiveservices.azure.com/"

In [3]:
# Authenticate the client.
def authenticate_client():
    ta_credential = AzureKeyCredential(key)
    text_analytics_client = TextAnalyticsClient(
            endpoint=endpoint, 
            credential=ta_credential)
    return text_analytics_client

client = authenticate_client()

In [4]:
# Perform Key Phrase Extraction for sample English description.
# https://docs.microsoft.com/en-us/azure/cognitive-services/language-service/key-phrase-extraction/overview
def key_phrase_extraction_example(client):

    try:
        documents = [
                     "Japan Origin. Combat hair breakage with Dove Japan Hair Breakage Protection range and bring back a head of silky hair stronger and smoother."
                    ]

        response = client.extract_key_phrases(documents = documents, language="en")[0]

        if not response.is_error:
            print("\tKey Phrases:")
            for phrase in response.key_phrases:
                print("\t\t", phrase)
        else:
            print(response.id, response.error)

    except Exception as err:
        print("Encountered exception. {}".format(err))
        
key_phrase_extraction_example(client)

	Key Phrases:
		 Dove Japan Hair Breakage Protection range
		 Combat hair breakage
		 Japan Origin
		 silky hair
		 head


In [6]:
# Perform Key Phrase Extraction for sample Tranditional Chinese description (best clean).
def key_phrase_extraction_example(client):

    try:
        documents = [
                     "全新安怡的MoveMax配方蘊含鈣質骨膠原蛋白質及多種微量元素提供每日所需營養令你骨骼更強健關節更靈活肌肉更柔韌BR原產地馬來西亞奶粉源自紐西蘭BRBR含鈣質維他命D鎂及鋅有助您保持骨骼強健BR含骨膠原每杯含克骨膠原是組成關節軟骨主要成分強壯的關節軟骨有助支持關節靈活移動BR含維他命C及E有助支持關節靈活移動及保護細胞免受損傷BR含蛋白質鎂維他命B雜及維他命D有助保持肌肉最佳狀態及功能BR"
                    ]

        response = client.extract_key_phrases(documents = documents, language="zh-hans")[0]

        if not response.is_error:
            print("\tKey Phrases:")
            for phrase in response.key_phrases:
                print("\t\t", phrase)
        else:
            print(response.id, response.error)

    except Exception as err:
        print("Encountered exception. {}".format(err))
        
key_phrase_extraction_example(client)

	Key Phrases:
		 MoveMax配方
		 BR原產
		 BRBR
		 命D鎂
		 命C
		 損傷BR
		 B雜
		 全新安怡
		 鈣質骨膠原蛋白質
		 微量元素
		 營養令
		 骨骼
		 節
		 肌肉
		 西亞奶粉
		 紐西
		 鈣質維
		 鋅
		 助
		 健
		 杯
		 克骨膠原
		 成分
		 壯
		 保護
		 細胞
		 最佳狀態
		 功能


In [7]:
# Perform Key Phrase Extraction for sample Tranditional Chinese description.
def key_phrase_extraction_example(client):

    try:
        documents = [
                     "全新安怡的MoveMax配方，蘊含鈣質、骨膠原、蛋白質及多種微量元素，提供每日所需營養，令你骨骼更強健、關節更靈活、肌肉更柔韌。原產地馬來西亞, 奶粉源自紐西蘭。含鈣質、維他命D、鎂及鋅，有助您保持骨骼強健。含骨膠原(每杯含0.5克)。骨膠原是組成關節軟骨主要成分，強壯的關節軟骨有助支持關節靈活移動。含維他命C及E有助支持關節靈活移動及保護細胞免受損傷。含蛋白質、鎂、維他命B雜及維他命D，有助保持肌肉最佳狀態及功能。"
                    ]

        response = client.extract_key_phrases(documents = documents, language="zh-hans")[0]

        if not response.is_error:
            print("\tKey Phrases:")
            for phrase in response.key_phrases:
                print("\t\t", phrase)
        else:
            print(response.id, response.error)

    except Exception as err:
        print("Encountered exception. {}".format(err))
        
key_phrase_extraction_example(client)

	Key Phrases:
		 MoveMax配方
		 C
		 全新安怡
		 鈣質
		 骨膠原
		 蛋白質
		 微量元素
		 營養
		 骨骼
		 節
		 肌肉
		 原產地馬來西亞
		 奶粉
		 紐西蘭
		 鎂
		 鋅
		 助
		 保護細胞
		 損傷
		 雜
		 功能


In [8]:
# Perform Key Phrase Extraction for sample Simplified Chinese description (best clean).
def key_phrase_extraction_example(client):

    try:
        documents = [
                     "全新安怡的MoveMax配方蕴含钙质骨胶原蛋白质及多种微量元素提供每日所需营养令你骨骼更强健关节更灵活肌肉更柔韧BR原产地马来西亚奶粉源自纽西兰BRBR含钙质维他命D镁及锌有助您保持骨骼强健BR含骨胶原每杯含克骨胶原是组成关节软骨主要成分强壮的关节软骨有助支持关节灵活移动BR含维他命C及E有助支持关节灵活移动及保护细胞免受损伤BR含蛋白质镁维他命B杂及维他命D有助保持肌肉最佳状态及功能BR"
                    ]

        response = client.extract_key_phrases(documents = documents, language="zh-hans")[0]

        if not response.is_error:
            print("\tKey Phrases:")
            for phrase in response.key_phrases:
                print("\t\t", phrase)
        else:
            print(response.id, response.error)

    except Exception as err:
        print("Encountered exception. {}".format(err))
        
key_phrase_extraction_example(client)

	Key Phrases:
		 MoveMax
		 纽西兰BRBR
		 命D镁
		 移动BR
		 命C
		 损伤BR
		 B杂
		 功能BR
		 全新安怡
		 钙质骨胶原蛋白质
		 多种微量元素
		 营养
		 骨骼
		 健关节
		 肌肉
		 原产
		 马来西亚奶粉
		 钙质维
		 锌
		 助
		 杯
		 克骨胶原
		 软骨
		 成分
		 细胞
		 蛋白质镁维
		 维他命


In [9]:
# Perform Key Phrase Extraction for sample Simplified Chinese description.
def key_phrase_extraction_example(client):

    try:
        documents = [
                     "全新安怡的MoveMax配方，蕴含钙质、骨胶原、蛋白质及多种微量元素，提供每日所需营养，令你骨骼更强健、关节更灵活、肌肉更柔韧。原产地马来西亚, 奶粉源自纽西兰。含钙质、维他命D、镁及锌，有助您保持骨骼强健。含骨胶原(每杯含0.5克)。骨胶原是组成关节软骨主要成分，强壮的关节软骨有助支持关节灵活移动。含维他命C及E有助支持关节灵活移动及保护细胞免受损伤。含蛋白质、镁、维他命B杂及维他命D，有助保持肌肉最佳状态及功能。"
                    ]

        response = client.extract_key_phrases(documents = documents, language="zh-hans")[0]

        if not response.is_error:
            print("\tKey Phrases:")
            for phrase in response.key_phrases:
                print("\t\t", phrase)
        else:
            print(response.id, response.error)

    except Exception as err:
        print("Encountered exception. {}".format(err))
        
key_phrase_extraction_example(client)

	Key Phrases:
		 MoveMax配方
		 维他命C
		 B杂
		 全新安怡
		 钙质
		 骨胶原
		 蛋白质
		 多种微量元素
		 营养
		 骨骼
		 关节
		 肌肉
		 原产地马来西亚
		 奶粉
		 纽西兰
		 镁
		 锌
		 移动
		 助
		 保护细胞
		 损伤
		 功能


In [11]:
# Perform Named Entity Recognition for sample English description.
# https://docs.microsoft.com/en-us/azure/cognitive-services/language-service/named-entity-recognition/overview
def entity_recognition_example(client):

    try:
        documents = ["Japan Origin. Combat hair breakage with Dove Japan Hair Breakage Protection range and bring back a head of silky hair stronger and smoother."]
        result = client.recognize_entities(documents = documents, language="en")[0]

        print("Named Entities:\n")
        for entity in result.entities:
            print("\tText: \t", entity.text, "\tCategory: \t", entity.category, "\tSubCategory: \t", entity.subcategory,
                    "\n\tConfidence Score: \t", round(entity.confidence_score, 2), "\tLength: \t", entity.length, "\tOffset: \t", entity.offset, "\n")

    except Exception as err:
        print("Encountered exception. {}".format(err))
entity_recognition_example(client)

Named Entities:

	Text: 	 Japan 	Category: 	 Location 	SubCategory: 	 GPE 
	Confidence Score: 	 1.0 	Length: 	 5 	Offset: 	 0 

	Text: 	 Dove Japan Hair Breakage Protection 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.7 	Length: 	 35 	Offset: 	 40 



In [12]:
# Perform Named Entity Recognition for sample Tranditional Chinese description (best clean).
def entity_recognition_example(client):

    try:
        documents = ["全新安怡的MoveMax配方蘊含鈣質骨膠原蛋白質及多種微量元素提供每日所需營養令你骨骼更強健關節更靈活肌肉更柔韌BR原產地馬來西亞奶粉源自紐西蘭BRBR含鈣質維他命D鎂及鋅有助您保持骨骼強健BR含骨膠原每杯含克骨膠原是組成關節軟骨主要成分強壯的關節軟骨有助支持關節靈活移動BR含維他命C及E有助支持關節靈活移動及保護細胞免受損傷BR含蛋白質鎂維他命B雜及維他命D有助保持肌肉最佳狀態及功能BR"]
        result = client.recognize_entities(documents = documents, language="zh-hans")[0]

        print("Named Entities:\n")
        for entity in result.entities:
            print("\tText: \t", entity.text, "\tCategory: \t", entity.category, "\tSubCategory: \t", entity.subcategory,
                    "\n\tConfidence Score: \t", round(entity.confidence_score, 2), "\tLength: \t", entity.length, "\tOffset: \t", entity.offset, "\n")

    except Exception as err:
        print("Encountered exception. {}".format(err))
entity_recognition_example(client)

Named Entities:

	Text: 	 Move 	Category: 	 Organization 	SubCategory: 	 None 
	Confidence Score: 	 0.49 	Length: 	 4 	Offset: 	 5 

	Text: 	 Max 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.44 	Length: 	 3 	Offset: 	 9 

	Text: 	 每日 	Category: 	 DateTime 	SubCategory: 	 Set 
	Confidence Score: 	 0.8 	Length: 	 2 	Offset: 	 33 

	Text: 	 馬來西亞 	Category: 	 Location 	SubCategory: 	 GPE 
	Confidence Score: 	 0.77 	Length: 	 4 	Offset: 	 61 

	Text: 	 粉 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.76 	Length: 	 1 	Offset: 	 66 

	Text: 	 紐西蘭 	Category: 	 Location 	SubCategory: 	 GPE 
	Confidence Score: 	 0.89 	Length: 	 3 	Offset: 	 69 

	Text: 	 節 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.64 	Length: 	 1 	Offset: 	 150 



In [13]:
# Perform Named Entity Recognition for sample Tranditional Chinese description.
def entity_recognition_example(client):

    try:
        documents = ["全新安怡的MoveMax配方，蘊含鈣質、骨膠原、蛋白質及多種微量元素，提供每日所需營養，令你骨骼更強健、關節更靈活、肌肉更柔韌。原產地馬來西亞, 奶粉源自紐西蘭。含鈣質、維他命D、鎂及鋅，有助您保持骨骼強健。含骨膠原(每杯含0.5克)。骨膠原是組成關節軟骨主要成分，強壯的關節軟骨有助支持關節靈活移動。含維他命C及E有助支持關節靈活移動及保護細胞免受損傷。含蛋白質、鎂、維他命B雜及維他命D，有助保持肌肉最佳狀態及功能。"]
        result = client.recognize_entities(documents = documents, language="zh-hans")[0]

        print("Named Entities:\n")
        for entity in result.entities:
            print("\tText: \t", entity.text, "\tCategory: \t", entity.category, "\tSubCategory: \t", entity.subcategory,
                    "\n\tConfidence Score: \t", round(entity.confidence_score, 2), "\tLength: \t", entity.length, "\tOffset: \t", entity.offset, "\n")

    except Exception as err:
        print("Encountered exception. {}".format(err))
entity_recognition_example(client)

Named Entities:

	Text: 	 MoveMax 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.66 	Length: 	 7 	Offset: 	 5 

	Text: 	 每日 	Category: 	 DateTime 	SubCategory: 	 Set 
	Confidence Score: 	 0.8 	Length: 	 2 	Offset: 	 37 

	Text: 	 馬來西亞 	Category: 	 Location 	SubCategory: 	 GPE 
	Confidence Score: 	 0.99 	Length: 	 4 	Offset: 	 67 

	Text: 	 奶粉 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.97 	Length: 	 2 	Offset: 	 73 

	Text: 	 紐西蘭 	Category: 	 Location 	SubCategory: 	 GPE 
	Confidence Score: 	 1.0 	Length: 	 3 	Offset: 	 77 

	Text: 	 鎂 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.62 	Length: 	 1 	Offset: 	 90 

	Text: 	 含骨膠原 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.76 	Length: 	 4 	Offset: 	 104 

	Text: 	 0.5克 	Category: 	 Quantity 	SubCategory: 	 Dimension 
	Confidence Score: 	 0.8 	Length: 	 4 	Offset: 	 112 

	Text: 	 骨膠原 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.97 	Lengt

In [15]:
# Perform Named Entity Recognition for sample Simplified Chinese description (best clean).
def entity_recognition_example(client):

    try:
        documents = ["全新安怡的MoveMax配方蕴含钙质骨胶原蛋白质及多种微量元素提供每日所需营养令你骨骼更强健关节更灵活肌肉更柔韧BR原产地马来西亚奶粉源自纽西兰BRBR含钙质维他命D镁及锌有助您保持骨骼强健BR含骨胶原每杯含克骨胶原是组成关节软骨主要成分强壮的关节软骨有助支持关节灵活移动BR含维他命C及E有助支持关节灵活移动及保护细胞免受损伤BR含蛋白质镁维他命B杂及维他命D有助保持肌肉最佳状态及功能BR"]
        result = client.recognize_entities(documents = documents, language="zh-hans")[0]

        print("Named Entities:\n")
        for entity in result.entities:
            print("\tText: \t", entity.text, "\tCategory: \t", entity.category, "\tSubCategory: \t", entity.subcategory,
                    "\n\tConfidence Score: \t", round(entity.confidence_score, 2), "\tLength: \t", entity.length, "\tOffset: \t", entity.offset, "\n")

    except Exception as err:
        print("Encountered exception. {}".format(err))
entity_recognition_example(client)

Named Entities:

	Text: 	 Move 	Category: 	 Organization 	SubCategory: 	 None 
	Confidence Score: 	 0.43 	Length: 	 4 	Offset: 	 5 

	Text: 	 每日 	Category: 	 DateTime 	SubCategory: 	 Set 
	Confidence Score: 	 0.8 	Length: 	 2 	Offset: 	 33 

	Text: 	 马来西亚 	Category: 	 Location 	SubCategory: 	 GPE 
	Confidence Score: 	 0.95 	Length: 	 4 	Offset: 	 61 

	Text: 	 维他命D 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.72 	Length: 	 4 	Offset: 	 177 



In [14]:
# Perform Named Entity Recognition for sample Simplified Chinese description.
def entity_recognition_example(client):

    try:
        documents = ["全新安怡的MoveMax配方，蕴含钙质、骨胶原、蛋白质及多种微量元素，提供每日所需营养，令你骨骼更强健、关节更灵活、肌肉更柔韧。原产地马来西亚, 奶粉源自纽西兰。含钙质、维他命D、镁及锌，有助您保持骨骼强健。含骨胶原(每杯含0.5克)。骨胶原是组成关节软骨主要成分，强壮的关节软骨有助支持关节灵活移动。含维他命C及E有助支持关节灵活移动及保护细胞免受损伤。含蛋白质、镁、维他命B杂及维他命D，有助保持肌肉最佳状态及功能。"]
        result = client.recognize_entities(documents = documents, language="zh-hans")[0]

        print("Named Entities:\n")
        for entity in result.entities:
            print("\tText: \t", entity.text, "\tCategory: \t", entity.category, "\tSubCategory: \t", entity.subcategory,
                    "\n\tConfidence Score: \t", round(entity.confidence_score, 2), "\tLength: \t", entity.length, "\tOffset: \t", entity.offset, "\n")

    except Exception as err:
        print("Encountered exception. {}".format(err))
entity_recognition_example(client)

Named Entities:

	Text: 	 MoveMax 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.49 	Length: 	 7 	Offset: 	 5 

	Text: 	 每日 	Category: 	 DateTime 	SubCategory: 	 Set 
	Confidence Score: 	 0.8 	Length: 	 2 	Offset: 	 37 

	Text: 	 马来西亚 	Category: 	 Location 	SubCategory: 	 GPE 
	Confidence Score: 	 1.0 	Length: 	 4 	Offset: 	 67 

	Text: 	 奶粉 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.89 	Length: 	 2 	Offset: 	 73 

	Text: 	 纽西兰 	Category: 	 Location 	SubCategory: 	 GPE 
	Confidence Score: 	 1.0 	Length: 	 3 	Offset: 	 77 

	Text: 	 钙质 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.68 	Length: 	 2 	Offset: 	 82 

	Text: 	 维他命D 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.85 	Length: 	 4 	Offset: 	 85 

	Text: 	 镁 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.95 	Length: 	 1 	Offset: 	 90 

	Text: 	 锌 	Category: 	 Product 	SubCategory: 	 None 
	Confidence Score: 	 0.87 	Length: 	 1 	Offs