# Load Libraries

In [1]:
%load_ext autoreload
%autoreload 2
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import os
import openai
import pandas as pd
import bamboolib as bam
import json

In [3]:
from apikey import apikey
os.environ['OPENAI_API_KEY']=apikey
openai.api_key  = os.getenv('OPENAI_API_KEY')

# OpenAI

In [4]:
def get_completion(prompt, model="gpt-3.5-turbo"):
    messages = [{'role':'system', 'content':'You are an expert in classifying products into UNSPSC codes.'},
                {"role": "user", "content": prompt}]
    
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0, # this is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]

In [5]:
def classify_UNSPSC(tabular_data):
    prompt=f"""
    You must classify the products delimited by triple backticks:
    1.Provide the output with the following keys only
    {{
        "index": ,
        "Product Description": ,
        "UNSPSC coding":,
        "Segment": ,
        "Family": ,
        "Class": ,
        "Commodity": 
    }}
    2.No AI introduction, no AI analysis, return generated data only, not human-readable
    ```{tabular_data}```"""
    display(prompt)
    response=get_completion(prompt)
    print(f'response:\t{response}')
    response_dict=json.loads(response)
    print(f'response_dict:\t{response_dict}')
    
    return pd.DataFrame.from_dict(response_dict)  

In [7]:
def get_tabular_UNSPSC(df,sample_size=5):
    sample_df=df.sample(sample_size)
    sample_df=sample_df.reset_index(drop=True)
    sample_df.rename(columns={'MaterialDescription': 'Product Description', 'Genericname ': 'Generic Product Description', 'SupplierName': 'Vendor Name'},inplace=True)
    X_df=sample_df[sample_df.columns[0:3]].reset_index()
    Y_df=sample_df[sample_df.columns[3:]].reset_index()

    X=X_df.to_dict(orient='records')
    X=str(X).replace('{','{{',).replace('}','}}')
    
    return (X,X_df,Y_df)
    

In [8]:
# tt=f"""
# Classify the products delimited by triple backticks and Provide the output with the following keys :
# index,Product Description,UNSPSC coding,Segment,Family,Class,Commodity.
# The output should only contain a JSON string
# ```
# [{{'index': 0, 'Product Description': 'BAKFLEX-A 8 MG tablet', 'Generic Product Description': 'thiocolchicoside-8 mg+aceclofenac-100 mg', 'Vendor Name': 'DROGARIA ANANTA'}}, {{'index': 1, 'Product Description': 'DOMIN 200 MG/5 ML ampoule solution for injection, 40 mg/mL', 'Generic Product Description': 'dopamine hydrochloride-200 mg', 'Vendor Name': 'VIMAL SALES CORPORATION'}}]
# ```
# """
# response=get_completion(tt)

# response_dict=json.loads(response)
# pd.DataFrame.from_dict(response_dict)

# UNSPSC Data

In [9]:
df = pd.read_csv('UNSPSCdataset.csv',encoding='mac_roman',low_memory=False)

In [10]:
df = df.loc[df['SupplierName'].notna()]
df = df.loc[df['Genericname '].notna()]
df = df.loc[df['Commodity'].notna()]
df = df[['MaterialDescription', 'Genericname ', 'SupplierName', 'Segment', 'Family', 'Class', 'Commodity', 'UNSPSC_Final']]
print(df.shape)
df = df.drop_duplicates(subset=['MaterialDescription', 'Genericname '], keep='first')
print(df.shape)

(5808, 8)
(744, 8)


In [11]:
df.to_csv('UNSPSCdataset_final.csv',index=False)

# Predict 

In [12]:
tt=get_tabular_UNSPSC(df)

In [13]:
tt[0]

"[{{'index': 0, 'Product Description': 'CARDACE 10 MG tablet', 'Generic Product Description': 'ramipril-10 mg', 'Vendor Name': 'L N B ENTERPRISES'}}, {{'index': 1, 'Product Description': 'DIABETROL SR tablet', 'Generic Product Description': 'glibenclamide-5 mg+metformin hydrochloride sustained release-500 mg', 'Vendor Name': 'BARROS ENTERPRISES'}}, {{'index': 2, 'Product Description': 'COMBIHALE FT 250 R/C', 'Generic Product Description': 'FORMOTEROL FUMARATE+TIOTROPIUM DRY POWDER', 'Vendor Name': 'GEETHA PHARMA'}}, {{'index': 3, 'Product Description': 'CANDY 300MG CAPS', 'Generic Product Description': 'CLINDAMYCIN  CAPS 300MG', 'Vendor Name': 'PHARMA ASSOCIATES'}}, {{'index': 4, 'Product Description': 'CREPE BANDAGE 10CM', 'Generic Product Description': 'CREPE BANDAGE', 'Vendor Name': 'MEDITEK INDIA'}}]"

In [14]:
# json.dumps(tt[0])

In [15]:
result=classify_UNSPSC(tt[0])

'\n    You must classify the products delimited by triple backticks:\n    1.Provide the output with the following keys only\n    {\n        "index": ,\n        "Product Description": ,\n        "UNSPSC coding":,\n        "Segment": ,\n        "Family": ,\n        "Class": ,\n        "Commodity": \n    }\n    2.No AI introduction, no AI analysis, return generated data only, not human-readable\n    ```[{{\'index\': 0, \'Product Description\': \'CARDACE 10 MG tablet\', \'Generic Product Description\': \'ramipril-10 mg\', \'Vendor Name\': \'L N B ENTERPRISES\'}}, {{\'index\': 1, \'Product Description\': \'DIABETROL SR tablet\', \'Generic Product Description\': \'glibenclamide-5 mg+metformin hydrochloride sustained release-500 mg\', \'Vendor Name\': \'BARROS ENTERPRISES\'}}, {{\'index\': 2, \'Product Description\': \'COMBIHALE FT 250 R/C\', \'Generic Product Description\': \'FORMOTEROL FUMARATE+TIOTROPIUM DRY POWDER\', \'Vendor Name\': \'GEETHA PHARMA\'}}, {{\'index\': 3, \'Product Descript

response:	[{"index": 0, "Product Description": "CARDACE 10 MG tablet", "UNSPSC coding": "51300000", "Segment": "Pharmaceuticals and Biochemicals and Healt...", "Family": "Drugs and Pharmaceutical Products", "Class": "Systemic hormonal preparations, excl. sex ho...", "Commodity": "Adrenergics and dopaminergics and agents, excl..."}, {"index": 1, "Product Description": "DIABETROL SR tablet", "UNSPSC coding": "51300000", "Segment": "Pharmaceuticals and Biochemicals and Healt...", "Family": "Drugs and Pharmaceutical Products", "Class": "Systemic hormonal preparations, excl. sex ho...", "Commodity": "Antidiabetic agents"}, {"index": 2, "Product Description": "COMBIHALE FT 250 R/C", "UNSPSC coding": "51300000", "Segment": "Pharmaceuticals and Biochemicals and Healt...", "Family": "Drugs and Pharmaceutical Products", "Class": "Respiratory system drugs", "Commodity": "Bronchodilator agents"}, {"index": 3, "Product Description": "CANDY 300MG CAPS", "UNSPSC coding": "51300000", "Segment": "Pharm

In [16]:
result

   index   Product Description UNSPSC coding  \
0      0  CARDACE 10 MG tablet      51300000   
1      1   DIABETROL SR tablet      51300000   
2      2  COMBIHALE FT 250 R/C      51300000   
3      3      CANDY 300MG CAPS      51300000   
4      4    CREPE BANDAGE 10CM      42311505   

                                          Segment  \
0   Pharmaceuticals and Biochemicals and Healt...   
1   Pharmaceuticals and Biochemicals and Healt...   
2   Pharmaceuticals and Biochemicals and Healt...   
3   Pharmaceuticals and Biochemicals and Healt...   
4  Medical Equipment and Accessories and Suppl...   

                              Family  \
0  Drugs and Pharmaceutical Products   
1  Drugs and Pharmaceutical Products   
2  Drugs and Pharmaceutical Products   
3  Drugs and Pharmaceutical Products   
4       Medical apparel and textiles   

                                             Class  \
0  Systemic hormonal preparations, excl. sex ho...   
1  Systemic hormonal preparations, excl. se

In [24]:
tt="""[{'index': 0, 'Product Description': 'DERMADEW CALOE LOTION 60ML', 'UNSPSC coding': '53131600', 'Segment': 'Segment Not Available', 'Family': 'Family Not Available', 'Class': 'Class Not Available', 'Commodity': 'Commodity Not Available'}, {'index': 1, 'Product Description': 'CAVAFIX 134 B-BRAN', 'UNSPSC coding': '42295451', 'Segment': 'Segment Not Available', 
'Family': 'Family Not Available', 'Class': 'Class Not Available', 'Commodity': 'Commodity Not Available'}, {'index': 2, 'Product Description': 'AZUKON MR 30 MG tab', 'UNSPSC coding': '51241100', 'Segment': 'Segment Not Available', 'Family': 'Family Not Available', 'Class': 'Class Not Available', 'Commodity': 'Commodity Not Available'}, {'index': 3, 'Product Description': 'BECOZINC CAPS', 'UNSPSC coding': '51191900', 'Segment': 'Segment Not Available', 'Family': 'Family Not Available', 'Class': 'Class Not Available', 'Commodity': 'Commodity Not Available'}, {'index': 4, 'Product Description': 'BETADINE 5% w/w cream 10 GM', 'UNSPSC coding': '51102700', 'Segment': 'Segment Not Available', 'Family': 'Family Not Available', 'Class': 'Class Not Available', 'Commodity': 'Commodity Not Available'}]"""

In [26]:
json.loads(tt)

JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 3 (char 2)