# Load Libraries

In [1]:
%load_ext autoreload
%autoreload 2
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import os
import openai
import pandas as pd
import bamboolib as bam
import json

In [3]:
from apikey import apikey
os.environ['OPENAI_API_KEY']=apikey
openai.api_key  = os.getenv('OPENAI_API_KEY')

# OpenAI

In [4]:
def get_completion(prompt, model="gpt-3.5-turbo"):
    messages = [{'role':'system', 'content':'You are an expert in classifying products into UNSPSC codes.'},
                {"role": "user", "content": prompt}]
    
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0, # this is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]

In [29]:
def classify_UNSPSC(tabular_data):
    prompt=f"""
    You must classify the products delimited by triple backticks and Provide the output with the following keys :
    1.Output should be like the below
    {{
        "index": ,
        "Product Description": ,
        "UNSPSC coding":,
        "Segment": ,
        "Family": ,
        "Class": ,
        "Commodity": 
    }}
    [No Prose]
    ```{tabular_data}```
    """
    display(prompt)
    response=get_completion(prompt)
    print(f'response:\t{response}')
    response_dict=json.loads(response)
    print(f'response_dict:\t{response_dict}')
    
    return pd.DataFrame.from_dict(response_dict)  

In [22]:
def get_tabular(df,sample_size=5):
    sample_df=df.sample(sample_size)
    sample_df=sample_df.reset_index(drop=True)
    sample_df.rename(columns={'MaterialDescription': 'Product Description', 'Genericname ': 'Generic Product Description', 'SupplierName': 'Vendor Name'},inplace=True)
    X_df=sample_df[sample_df.columns[0:3]].reset_index()
    Y_df=sample_df[sample_df.columns[3:]].reset_index()

    X=X_df.to_dict(orient='records')
    tabular_data=str(X).replace('{','{{',).replace('}','}}')
    
    return (X,X_df,Y_df)
    

In [7]:
# tt=f"""
# Classify the products delimited by triple backticks and Provide the output with the following keys :
# index,Product Description,UNSPSC coding,Segment,Family,Class,Commodity.
# The output should only contain a JSON string
# ```
# [{{'index': 0, 'Product Description': 'BAKFLEX-A 8 MG tablet', 'Generic Product Description': 'thiocolchicoside-8 mg+aceclofenac-100 mg', 'Vendor Name': 'DROGARIA ANANTA'}}, {{'index': 1, 'Product Description': 'DOMIN 200 MG/5 ML ampoule solution for injection, 40 mg/mL', 'Generic Product Description': 'dopamine hydrochloride-200 mg', 'Vendor Name': 'VIMAL SALES CORPORATION'}}]
# ```
# """
# response=get_completion(tt)

# response_dict=json.loads(response)
# pd.DataFrame.from_dict(response_dict)

# UNSPSC Data

In [8]:
df = pd.read_csv('UNSPSCdataset.csv',encoding='mac_roman',low_memory=False)

In [9]:
df = df.loc[df['SupplierName'].notna()]
df = df.loc[df['Genericname '].notna()]
df = df.loc[df['Commodity'].notna()]
df = df[['MaterialDescription', 'Genericname ', 'SupplierName', 'Segment', 'Family', 'Class', 'Commodity', 'UNSPSC_Final']]
print(df.shape)
df = df.drop_duplicates(subset=['MaterialDescription', 'Genericname '], keep='first')
print(df.shape)

(5808, 8)
(744, 8)


In [32]:
tt=get_tabular(df,3)

In [33]:
result=classify_UNSPSC(tt[0])

'\n    You must classify the products delimited by triple backticks and Provide the output with the following keys :\n    1.Output should be like the below\n    {\n        "index": ,\n        "Product Description": ,\n        "UNSPSC coding":,\n        "Segment": ,\n        "Family": ,\n        "Class": ,\n        "Commodity": \n    }\n    [No Prose]\n    ```[{\'index\': 0, \'Product Description\': \'AZIPRO 250 MG tablet\', \'Generic Product Description\': \'azithromycin anhydrous-250 mg\', \'Vendor Name\': \'VARDHMAN PHARMA\'}, {\'index\': 1, \'Product Description\': \'ACAMPROL\', \'Generic Product Description\': \'ACAMPROSATE CALCIUM\', \'Vendor Name\': \'SALEM SENTHIL PHARMA DISTRIBUTORS\'}]```\n    '

response:	[
    {
        "index": 0,
        "Product Description": "AZIPRO 250 MG tablet",
        "UNSPSC coding": "51191600",
        "Segment": "Pharmaceuticals",
        "Family": "Drugs",
        "Class": "Antibacterials",
        "Commodity": "Azithromycin"
    },
    {
        "index": 1,
        "Product Description": "ACAMPROL",
        "UNSPSC coding": "51161810",
        "Segment": "Pharmaceuticals",
        "Family": "Drugs",
        "Class": "Central nervous system agents",
        "Commodity": "Acamprosate"
    }
]
response_dict:	[{'index': 0, 'Product Description': 'AZIPRO 250 MG tablet', 'UNSPSC coding': '51191600', 'Segment': 'Pharmaceuticals', 'Family': 'Drugs', 'Class': 'Antibacterials', 'Commodity': 'Azithromycin'}, {'index': 1, 'Product Description': 'ACAMPROL', 'UNSPSC coding': '51161810', 'Segment': 'Pharmaceuticals', 'Family': 'Drugs', 'Class': 'Central nervous system agents', 'Commodity': 'Acamprosate'}]


In [34]:
result

   index   Product Description UNSPSC coding          Segment Family  \
0      0  AZIPRO 250 MG tablet      51191600  Pharmaceuticals  Drugs   
1      1              ACAMPROL      51161810  Pharmaceuticals  Drugs   

                           Class     Commodity  
0                 Antibacterials  Azithromycin  
1  Central nervous system agents   Acamprosate  

In [35]:
pd.merge(result,)

   index   Product Description    Generic Product Description  \
0      0  AZIPRO 250 MG tablet  azithromycin anhydrous-250 mg   
1      1              ACAMPROL            ACAMPROSATE CALCIUM   

                         Vendor Name  
0                    VARDHMAN PHARMA  
1  SALEM SENTHIL PHARMA DISTRIBUTORS  

In [36]:
tt[2]

   index                            Segment                        Family  \
0      0  Drugs and Pharmaceutical Products          Anti infective drugs   
1      1  Drugs and Pharmaceutical Products  Central nervous system drugs   

                                         Class            Commodity  \
0                                  Antibiotics         Azithromycin   
1  Nonsteroidal anti inflammatory drugs NSAIDs  Acamprosate calcium   

   UNSPSC_Final  
0      51101572  
1      51142150  