#### This was used to test extraction of features using the Anthropic Claude 3 Sonnet model through an API.

In [1]:
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
import os
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
import json

In [2]:
load_dotenv()
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

In [5]:
llm = ChatAnthropic(temperature=0, model_name="claude-3-sonnet-20240229")

In [14]:
# Define the prompt template
prompt_template = """
Extract the features of the provided product and represent them as separate key-value pairs in a JSON format. Ensure that each feature is listed individually without any lists or dictionaries within the features. If the feature includes a specific attribute (e.g., size, capacity, type, port), include that as well. List each port as a separate feature. Return the extracted features as a JSON object.
Input:
{
  "url": "https://www.neptun.mk/categories/gaming_laptopi/LENOVO-IdeaPad-Gaming-3-15ACH6-R5-5500H-16GB-512GB-RTX-2050-4GB",
  "title": "",
  "warranty": "24",
  "regular_price": "39.999",
  "happy_price": "36.999",
  "description": [
    "Лаптоп",
    "Дисплеј:15.6\" (39.6cm)",
    "Резолуција:(1920x1080) FHD, IPS 250nits Anti-glare, 45% NTSC, 60Hz",
    "Chipset: AMD SoC Platform",
    "Процесор:AMD Ryzen™ 5 5500H (4C / 8T, 3.3 / 4.2GHz, 2MB L2 / 8MB L3)",
    "Графичка:NVIDIA® GeForce RTX™ 2050 4GB GDDR6",
    "RAM меморија:16GB DDR4-3200",
    "Диск (Storage):512GB SSD M.2 2242 PCIe® 4.0x4 NVMe®",
    "Без оперативен систем",
    "WLAN + Bluetooth: 11ac 2x2 + BT5.0",
    "Камера:HD 720p with Privacy Shutter",
    "Аудио: High Definition (HD) Audio, Realtek® ALC3287 codec",
    "Звучници: Stereo speakers, 2W x2, Nahimic Audio",
    "Микрофон:2x, Array",
    "Тастатура: White Backlit, English",
    "Wireless:Wi-Fi® 6, 11ax 2x2 + BT5.1",
    "Порти:",
    "2x USB 3.2 Gen 1",
    "1x USB-C® 3.2 Gen 1 (support data transfer only)",
    "1x HDMI® 2.0",
    "1x Headphone / microphone combo jack (3.5mm)",
    "1x Ethernet (RJ-45)",
    "1x Power connector",
    "Батерија: Integrated 45Wh",
    "Power Adapter: 135W Slim Tip (3-pin)"
  ],
  "category": "gaming_laptopi"
}
Output:
{
  "url": "https://www.neptun.mk/categories/gaming_laptopi/LENOVO-IdeaPad-Gaming-3-15ACH6-R5-5500H-16GB-512GB-RTX-2050-4GB",
  "title": "",
  "regular_price": "39.999",
  "happy_price": "36.999",
  "warranty": "24",
  "features": {
    "Display": "15.6\" (39.6cm)",
    "Resolution": "1920x1080 FHD, IPS 250nits Anti-glare",
    "Chipset": "AMD SoC Platform",
    "Processor": "AMD Ryzen™ 5 5500H (4C / 8T, 3.3 / 4.2GHz, 2MB L2 / 8MB L3)",
    "Graphics": "NVIDIA® GeForce RTX™ 2050 4GB GDDR6",
    "RAM": "16GB DDR4-3200",
    "Storage": "512GB SSD M.2 2242 PCIe® 4.0x4 NVMe®",
    "Operating System": "Без оперативен систем",
    "Wireless": "Wi-Fi® 6, 11ax 2x2 + BT5.1",
    "Camera": "HD 720p with Privacy Shutter",
    "Audio": "High Definition (HD) Audio, Realtek® ALC3287 codec",
    "Speakers": "Stereo speakers, 2W x2, Nahimic Audio",
    "Microphone": "2x, Array",
    "Keyboard": "White Backlit, English",
    "Headset Port": "1 x Headset (headphone and microphone combo) port",
    "RJ45 Ethernet Port": "1 x RJ45 Ethernet port",
    "USB Ports": "3 x USB 3.2 Gen 1 ports",
    "HDMI Port": "1 x HDMI 2.0 port",
    "USB-C Port": "1 x USB-C 3.2 Gen 1 port (support data transfer only)",
    "Battery": "Integrated 45Wh",
    "Power Adapter": "135W Slim Tip (3-pin)",
    "Display Port": false
  }
}
Input:
{
  "url": "https://www.neptun.mk/categories/gaming_laptopi/ACER-AN515-57-53A7-i5-11400H-8GB-512B-RTX-3050-4GB",
  "title": "ЛАПТОП ACER AN515-57-53A7 I5-11400H/8GB/512B/RTX 3050 4GB",
  "warranty": "24",
  "regular_price": "55.999",
  "happy_price": "47.999",
  "description": [
    "Лаптоп",
    "Дисплеј:15.6\" (39.6cm)",
    "Резолуција:(1920 x 1080) FHD IPS 144Hz",
    "Процесор:Intel® Core i5-11400H 2.70 GHz Hexa-core",
    "Графика:NVIDIA® GeForce RTX 3050 4GB",
    "RAM меморија:8GB DDR4",
    "Диск (Storage):512GB SSD",
    "LAN:IEEE 802.11 a/b/g/n/ac/ax",
    "Gigabit Ethernet",
    "Bluetooth 5.2",
    "Микрофон",
    "Оперативен систем:UEFI Shell",
    "Порти:",
    "HDMI, USB, LAN, Type C",
    "Батерија:4-cell (Li-Ion) - 8h"
  ],
  "category": "gaming_laptopi"
}
Output:
{
  "url": "https://www.neptun.mk/categories/gaming_laptopi/ACER-AN515-57-53A7-i5-11400H-8GB-512B-RTX-3050-4GB",
  "title": "ЛАПТОП ACER AN515-57-53A7 I5-11400H/8GB/512B/RTX 3050 4GB",
  "regular_price": "55.999",
  "happy_price": "47.999",
  "warranty": "24",
  "features": {
    "Display": "15.6\" (39.6cm)",
    "Resolution": "1920 x 1080 FHD IPS 144Hz",
    "Processor": "Intel® Core i5-11400H 2.70 GHz Hexa-core",
    "Graphics": "NVIDIA® GeForce RTX 3050 4GB",
    "RAM": "8GB DDR4",
    "Storage": "512GB SSD",
    "LAN": "IEEE 802.11 a/b/g/n/ac/ax, Gigabit Ethernet",
    "Bluetooth": "5.2",
    "Microphone": true,
    "Operating System": "UEFI Shell",
    "HDMI Port": "1 x HDMI port",
    "USB Port": "1 x USB port",
    "LAN Port": "1 x LAN port",
    "Type C Port": "1 x Type C port",
    "Battery": "4-cell (Li-Ion) - 8h"
  }
}

Input:
{
"url": "https://www.neptun.mk/categories/gaming_laptopi/DELL-G15-5530-i5-13450HX-16GB-DDR5-512GB-RTX-3050-6GB",
"title": "ЛАПТОП DELL G15 5530 I5-13450HX/16GB DDR5/512GB/RTX 3050 6GB",
"warranty": "36",
"regular_price": "69.999",
"happy_price": "62.999",
"description": [
"Лаптоп",
"Дисплеј:15.6" (39.6cm)",
"Резолуција: (1920 x 1080) FHD 120Hz 250 nits WVA Anti- Glare LED Backlit Narrow Border Display",
"Процесор:13th Gen Intel® Core™ i5-13450HX (20 MB cache, 10 cores, 16 threads, up to 4.60 GHz Turbo)",
"Графика:NVIDIA® GeForce RTX™ 3050, 6 GB GDDR6",
"RAM Меморија: 16 GB: 2 x 8 GB, DDR5, 4800 MT/s",
"Диск (Storage): 512 GB, M.2, PCIe NVMe, SSD",
"Wi-Fi 6 AX201, 2x2, 802.11ax, Bluetooth® 5.2",
"Камера:Integrated widescreen HD (720p) Webcam with Single Array Digital Microphone",
"Порти:",
"1 x Headset (headphone and microphone combo) port",
"1 x RJ45 Ethernet port",
"3 x USB 3.2 Gen 1 ports",
"1 x HDMI 2.1 port",
"1 x USB-C 3.2 Gen 2 port with DisplayPort™",
"Батерија: 3 Cell, 56 Wh, integrated",
"Полнач: 240W AC Adapter",
"Позадинско осветлување на тастатурата (backlit keyboard)",
"Carbon BlackDark Shadow Gray with Black thermal shelf"
],
"category": "gaming_laptopi"
}
Output:
{
  "url": "https://www.neptun.mk/categories/gaming_laptopi/DELL-G15-5530-i5-13450HX-16GB-DDR5-512GB-RTX-3050-6GB",
  "title": "ЛАПТОП DELL G15 5530 I5-13450HX/16GB DDR5/512GB/RTX 3050 6GB",
  "warranty": "36",
  "regular_price": "69.999",
  "happy_price": "62.999",
  "features": {
    "Display": "15.6\" (39.6cm)",
    "Resolution": "(1920 x 1080) FHD 120Hz 250 nits WVA Anti-Glare LED Backlit Narrow Border Display",
    "Processor": "13th Gen Intel® Core™ i5-13450HX (20 MB cache, 10 cores, 16 threads, up to 4.60 GHz Turbo)",
    "Graphics": "NVIDIA® GeForce RTX™ 3050, 6 GB GDDR6",
    "RAM": "16 GB: 2 x 8 GB, DDR5, 4800 MT/s",
    "Storage": "512 GB, M.2, PCIe NVMe, SSD",
    "Wireless": "Wi-Fi 6 AX201, 2x2, 802.11ax, Bluetooth® 5.2",
    "Camera": "Integrated widescreen HD (720p) Webcam with Single Array Digital Microphone",
    "Headset Port": "1 x Headset (headphone and microphone combo) port",
    "RJ45 Ethernet Port": "1 x RJ45 Ethernet port",
    "USB Ports": "3 x USB 3.2 Gen 1 ports",
    "HDMI Port": "1 x HDMI 2.1 port",
    "USB-C Port": "1 x USB-C 3.2 Gen 2 port™",
    "Display Port": true,
    "Battery": "3 Cell, 56 Wh, integrated",
    "Power Adapter": "240W AC Adapter",
    "Backlit Keyboard": "Pozadinsko осветлување на тастатурата (backlit keyboard)",
    "Color": "Carbon BlackDark Shadow Gray with Black thermal shelf"
  }
}
Provided product:
{{text}}
"""
prompt = PromptTemplate(template=prompt_template, input_variables=["text"], template_format="jinja2")
chain = LLMChain(llm=llm, prompt=prompt)

In [15]:
# Directory containing JSON files
input_directory = r"C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_with_categories\gaming_laptopi"
# Directory to save processed JSON files
output_directory = r"C:\Users\tomce\OneDrive - UKIM, FINKI\Desktop\Fakultet 3ta Godina\2 Sesti Semestar\0 DATA SCIENCE SEMINARSKA\1 Starting Over\products_opus_testing"

# Create the output directory if it doesn't exist
os.makedirs(output_directory, exist_ok=True)

In [17]:
# Counter to limit processing to 10 files
counter = 0
max_files = 10

# Loop through all files in the input directory
for filename in os.listdir(input_directory):
    if filename.endswith('.json'):
        input_filepath = os.path.join(input_directory, filename)
        
        # Open and read each JSON file
        with open(input_filepath, 'r') as file:
            data = json.load(file)
            text = json.dumps(data)
        
            print(text)
            
            # Process the JSON data using the LangChain model
            structured_data = chain.invoke({"text": text})
            
            # Determine the output file path
            output_filename = f"processed_{filename}"
            output_filepath = os.path.join(output_directory, output_filename)
            
            # Write the processed data to the output file
            with open(output_filepath, 'w') as outfile:
                json.dump(structured_data, outfile, indent=4)
        
        # Increment the counter
        counter += 1

print(f"Processed {counter} JSON files and saved them in {output_directory}")

{"url": "https://www.neptun.mk/categories/gaming_laptopi/ACER-AN515-57-53A7-i5-11400H-8GB-512B-RTX-3050-4GB", "title": "\u041b\u0410\u041f\u0422\u041e\u041f ACER AN515-57-53A7 I5-11400H/8GB/512B/RTX 3050 4GB", "warranty": "24", "regular_price": "55.999", "happy_price": "47.999", "description": ["\u041b\u0430\u043f\u0442\u043e\u043f\n\u0414\u0438\u0441\u043f\u043b\u0435\u0458:15.6\" (39.6cm)\n\u0420\u0435\u0437\u043e\u043b\u0443\u0446\u0438\u0458\u0430:(1920 x 1080) FHD IPS 144Hz\n\u041f\u0440\u043e\u0446\u0435\u0441\u043e\u0440:Intel\u00ae Core i5-11400H 2.70 GHz Hexa-core\n\u0413\u0440\u0430\u0444\u0438\u043a\u0430:NVIDIA\u00ae GeForce RTX 3050 4GB\nRAM \u043c\u0435\u043c\u043e\u0440\u0438\u0458\u0430:8GB DDR4\n\u0414\u0438\u0441\u043a (Storage):512GB SSD\nLAN:IEEE 802.11 a/b/g/n/ac/ax\nGigabit Ethernet\nBluetooth 5.2\n\u041c\u0438\u043a\u0440\u043e\u0444\u043e\u043d\n\u041e\u043f\u0435\u0440\u0430\u0442\u0438\u0432\u0435\u043d \u0441\u0438\u0441\u0442\u0435\u043c:UEFI Shell\n\u041f\u

RateLimitError: Error code: 429 - {'type': 'error', 'error': {'type': 'rate_limit_error', 'message': 'Number of request tokens has exceeded your daily rate limit (https://docs.anthropic.com/en/api/rate-limits); see the response headers for current usage. Please reduce the prompt length or the maximum tokens requested, or try again later. You may also contact sales at https://www.anthropic.com/contact-sales to discuss your options for a rate limit increase.'}}

Input:
{
  "url": "https://www.neptun.mk/categories/gaming_laptopi/LENOVO-IdeaPad-Gaming-3-15ACH6-R5-5500H-16GB-512GB-RTX-2050-4GB",
  "title": "",
  "warranty": "24",
  "regular_price": "39.999",
  "happy_price": "36.999",
  "description": [
    "Лаптоп",
    "Дисплеј:15.6\" (39.6cm)",
    "Резолуција:(1920x1080) FHD, IPS 250nits Anti-glare, 45% NTSC, 60Hz",
    "Chipset: AMD SoC Platform",
    "Процесор:AMD Ryzen™ 5 5500H (4C / 8T, 3.3 / 4.2GHz, 2MB L2 / 8MB L3)",
    "Графичка:NVIDIA® GeForce RTX™ 2050 4GB GDDR6",
    "RAM меморија:16GB DDR4-3200",
    "Диск (Storage):512GB SSD M.2 2242 PCIe® 4.0x4 NVMe®",
    "Без оперативен систем",
    "WLAN + Bluetooth: 11ac 2x2 + BT5.0",
    "Камера:HD 720p with Privacy Shutter",
    "Аудио: High Definition (HD) Audio, Realtek® ALC3287 codec",
    "Звучници: Stereo speakers, 2W x2, Nahimic Audio",
    "Микрофон:2x, Array",
    "Тастатура: White Backlit, English",
    "Wireless:Wi-Fi® 6, 11ax 2x2 + BT5.1",
    "Порти:",
    "2x USB 3.2 Gen 1",
    "1x USB-C® 3.2 Gen 1 (support data transfer only)",
    "1x HDMI® 2.0",
    "1x Headphone / microphone combo jack (3.5mm)",
    "1x Ethernet (RJ-45)",
    "1x Power connector",
    "Батерија: Integrated 45Wh",
    "Power Adapter: 135W Slim Tip (3-pin)"
  ],
  "category": "gaming_laptopi"
}
Output:
{
  "url": "https://www.neptun.mk/categories/gaming_laptopi/LENOVO-IdeaPad-Gaming-3-15ACH6-R5-5500H-16GB-512GB-RTX-2050-4GB",
  "title": "",
  "regular_price": "39.999",
  "happy_price": "36.999",
  "warranty": "24",
  "features": {
    "Display": "15.6\" (39.6cm)",
    "Resolution": "1920x1080 FHD, IPS 250nits Anti-glare",
    "Chipset": "AMD SoC Platform",
    "Processor": "AMD Ryzen™ 5 5500H (4C / 8T, 3.3 / 4.2GHz, 2MB L2 / 8MB L3)",
    "Graphics": "NVIDIA® GeForce RTX™ 2050 4GB GDDR6",
    "RAM": "16GB DDR4-3200",
    "Storage": "512GB SSD M.2 2242 PCIe® 4.0x4 NVMe®",
    "Operating System": "Без оперативен систем",
    "Wireless": "Wi-Fi® 6, 11ax 2x2 + BT5.1",
    "Camera": "HD 720p with Privacy Shutter",
    "Audio": "High Definition (HD) Audio, Realtek® ALC3287 codec",
    "Speakers": "Stereo speakers, 2W x2, Nahimic Audio",
    "Microphone": "2x, Array",
    "Keyboard": "White Backlit, English",
    "Headset Port": "1 x Headset (headphone and microphone combo) port",
    "RJ45 Ethernet Port": "1 x RJ45 Ethernet port",
    "USB Ports": "3 x USB 3.2 Gen 1 ports",
    "HDMI Port": "1 x HDMI 2.0 port",
    "USB-C Port": "1 x USB-C 3.2 Gen 1 port (support data transfer only)",
    "Battery": "Integrated 45Wh",
    "Power Adapter": "135W Slim Tip (3-pin)",
    "Display Port": false
  }
}
Input:
{
  "url": "https://www.neptun.mk/categories/gaming_laptopi/ACER-AN515-57-53A7-i5-11400H-8GB-512B-RTX-3050-4GB",
  "title": "ЛАПТОП ACER AN515-57-53A7 I5-11400H/8GB/512B/RTX 3050 4GB",
  "warranty": "24",
  "regular_price": "55.999",
  "happy_price": "47.999",
  "description": [
    "Лаптоп",
    "Дисплеј:15.6\" (39.6cm)",
    "Резолуција:(1920 x 1080) FHD IPS 144Hz",
    "Процесор:Intel® Core i5-11400H 2.70 GHz Hexa-core",
    "Графика:NVIDIA® GeForce RTX 3050 4GB",
    "RAM меморија:8GB DDR4",
    "Диск (Storage):512GB SSD",
    "LAN:IEEE 802.11 a/b/g/n/ac/ax",
    "Gigabit Ethernet",
    "Bluetooth 5.2",
    "Микрофон",
    "Оперативен систем:UEFI Shell",
    "Порти:",
    "HDMI, USB, LAN, Type C",
    "Батерија:4-cell (Li-Ion) - 8h"
  ],
  "category": "gaming_laptopi"
}
Output:
{
  "url": "https://www.neptun.mk/categories/gaming_laptopi/ACER-AN515-57-53A7-i5-11400H-8GB-512B-RTX-3050-4GB",
  "title": "ЛАПТОП ACER AN515-57-53A7 I5-11400H/8GB/512B/RTX 3050 4GB",
  "regular_price": "55.999",
  "happy_price": "47.999",
  "warranty": "24",
  "features": {
    "Display": "15.6\" (39.6cm)",
    "Resolution": "1920 x 1080 FHD IPS 144Hz",
    "Processor": "Intel® Core i5-11400H 2.70 GHz Hexa-core",
    "Graphics": "NVIDIA® GeForce RTX 3050 4GB",
    "RAM": "8GB DDR4",
    "Storage": "512GB SSD",
    "LAN": "IEEE 802.11 a/b/g/n/ac/ax, Gigabit Ethernet",
    "Bluetooth": "5.2",
    "Microphone": true,
    "Operating System": "UEFI Shell",
    "HDMI Port": "1 x HDMI port",
    "USB Port": "1 x USB port",
    "LAN Port": "1 x LAN port",
    "Type C Port": "1 x Type C port",
    "Battery": "4-cell (Li-Ion) - 8h"
  }
}

Input:
{
"url": "https://www.neptun.mk/categories/gaming_laptopi/DELL-G15-5530-i5-13450HX-16GB-DDR5-512GB-RTX-3050-6GB",
"title": "ЛАПТОП DELL G15 5530 I5-13450HX/16GB DDR5/512GB/RTX 3050 6GB",
"warranty": "36",
"regular_price": "69.999",
"happy_price": "62.999",
"description": [
"Лаптоп",
"Дисплеј:15.6" (39.6cm)",
"Резолуција: (1920 x 1080) FHD 120Hz 250 nits WVA Anti- Glare LED Backlit Narrow Border Display",
"Процесор:13th Gen Intel® Core™ i5-13450HX (20 MB cache, 10 cores, 16 threads, up to 4.60 GHz Turbo)",
"Графика:NVIDIA® GeForce RTX™ 3050, 6 GB GDDR6",
"RAM Меморија: 16 GB: 2 x 8 GB, DDR5, 4800 MT/s",
"Диск (Storage): 512 GB, M.2, PCIe NVMe, SSD",
"Wi-Fi 6 AX201, 2x2, 802.11ax, Bluetooth® 5.2",
"Камера:Integrated widescreen HD (720p) Webcam with Single Array Digital Microphone",
"Порти:",
"1 x Headset (headphone and microphone combo) port",
"1 x RJ45 Ethernet port",
"3 x USB 3.2 Gen 1 ports",
"1 x HDMI 2.1 port",
"1 x USB-C 3.2 Gen 2 port with DisplayPort™",
"Батерија: 3 Cell, 56 Wh, integrated",
"Полнач: 240W AC Adapter",
"Позадинско осветлување на тастатурата (backlit keyboard)",
"Carbon BlackDark Shadow Gray with Black thermal shelf"
],
"category": "gaming_laptopi"
}
Output:
{
  "url": "https://www.neptun.mk/categories/gaming_laptopi/DELL-G15-5530-i5-13450HX-16GB-DDR5-512GB-RTX-3050-6GB",
  "title": "ЛАПТОП DELL G15 5530 I5-13450HX/16GB DDR5/512GB/RTX 3050 6GB",
  "warranty": "36",
  "regular_price": "69.999",
  "happy_price": "62.999",
  "features": {
    "Display": "15.6\" (39.6cm)",
    "Resolution": "(1920 x 1080) FHD 120Hz 250 nits WVA Anti-Glare LED Backlit Narrow Border Display",
    "Processor": "13th Gen Intel® Core™ i5-13450HX (20 MB cache, 10 cores, 16 threads, up to 4.60 GHz Turbo)",
    "Graphics": "NVIDIA® GeForce RTX™ 3050, 6 GB GDDR6",
    "RAM": "16 GB: 2 x 8 GB, DDR5, 4800 MT/s",
    "Storage": "512 GB, M.2, PCIe NVMe, SSD",
    "Wireless": "Wi-Fi 6 AX201, 2x2, 802.11ax, Bluetooth® 5.2",
    "Camera": "Integrated widescreen HD (720p) Webcam with Single Array Digital Microphone",
    "Headset Port": "1 x Headset (headphone and microphone combo) port",
    "RJ45 Ethernet Port": "1 x RJ45 Ethernet port",
    "USB Ports": "3 x USB 3.2 Gen 1 ports",
    "HDMI Port": "1 x HDMI 2.1 port",
    "USB-C Port": "1 x USB-C 3.2 Gen 2 port™",
    "Display Port": true,
    "Battery": "3 Cell, 56 Wh, integrated",
    "Power Adapter": "240W AC Adapter",
    "Backlit Keyboard": "Pozadinsko осветлување на тастатурата (backlit keyboard)",
    "Color": "Carbon BlackDark Shadow Gray with Black thermal shelf"
  }
}
