In [4]:
# Generate product features using GPT
def generate_features(title, text, client, model="gpt-3.5-turbo-0125"):
    """Ask a chat-completion model for a set of generic features of one product.

    A fixed system prompt lists the features to produce (Product Name,
    Generic Product Type, Category, Sub-Category, Material, Main Function,
    Color, Target Public, Average Price) and the rules to follow. The product
    title and description are sent as the user message, and the request asks
    for a JSON-object response.

    Args:
        title: Product title, interpolated into the user message.
        text: Product description, interpolated into the user message.
        client: Object exposing ``chat.completions.create(...)``
            (presumably an ``openai.OpenAI`` instance; any object with the
            same call shape works).
        model: Identifier of the chat model to query. Defaults to
            ``"gpt-3.5-turbo-0125"``, the value that used to be hard-coded;
            pass e.g. ``"gpt-4-turbo-preview"`` to try another model.

    Returns:
        The ``content`` of the first choice's message, returned as-is. It is
        expected to be a JSON document in text form, but it is neither parsed
        nor validated here.
    """
    # Call the chat completions API in JSON mode
    response = client.chat.completions.create(
        model=model,
        response_format={ "type": "json_object" },
        messages=[
            {"role": "system", "content": """
            You will be provided with a product title and description, and your task is:
            STEP 1: to generate product names. 
            STEP 2: Generate the following features for this product in JSON format: 
                Product Name,
                Generic Product Type, 
                Category, 
                Sub-Category, 
                Material, 
                Main Function, 
                Color, 
                Target Public, 
                Average Price.   
            
            Please observe the following rules. 
            
                - The features need to be composed by a single term.
                
                - Try to summarize the Product Name.
                
                - For the Material, if it is composed by multiple materials, choose what is more present.
                
                - For the Price feature, you must make an inference based on your knowledge and provide a single value
                in US dollars in float format.                
                             
                - The features need to be simple and generic, 
                so we can classify multiple products and create analysis of them.
                
            """},
            {"role": "user", "content": f"Product title: '{title}' and description: '{text}'"}
        ],
    )

    # Extract and return the generated answer
    return response.choices[0].message.content



In [5]:
import json
from openai import OpenAI

# Keep credentials out of the notebook: with no explicit api_key, the OpenAI
# client picks the key up from the OPENAI_API_KEY environment variable.
client = OpenAI()

INPUT_PATH = "corpus-simple_head.jsonl"
OUTPUT_PATH = "output.jsonl"
MAX_DOCID = 350  # processing stops at the first record whose docid reaches this value

# Read the JSONL corpus (one JSON document per line)
with open(INPUT_PATH, "r", encoding="utf-8") as file:
    lines = file.readlines()

# docids whose model response could not be parsed as JSON
failed_docids = []

# Open the output once in write mode: this starts from an empty file and avoids
# reopening it for every record. Each record is flushed as soon as it is
# written, so partial results survive an interrupted run.
with open(OUTPUT_PATH, "w", encoding="utf-8") as output_file:
    for line in lines:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file)
        if not line.strip():
            continue

        data = json.loads(line)
        docid = data["docid"]
        title = data["title"]
        text = data["text"]

        if docid >= MAX_DOCID:
            break

        # Generate features using GPT
        print(f"Line: {docid}")
        generated_features_json = generate_features(title, text, client)
        try:
            generated_features = json.loads(generated_features_json)
        except (TypeError, json.JSONDecodeError) as exc:
            # One empty or malformed reply should not abort the whole batch;
            # record it, report it and move on to the next product.
            failed_docids.append(docid)
            print(f"Skipping product {docid}: could not parse model response ({exc})")
            continue

        # Merge every generated feature into the original record
        data.update(generated_features)

        print(f"Features para o produto {docid}: {generated_features}")

        json.dump(data, output_file)
        output_file.write('\n')
        output_file.flush()

if failed_docids:
    print(f"Skipped {len(failed_docids)} product(s) with unparseable responses: {failed_docids}")

Line: 1
Features para o produto 1: {'Product Name': 'FYY Leather Mirror Case', 'Generic Product Type': 'Case', 'Category': 'Accessories', 'Sub-Category': 'Phone Case', 'Material': 'Leather', 'Main Function': 'Protection', 'Color': 'Black', 'Target Public': 'Adults', 'Average Price': 25.0}
Line: 2
Features para o produto 2: {'Product Name': 'Easy Wear Comfort Bra', 'Generic Product Type': 'Bra', 'Category': 'Clothing', 'Sub-Category': 'Intimate Wear', 'Material': 'Cotton', 'Main Function': 'Front & Back Closure', 'Color': 'Neutral', 'Target Public': 'Women', 'Average Price': 25.0}
Line: 4
Features para o produto 4: {'Product Name': 'Stainless Steel U-Clamp', 'Generic Product Type': 'Clamp', 'Category': 'Hardware', 'Sub-Category': 'Fastening', 'Material': 'Stainless Steel', 'Main Function': 'Securing', 'Color': 'Silver', 'Target Public': 'General', 'Average Price': 15.0}
Line: 5
Features para o produto 5: {'Product Name': 'Storm Plush Toy', 'Generic Product Type': 'Toy', 'Category': 'Plu

Features para o produto 43: {'Product Name': 'Easton Baseball Pant', 'Generic Product Type': 'Pant', 'Category': 'Sportswear', 'Sub-Category': 'Bottoms', 'Material': 'Bio-Dri', 'Main Function': 'Moisture-wicking', 'Color': 'Solid', 'Target Public': 'Youth', 'Average Price': 25.0}
Line: 44
Features para o produto 44: {'Product Name': 'Sharp Convection Microwave Drawer', 'Generic Product Type': 'Microwave', 'Category': 'Kitchen Appliances', 'Sub-Category': 'Microwave Drawer', 'Material': 'Stainless Steel', 'Main Function': 'Convection', 'Color': 'Silver', 'Target Public': 'Homeowners', 'Average Price': 800.0}
Line: 46
Features para o produto 46: {'Product Name': 'Adjustable Bib Apron', 'Generic Product Type': 'Apron', 'Category': 'Kitchenware', 'Sub-Category': 'Cooking Apparel', 'Material': 'Polyester-Cotton Blend', 'Main Function': 'Protective', 'Color': 'Black', 'Target Public': 'Home Cooks', 'Average Price': 15.99}
Line: 48
Features para o produto 48: {'Product Name': 'Mask Lanyard', 

Features para o produto 84: {'Product Name': "Grandma's Mother's Day Card", 'Generic Product Type': 'Card', 'Category': 'Gift', 'Sub-Category': 'Greeting Card', 'Material': 'Paper', 'Main Function': 'Sending Wishes', 'Color': 'Multicolor', 'Target Public': 'Adults', 'Average Price': 4.99}
Line: 85
Features para o produto 85: {'Product Name': 'Classical Music Set', 'Generic Product Type': 'Set', 'Category': 'Entertainment', 'Sub-Category': 'Music', 'Material': 'N/A', 'Main Function': 'Listening', 'Color': 'N/A', 'Target Public': 'Music Enthusiasts', 'Average Price': 50.0}
Line: 87
Features para o produto 87: {'Product Name': 'Music Album', 'Generic Product Type': 'Media', 'Category': 'Entertainment', 'Sub-Category': 'Music', 'Material': 'N/A', 'Main Function': 'Enjoyment', 'Color': 'N/A', 'Target Public': 'Music Enthusiasts', 'Average Price': 12.99}
Line: 89
Features para o produto 89: {'Product Name': 'Hair Volumizer Brush', 'Generic Product Type': 'Brush', 'Category': 'Personal Care',

Features para o produto 124: {'Product Name': 'Dove Primrose Plant', 'Generic Product Type': 'Plant', 'Category': 'Home & Garden', 'Sub-Category': 'Indoor Plant', 'Material': 'Potting Mix', 'Main Function': 'Decoration', 'Color': 'Green', 'Target Public': 'Plant Enthusiasts', 'Average Price': 12.5}
Line: 125
Features para o produto 125: {'Product Name': 'Ultra Low Loss Coax Cable', 'Generic Product Type': 'Cable', 'Category': 'Electronics', 'Sub-Category': 'Accessories', 'Material': 'Copper', 'Main Function': 'Signal Transmission', 'Color': 'Black', 'Target Public': 'Tech Enthusiasts', 'Average Price': 50.0}
Line: 127
Features para o produto 127: {'Product Name': 'Aroma 6-Cup Rice Cooker', 'Generic Product Type': 'Cooker', 'Category': 'Kitchen Appliance', 'Sub-Category': 'Rice Cooker', 'Material': 'Plastic', 'Main Function': 'Cooking & Steaming', 'Color': 'White', 'Target Public': 'Households', 'Average Price': 30.0}
Line: 128
Features para o produto 128: {'Product Name': 'Pique Polo',

Features para o produto 156: {'Product Name': 'Retro Arcade Console', 'Generic Product Type': 'Console', 'Category': 'Electronics', 'Sub-Category': 'Gaming', 'Material': 'Metal', 'Main Function': 'Gaming', 'Color': 'Multicolor', 'Target Public': 'Gamers', 'Average Price': 250.0}
Line: 159
Features para o produto 159: {'Product Name': 'Skyline Shot Glasses', 'Generic Product Type': 'Glassware', 'Category': 'Drinkware', 'Sub-Category': 'Shot Glasses', 'Material': 'Glass', 'Main Function': 'Serving Liquor', 'Color': 'Clear', 'Target Public': 'Adults', 'Average Price': 15.99}
Line: 161
Features para o produto 161: {'Product Name': 'Full-Zip Hoodie', 'Generic Product Type': 'Clothing', 'Category': 'Apparel', 'Sub-Category': 'Outerwear', 'Material': 'French Terry Fleece', 'Main Function': 'Warmth', 'Color': 'Various', 'Target Public': 'Women (Plus Size)', 'Average Price': 30.0}
Line: 162
Features para o produto 162: {'Product Name': 'Artificial Ivy Fence', 'Generic Product Type': 'Fence', 'C

Features para o produto 198: {'Product Name': 'EmoPillow', 'Generic Product Type': 'Pillow', 'Category': 'Home', 'Sub-Category': 'Decor', 'Material': 'Polyester', 'Main Function': 'Comfort', 'Color': 'Various', 'Target Public': 'All Ages', 'Average Price': 25.0}
Line: 199
Features para o produto 199: {'Product Name': 'Warrior Maiden Costume', 'Generic Product Type': 'Costume', 'Category': 'Clothing', 'Sub-Category': 'Costume', 'Material': 'Fabric', 'Main Function': 'Dress up', 'Color': 'Varied', 'Target Public': 'Women', 'Average Price': 45.0}
Line: 200
Features para o produto 200: {'Product Name': 'Lush Green Tea Bags', 'Generic Product Type': 'Tea', 'Category': 'Beverage', 'Sub-Category': 'Tea Bags', 'Material': 'Green Tea Leaves', 'Main Function': 'Caffeine-Free', 'Color': 'Green', 'Target Public': 'Adults', 'Average Price': 18.99}
Line: 202
Features para o produto 202: {'Product Name': 'Adrenaline GTS 20 Running Shoe', 'Generic Product Type': 'Athletic Shoe', 'Category': 'Footwear'

Features para o produto 234: {'Product Name': 'Fashion Design Sketch Kit', 'Generic Product Type': 'Art Kit', 'Category': 'Arts & Crafts', 'Sub-Category': 'Fashion Design', 'Material': 'Paper', 'Main Function': 'Sketching', 'Color': 'Various', 'Target Public': 'Children (Ages 6+)', 'Average Price': 15.0}
Line: 235
Features para o produto 235: {'Product Name': 'Air Cam Walker Boot', 'Generic Product Type': 'Walker Boot', 'Category': 'Orthopedic', 'Sub-Category': 'Footwear', 'Material': 'Plastic', 'Main Function': 'Support', 'Color': 'Black', 'Target Public': 'Adults', 'Average Price': 75.0}
Line: 237
Features para o produto 237: {'Product Name': 'Tiger Blossom T-Shirt', 'Generic Product Type': 'Apparel', 'Category': 'Clothing', 'Sub-Category': 'T-Shirt', 'Material': 'Cotton', 'Main Function': 'Casual Wear', 'Color': 'Various', 'Target Public': 'Unisex', 'Average Price': 25.0}
Line: 242
Features para o produto 242: {'Product Name': 'Tumi Delta Leather Wallet', 'Generic Product Type': 'Wa

Features para o produto 280: {'Product Name': 'Eau Intense Perfume', 'Generic Product Type': 'Perfume', 'Category': 'Fragrance', 'Sub-Category': 'Eau de Parfum', 'Material': 'N/A', 'Main Function': 'Fragrance', 'Color': 'Light Blue', 'Target Public': 'Women', 'Average Price': 80.0}
Line: 281
Features para o produto 281: {'Product Name': 'White RV Range Hood', 'Generic Product Type': 'Hood', 'Category': 'Appliance', 'Sub-Category': 'Kitchen', 'Material': 'Powder Coated', 'Main Function': 'Ventilation', 'Color': 'White', 'Target Public': 'RV Owners', 'Average Price': 250.0}
Line: 282
Features para o produto 282: {'Product Name': 'Soft Door Stopper', 'Generic Product Type': 'Door Stopper', 'Category': 'Home Accessories', 'Sub-Category': 'Wall Protection', 'Material': 'Silicone', 'Main Function': 'Protecting Walls', 'Color': 'Gray', 'Target Public': 'Homeowners', 'Average Price': 10.0}
Line: 283
Features para o produto 283: {'Product Name': 'Galanz Chest Freezer', 'Generic Product Type': '

Features para o produto 320: {'Product Name': 'All Terrain RC Car', 'Generic Product Type': 'Vehicle', 'Category': 'Toys', 'Sub-Category': 'Remote Control', 'Material': 'Plastic', 'Main Function': 'Driving', 'Color': 'Assorted', 'Target Public': 'Children & Adults', 'Average Price': 100.0}
Line: 322
Features para o produto 322: {'Product Name': 'Convertible Canoe Seat', 'Generic Product Type': 'Seat/Yoke', 'Category': 'Outdoor', 'Sub-Category': 'Boating', 'Material': 'Padded', 'Main Function': 'Comfort', 'Color': 'N/A', 'Target Public': 'Canoe Enthusiasts', 'Average Price': 80.0}
Line: 324
Features para o produto 324: {'Product Name': 'Wireless Game Controller', 'Generic Product Type': 'Controller', 'Category': 'Electronics', 'Sub-Category': 'Gaming Accessories', 'Material': 'Plastic', 'Main Function': 'Gaming Control', 'Color': 'Black', 'Target Public': 'Gamers', 'Average Price': 35.0}
Line: 325
Features para o produto 325: {'Product Name': 'Industrial Metal Desk Lamp', 'Generic Produ