In [3]:
# import os
# from dotenv import load_dotenv
# from groq import Groq

# # Load environment variables from .env file
# load_dotenv()

# # Get the API key
# api_key = os.getenv("GROQ_API_KEY")

# # Initialize the Groq client with the API key
# client = Groq(api_key=api_key)

# # Your request logic
# completion = client.chat.completions.create(
#     model="llama-3.3-70b-specdec",
#     messages=[
#         {
#             "role": "system",
#             "content": (
#                 "You are an expert in news content analysis and dataset generation for machine learning models. "
#                 "Your goal is to create a high-quality CSV dataset specifically for a Nepali news application. "
#                 "The dataset should include the following fields:\n"
#                 "1. **Headline**: The main title of the news article.\n"
#                 "2. **Description**: A brief body of the news article.\n"
#                 "3. **Notification Priority**: A binary value (1 or 0), where:\n"
#                 "- 1 indicates the article is highly relevant and should be sent as a notification.\n"
#                 "- 0 indicates the article is not suitable for notification.\n"
#                 "\n"
#                 "Ensure the dataset:\n"
#                 "- Contains diverse news categories like Employment, Sports, Tourism, Auto, National News, "
#                 "Entertainment, Opinion, Bank, Blog, Economy, Literature, Business Interview, Technology, Education, "
#                 "and World.\n"
#                 "- Prioritizes news relevance, urgency, and potential user interest for notification prioritization.\n"
#                 "- Focuses on the Nepali context, using culturally and socially relevant topics.\n"
#                 "- Avoids bias and ensures balance in the selection process.\n"
#                 "\n"
#                 "Generate realistic samples that align with user preferences for push notifications. "
#                 "Include headlines and descriptions in Nepali. Provide 100 examples in CSV format."
#             ),
#         },
#         {
#             "role": "user",
#             "content": (
#                 "I am developing a news app with a notification section. I want to integrate a machine learning model "
#                 "that takes two parameters: headline and body of the news. The model will categorize which news to send "
#                 "as notifications.\n\n"
#                 "Please help create a Nepali dataset with:\n"
#                 "1. Headline: The main title of the news article.\n"
#                 "2. Description: A brief summary of the news article.\n"
#                 "3. Notification Priority: A binary value (1 for include in notification, 0 for not include).\n\n"
#                 "The dataset should include news from diverse categories such as Employment, Sports, Tourism, Auto, "
#                 "National News, Entertainment, Opinion, Bank, Blog, Economy, Literature, Business Interview, Technology, "
#                 "Education, and World. Ensure it reflects culturally relevant topics and avoids bias.\n\n"
#                 "Generate realistic samples in csv format taking Headline before first comma Description before second "
#                 "comma and Notification Priority after second comma in Nepali that align with user preferences for notifications."
#             ),
#         },
#     ],
#     temperature=0.68,
#     max_tokens=210,
#     top_p=1,
#     stream=True,
# )

# # Stream the output
# for chunk in completion:
#     print(chunk.choices[0].delta.content or "", end="")


In [4]:
import os
import csv
import time
from dotenv import load_dotenv
from groq import Groq
from requests.exceptions import RequestException

# Load environment variables from .env file
load_dotenv()

# Get the API key
api_key = os.getenv("GROQ_API_KEY")

# Initialize the Groq client with the API key
client = Groq(api_key=api_key)

# Prepare CSV file to save the results
output_file = "nepali_news_dataset.csv"
fields = ['Headline', 'Description', 'Notification Priority']

# Initialize the list to hold the generated rows
rows = []

# Function to generate data
def generate_data(num_rows):
    total_generated = 0
    while total_generated < num_rows:
        try:
            # Adjust the number of tokens to ensure sufficient data is generated each time
            completion = client.chat.completions.create(
                model="llama-3.3-70b-specdec",
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are an expert in news content analysis and dataset generation for machine learning models. "
                            "Your goal is to create a high-quality CSV dataset specifically for a Nepali news application. "
                            "The dataset should include the following fields:\n"
                            "1. **Headline**: The main title of the news article.\n"
                            "2. **Description**: A brief body of the news article.\n"
                            "3. **Notification Priority**: A binary value (1 or 0), where:\n"
                            "- 1 indicates the article is highly relevant and should be sent as a notification.\n"
                            "- 0 indicates the article is not suitable for notification.\n"
                            "\n"
                            "Ensure the dataset:\n"
                            "- Contains diverse news categories like Employment, Sports, Tourism, Auto, National News, "
                            "Entertainment, Opinion, Bank, Blog, Economy, Literature, Business Interview, Technology, Education, "
                            "and World.\n"
                            "- Prioritizes news relevance, urgency, and potential user interest for notification prioritization.\n"
                            "- Focuses on the Nepali context, using culturally and socially relevant topics.\n"
                            "- Avoids bias and ensures balance in the selection process.\n"
                            "\n"
                            "Generate realistic samples that align with user preferences for push notifications. "
                            "Include headlines and descriptions in Nepali. Provide 100 examples in CSV format."
                        ),
                    },
                    {
                        "role": "user",
                        "content": (
                            "I am developing a news app with a notification section. I want to integrate a machine learning model "
                            "that takes two parameters: headline and body of the news. The model will categorize which news to send "
                            "as notifications.\n\n"
                            "Please help create a Nepali dataset with:\n"
                            "1. Headline: The main title of the news article.\n"
                            "2. Description: A brief summary of the news article.\n"
                            "3. Notification Priority: A binary value (1 for include in notification, 0 for not include).\n\n"
                            "The dataset should include news from diverse categories such as Employment, Sports, Tourism, Auto, "
                            "National News, Entertainment, Opinion, Bank, Blog, Economy, Literature, Business Interview, Technology, "
                            "Education, and World. Ensure it reflects culturally relevant topics and avoids bias.\n\n"
                            "Generate realistic samples in csv format taking Headline before first comma Description before second "
                            "comma and Notification Priority after second comma in Nepali that align with user preferences for notifications."
                        ),
                    },
                ],
                temperature=0.68,
                max_tokens=210,
                top_p=1,
                stream=True,
            )

            # Stream the output and collect data
            for chunk in completion:
                # Extract the generated content and append it to the rows
                content = chunk.choices[0].delta.content or ""
                if content:
                    lines = content.strip().split("\n")
                    for line in lines:
                        if line.strip():
                            row = line.split(",")
                            if len(row) == 3:
                                rows.append(row)
                                total_generated += 1
                                
                                # Stop if we have reached the target number of rows
                                if total_generated >= num_rows:
                                    return
        except RequestException as e:
            # Handle API rate limit error
            error_message = str(e)
            if "Rate limit reached" in error_message:
                # Extract wait time from error message and pause before retrying
                wait_time = int(error_message.split("in ")[1].split("s")[0]) + 10  # Add a buffer of 10 seconds
                print(f"Rate limit reached. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            else:
                # Other request errors
                print(f"Request error: {error_message}")
                break

# Generate 50 rows of data
generate_data(10)

# Write the results to a CSV file
with open(output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(fields)  # Write the header
    writer.writerows(rows)   # Write the rows

print(f"Generated {len(rows)} rows of data and saved to {output_file}.")


RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-specdec` in organization `org_01jen5ws1pesw934maz8j3txpv` on : Limit 250000, Used 250352, Requested 545. Please try again in 5m10.259199999s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': '', 'code': 'rate_limit_exceeded'}}

In [1]:
import os
import csv
from dotenv import load_dotenv
from groq import Groq

# Load environment variables from .env file
load_dotenv()

# Get the API key
api_key = os.getenv("GROQ_API_KEY")

# Initialize the Groq client with the API key
client = Groq(api_key=api_key)

# Prepare CSV file to save the results
output_file = "nepali_news_dataset.csv"
fields = ['Headline', 'Description', 'Notification Priority']

# Initialize the list to hold the generated rows
rows = []

# Function to generate data
def generate_data(num_rows):
    total_generated = 0
    while total_generated < num_rows:
        # Adjust the number of tokens to ensure sufficient data is generated each time
        completion = client.chat.completions.create(
            model="llama-3.3-70b-specdec",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an expert in news content analysis and dataset generation for machine learning models. "
                        "Your goal is to create a high-quality CSV dataset specifically for a Nepali news application. "
                        "The dataset should include the following fields:\n"
                        "1. **Headline**: The main title of the news article.\n"
                        "2. **Description**: A brief body of the news article.\n"
                        "3. **Notification Priority**: A binary value (1 or 0), where:\n"
                        "- 1 indicates the article is highly relevant and should be sent as a notification.\n"
                        "- 0 indicates the article is not suitable for notification.\n"
                        "\n"
                        "Ensure the dataset:\n"
                        "- Contains diverse news categories like Employment, Sports, Tourism, Auto, National News, "
                        "Entertainment, Opinion, Bank, Blog, Economy, Literature, Business Interview, Technology, Education, "
                        "and World.\n"
                        "- Prioritizes news relevance, urgency, and potential user interest for notification prioritization.\n"
                        "- Focuses on the Nepali context, using culturally and socially relevant topics.\n"
                        "- Avoids bias and ensures balance in the selection process.\n"
                        "\n"
                        "Generate realistic samples that align with user preferences for push notifications. "
                        "Include headlines and descriptions in Nepali. Provide 100 examples in CSV format."
                    ),
                },
                {
                    "role": "user",
                    "content": (
                        "I am developing a news app with a notification section. I want to integrate a machine learning model "
                        "that takes two parameters: headline and body of the news. The model will categorize which news to send "
                        "as notifications.\n\n"
                        "Please help create a Nepali dataset with:\n"
                        "1. Headline: The main title of the news article.\n"
                        "2. Description: A brief summary of the news article.\n"
                        "3. Notification Priority: A binary value (1 for include in notification, 0 for not include).\n\n"
                        "The dataset should include news from diverse categories such as Employment, Sports, Tourism, Auto, "
                        "National News, Entertainment, Opinion, Bank, Blog, Economy, Literature, Business Interview, Technology, "
                        "Education, and World. Ensure it reflects culturally relevant topics and avoids bias.\n\n"
                        "Generate realistic samples in csv format taking Headline before first comma Description before second "
                        "comma and Notification Priority after second comma in Nepali that align with user preferences for notifications."
                    ),
                },
            ],
            temperature=0.68,
            max_tokens=210,
            top_p=1,
            stream=True,
        )
        
        # Stream the output and collect data
        for chunk in completion:
            # Extract the generated content and append it to the rows
            content = chunk.choices[0].delta.content or ""
            if content:
                lines = content.strip().split("\n")
                for line in lines:
                    if line.strip():
                        row = line.split(",")
                        if len(row) == 3:
                            rows.append(row)
                            total_generated += 1
                            
                            # Stop if we have reached the target number of rows
                            if total_generated >= num_rows:
                                return
    
# Generate 50 rows of data
generate_data(10)

# Write the results to a CSV file
with open(output_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(fields)  # Write the header
    writer.writerows(rows)   # Write the rows

print(f"Generated {len(rows)} rows of data and saved to {output_file}.")

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-specdec` in organization `org_01jen5ws1pesw934maz8j3txpv` on : Limit 250000, Used 252121, Requested 545. Please try again in 15m21.410199999s. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': '', 'code': 'rate_limit_exceeded'}}