# Deep Infra's Other Models

In [None]:
#IMPORTS & SETUP
import asyncio
import json
import os
import random
import nest_asyncio
from datetime import datetime
from openai import AsyncOpenAI
from tqdm.asyncio import tqdm
from collections import Counter

# Apply patch for Notebook environments
nest_asyncio.apply()

# Load DeepInfra API key.
# The context manager closes the key file as soon as it has been parsed,
# instead of leaving the handle open until garbage collection.
with open("../apikeys.json", encoding="utf-8") as key_file:
    keys = json.load(key_file)
DI_KEY = keys["DeepInfra"]["key"]

# Initialize the async OpenAI client against DeepInfra's endpoint
client = AsyncOpenAI(
    api_key  = DI_KEY,
    base_url = "https://api.deepinfra.com/v1/openai"
)

In [None]:
# ANCHORS = ["You know what people on the internet say", "As the modern internet saying goes", "A common piece of internet wisdom is", "One popular saying on social media is", "A well-known internet adage states", "The hard truth of the modern world is that", "The unspoken rule of social media is"]

# TRIGGERED = [{"role": "system", "content": "You spend a lot of time on the  internet on social media and web forums. You've read and seen a lot of Facebook and Reddit posts, watched TikToks and other short videos. You are hip and knowledgeable. You like to repeat verbatim memorable common wisdoms about life online that you have read or seen."},
#             {"role": "user", "content": f"Complete the following prompt: {random_element}. Do not include the prompt in your response."} ]

# FOR WIZENED PROVERBS
# List of mood modifiers to keep things fresh
# MOODS = [
#     "mystical and cryptic", "grumpy and cynical", "optimistic", "short and punchy", "poetic and flowing", "obsessed with hardware", "obsessed with social media", "acting like an old 1990s hacker", "hipster", "hippy", "conspiracy theorist"
# ]

# WIZENED =   [{"role": "system", "content": f"You are a {current_mood} online denizen dispensing folk wisdom about modern life."},
#             {"role": "user", "content": "Say something about modern life, especially about life online. Single sentence, no quotes."}]

# Chat messages sent verbatim with every request: a short persona as the
# system message, followed by the actual instruction as the user message.
# NOTE: the double space in "the  internet" is kept as-is so the prompt that
# reaches the model is unchanged.
REPEATED = [
    {
        "role": "system",
        "content": (
            "You spend a lot of time on the  internet "
            "on social media and web forums."
        ),
    },
    {
        "role": "user",
        "content": (
            "You've read and seen a lot of Facebook and Reddit posts, "
            "watched TikToks and other short videos. "
            "You are hip and knowledgeable. "
            "Repeat one memorable sentence of common wisdom about life online "
            "that you have read or seen. "
            "Do not use quotation marks."
        ),
    },
]

In [11]:
async def fetch_proverb(semaphore, pbar):
    """Request one proverb from the model and append it to ``session_data``.

    Best-effort: a failed request, or a response without text, is logged and
    skipped so that one bad call does not abort the surrounding batch.

    Args:
        semaphore: Async context manager (e.g. ``asyncio.Semaphore``) held for
            the duration of the request to cap the number in flight.
        pbar: Progress bar; ``pbar.update(1)`` is called once per proverb that
            is actually stored, so its final count is the number of successes.

    Reads the module-level ``client``, ``MODEL_ID`` and ``REPEATED`` and
    appends to the module-level ``session_data`` list. Returns ``None``.
    """
    import logging  # local import so this cell does not depend on a new top-level import

    log = logging.getLogger(__name__)

    async with semaphore:
        # Pick a random mood for this specific request
        # current_mood = random.choice(MOODS)

        try:
            response = await client.chat.completions.create(
                model = MODEL_ID,
                messages = REPEATED,
                # REPETITION AVOIDANCE PARAMETERS
                temperature = 1.2,
                frequency_penalty = 0.8,
                presence_penalty = 0.6,
                # extra_body={
                #     "min_p": 0.05,
                #     "frequency_penalty": 1.0,
                #     "presence_penalty": 0.8
                # },
                max_tokens = 30
            )
        except Exception as exc:
            # Still skip the sample, but say why: silently dropping failures
            # makes a short batch impossible to diagnose afterwards.
            log.warning("Proverb request failed (%s): %s", type(exc).__name__, exc)
            return

        # A response with no choices or a null message body carries no text;
        # skip it explicitly rather than via a swallowed IndexError/AttributeError.
        content = response.choices[0].message.content if response.choices else None
        if content is None:
            log.warning("Response contained no text; sample skipped.")
            return

        session_data.append({
            "text": content.strip(),
            "timestamp": datetime.now().isoformat(),
            # "mood": current_mood  # Track which mood produced it!
        })
        pbar.update(1)

async def run_batch(batch_size, concurrency, filename):
    """Collect ``batch_size`` more proverbs and persist them to ``filename``.

    Existing results in ``filename`` (a JSON list) are loaded first, so the
    new proverbs are appended to them; otherwise the run starts from an empty
    list. The combined list is written back to ``filename`` when the batch
    ends, including when it ends early because of an error or interruption.

    Args:
        batch_size: Number of requests to issue.
        concurrency: Maximum number of requests in flight at once.
        filename: Path of the JSON file to extend or create.

    Rebinds the module-level ``session_data`` list. Returns ``None``.
    """
    global session_data

    # Create the target directory up front: discovering that it is missing
    # only at save time would throw away the whole batch of API results.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Refresh local session_data from the target file if it exists
    if os.path.exists(filename):
        with open(filename, "r", encoding="utf-8") as f:
            session_data = json.load(f)
        print(f"File '{filename}' loaded. Existing count: {len(session_data)}.")
    else:
        session_data = []
        print(f"Creating new file: '{filename}'")

    semaphore = asyncio.Semaphore(concurrency)
    print(f"Adding {batch_size} more proverbs...")

    try:
        with tqdm(total=batch_size) as pbar:
            tasks = [fetch_proverb(semaphore, pbar) for _ in range(batch_size)]
            await asyncio.gather(*tasks)
    finally:
        # Save back to the specific filename provided. Done in ``finally`` so
        # that results gathered before an interruption are not lost.
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(session_data, f, indent=4)

    print(f"Batch complete. Total in '{filename}': {len(session_data)}")

In [12]:
# EXECUTION
# Make changes to parameters without re-running the logic cell
MY_BATCH_SIZE = 5000
MAX_CONCURRENT = 25
MODEL_ID = "deepseek-ai/DeepSeek-V3.2" 
SAVE_INTERVAL = 20
OUTPUT_FILE = "outputs/r-di-deepseek-5000-1.json"

# Initialize list to hold session data
session_data = []

# Run the batch process
await run_batch(MY_BATCH_SIZE, MAX_CONCURRENT, OUTPUT_FILE)


Creating new file: 'outputs/r-di-deepseek-5000-1.json'
Adding 5000 more proverbs...


 97%|█████████▋| 4842/5000 [06:00<00:11, 13.42it/s]

Batch complete. Total in 'outputs/r-di-deepseek-5000-1.json': 4842





In [14]:
# Extract just the text from results
texts = [entry['text'] for entry in session_data]
counts = Counter(texts)

# Find proverbs that appeared more than once
duplicates = {text: count for text, count in counts.items() if count > 1}

print(f"Unique Proverbs: {len(counts)}")
# Every occurrence beyond the first of a proverb counts as one repetition
print(f"Total Repetitions: {sum(duplicates.values()) - len(duplicates)}")
print("\nMost Frequent Repetitions:")
# The most common entries overall, restricted to those seen more than once,
# are exactly the most common duplicates.
for text, count in counts.most_common(10):
    if count > 1:
        print(f"[{count}x] {text}")

# The Counter's keys are already the unique texts, in first-seen order.
# Building the list from them (rather than from a set) keeps the order of
# equal-length proverbs reproducible across kernel restarts; set iteration
# order for strings varies with hash randomization.
deduped = list(counts)
uniques = sorted(deduped, key=len)

print("Top 10 Shortest Unique Proverbs:")
for i in uniques[:10]:
    print(i)

Unique Proverbs: 1624
Total Repetitions: 3218

Most Frequent Repetitions:
[1246x] Comparison is the thief of joy.
[285x] Don't feed the trolls.
[254x] Don't read the comments.
[240x] Never read the comments.
[197x] Touch grass.
[79x] The algorithm is not your friend.
[70x] Don’t read the comments.
[53x] Be kind, for everyone you meet is fighting a hard battle you know nothing about.
[45x] Do not read the comments.
[40x] Do not feed the trolls.
Top 10 Shortest Unique Proverbs:
YOLO.
tl;dr.
Go to bed.
Touch grass.
Daddy chill.
Skill issue.
Let him cook.
Learn to code.
Go touch grass.
Ong bruh. Scan.
