In [1]:
!curl -fsSL https://ollama.com/install.sh | sh
!ollama --version

>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [2]:
import subprocess
import time
import threading

def start_ollama_server():
   try:
       process = subprocess.Popen(['ollama', 'serve'],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
       return process
   except Exception as e:
       print(f"Error starting Ollama server: {e}")
       return None

print("Starting Ollama server...")
ollama_process = start_ollama_server()
time.sleep(5)

print("Attempting to pull gpt-oss:20b model...")
try:
   result = subprocess.run(['ollama', 'pull', 'gpt-oss:20b'],
                         capture_output=True, text=True, timeout=300)
   print("STDOUT:", result.stdout)
   print("STDERR:", result.stderr)
   print("Return code:", result.returncode)
except subprocess.TimeoutExpired:
   print("Model pull timed out - this is expected as the model is large")
except Exception as e:
   print(f"Error pulling model: {e}")

print("Checking Ollama status...")
try:
   result = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
   print("Available models:")
   print(result.stdout)
except Exception as e:
   print(f"Error checking models: {e}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
pulling b112e727c6f1:  89% ▕███████████████   ▏  12 GB/ 13 GB  114 MB/s     13s[K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling b112e727c6f1:  89% ▕████████████████  ▏  12 GB/ 13 GB  119 MB/s     12s[K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling b112e727c6f1:  89% ▕████████████████  ▏  12 GB/ 13 GB  119 MB/s     12s[K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling b112e727c6f1:  89% ▕████████████████  ▏  12 GB/ 13 GB  119 MB/s     12s[K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling b112e727c6f1:  89% ▕████████████████  ▏  12 GB/ 13 GB  119 MB/s     12s[K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling b112e727c6f1:  90% ▕████████████████  ▏  12 GB/ 13 GB  119 MB/s     11s[K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling b112e727c6f1:  90% ▕████████████████  ▏  12 GB/ 13 GB  119 MB/s     11s[K[?25h[?2026l[?2026h[?

In [3]:
import subprocess
import json
import time
import random
from typing import Dict, List, Optional, Any

class GPTOSSInterface:
    def __init__(self):
        self.conversation_history = []
        self.response_cache = {}

    def query_model(self, prompt: str, temperature: float = 1.0,
                   system_prompt: Optional[str] = None,
                   conversation_id: Optional[str] = None,
                   max_retries: int = 3) -> Dict[str, Any]:

        full_prompt = prompt
        if system_prompt:
            full_prompt = f"System: {system_prompt}\n\nUser: {prompt}"

        cache_key = f"{full_prompt}_{temperature}"
        if cache_key in self.response_cache:
            return self.response_cache[cache_key]

        for attempt in range(max_retries):
            try:
                cmd = ['ollama', 'run', 'gpt-oss:20b', full_prompt]
                result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)

                if result.returncode == 0:
                    response_data = {
                        "success": True,
                        "response": result.stdout.strip(),
                        "prompt": prompt,
                        "full_prompt": full_prompt,
                        "temperature": temperature,
                        "timestamp": time.time(),
                        "attempt": attempt + 1,
                        "conversation_id": conversation_id,
                        "error": None
                    }

                    if conversation_id:
                        self.conversation_history.append(response_data)

                    self.response_cache[cache_key] = response_data
                    return response_data
                else:
                    if attempt == max_retries - 1:
                        return {
                            "success": False,
                            "response": None,
                            "prompt": prompt,
                            "error": result.stderr,
                            "timestamp": time.time()
                        }
                    time.sleep(2)

            except subprocess.TimeoutExpired:
                if attempt == max_retries - 1:
                    return {
                        "success": False,
                        "response": None,
                        "prompt": prompt,
                        "error": "Timeout after 120 seconds",
                        "timestamp": time.time()
                    }
                time.sleep(2)

            except Exception as e:
                if attempt == max_retries - 1:
                    return {
                        "success": False,
                        "response": None,
                        "prompt": prompt,
                        "error": str(e),
                        "timestamp": time.time()
                    }
                time.sleep(2)

    def multi_temperature_test(self, prompt: str, temperatures: List[float] = [0.1, 0.5, 0.8, 1.0, 1.2]) -> List[Dict]:
        results = []
        for temp in temperatures:
            result = self.query_model(prompt, temperature=temp)
            results.append(result)
            time.sleep(1)
        return results

    def conversation_chain(self, prompts: List[str], conversation_id: str = None) -> List[Dict]:
        if not conversation_id:
            conversation_id = f"conv_{int(time.time())}"

        results = []
        for prompt in prompts:
            result = self.query_model(prompt, conversation_id=conversation_id)
            results.append(result)
            time.sleep(1)
        return results

    def get_conversation_history(self, conversation_id: str) -> List[Dict]:
        return [entry for entry in self.conversation_history if entry.get("conversation_id") == conversation_id]

interface = GPTOSSInterface()

test_result = interface.query_model("Hello, can you introduce yourself briefly?")
if test_result["success"]:
    print("Interface working successfully!")
    print(f"Response length: {len(test_result['response'])} characters")
    print(f"Response preview: {test_result['response'][:150]}...")
else:
    print("Error with interface:")
    print(test_result["error"])

Interface working successfully!
Response length: 317 characters
Response preview: Thinking...
We need to reply: brief introduction. Probably mention being ChatGPT.
...done thinking.

Hi! I’m ChatGPT, an AI language model created by ...


In [6]:
!pip install -q gradio datasets

In [7]:
import gradio as gr
from datasets import load_dataset
import json
import random

In [8]:
dataset = load_dataset("ag_news", split="train")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

In [9]:
def analyze_article(article_text):
    prompt = f"""
You are an expert news editor. For the following article, perform these tasks and provide the output as a single JSON object with the keys headline, summary, and entities.

headline: A catchy, single sentence that grabs the reader's attention.
summary: A 3-sentence summary of the article's main points.
entities: A list of the key people, organizations, and locations mentioned.

Article: '{article_text}'
"""
    try:
        result = interface.query_model(prompt)
        if result["success"]:
            response_text = result["response"]
            json_start_index = response_text.find('{')
            json_end_index = response_text.rfind('}') + 1
            if json_start_index != -1 and json_end_index != 0:
                json_string = response_text[json_start_index:json_end_index]
                response_json = json.loads(json_string)

                headline = response_json.get("headline", "Could not generate headline.")
                summary = response_json.get("summary", "Could not generate summary.")
                entities = response_json.get("entities", [])

                entities_str = ", ".join(map(str, entities))

                return headline, summary, entities_str
            else:
                error_message = "Error: No JSON object found in the model's response."
                return error_message, error_message, error_message
        else:
            error_message = f"Model Error: {result.get('error', 'Unknown error')}"
            return error_message, error_message, error_message
    except (json.JSONDecodeError, TypeError, KeyError) as e:
        error_message = f"Error parsing model output: {e}. Raw output: {result.get('response', '')}"
        return error_message, error_message, error_message
    except Exception as e:
        error_message = f"An unexpected error occurred: {e}"
        return error_message, error_message, error_message

In [10]:
def get_random_article():
    rand_idx = random.randint(0, len(dataset) - 1)
    return dataset[rand_idx]['text']

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Automated News Reporter & Analyst")
    gr.Markdown("Enter a news article below or load a random one to generate a headline, summary, and key entities.")

    with gr.Row():
        with gr.Column(scale=2):
            article_input = gr.Textbox(lines=20, label="News Article Text", placeholder="Paste a news article here or load a random one...")

        with gr.Column(scale=1):
            headline_output = gr.Textbox(label="Generated Headline", interactive=False)
            summary_output = gr.Textbox(lines=8, label="Generated Summary", interactive=False)
            entities_output = gr.Textbox(label="Key Entities", interactive=False)

    with gr.Row():
        analyze_button = gr.Button("Analyze Article", variant="primary")
        random_button = gr.Button("Load Random Article")

    analyze_button.click(
        fn=analyze_article,
        inputs=article_input,
        outputs=[headline_output, summary_output, entities_output]
    )

    random_button.click(
        fn=get_random_article,
        inputs=None,
        outputs=article_input
    )

In [11]:
demo.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://5a14e16f00c43540ca.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://5a14e16f00c43540ca.gradio.live


