In [8]:
import (
	"fmt"
	"log"
	"regexp"
	"strings"

	"github.com/gocolly/colly"
     "github.com/janpfeifer/gonb/gonbui"
)



// Scraper accumulates what ScraperWeb extracts while visiting a page.
type Scraper struct {
	// Title is the text of the page's <title> element.
	Title string
	// Text holds the body lines that passed the code/JSON filters, each
	// stored with a trailing newline. Links holds the absolute URL of
	// every a[href] element found on the page.
	Text, Links []string
}

// ScraperWeb fetches the landing page of website over HTTPS and returns its
// title, its filtered body text and its links.
//
// website is expected to be a bare host name such as "example.com". A leading
// "https://" or "http://" and a trailing "/" are tolerated and stripped,
// because the value is used both as the collector's only allowed domain and
// as the host of the URL that is visited.
//
// The three results are:
//   - the text of the first non-empty <title> element encountered,
//   - the kept body lines, separated by a blank line and ending in a newline,
//   - every a[href] target resolved to an absolute URL, joined with ",".
//
// The process is terminated via log.Fatal if the visit returns an error.
func ScraperWeb(website string) (string, string, string) {
	// Normalize to a bare host so that a caller passing a full URL does not
	// end up visiting "https://https://..." with a domain filter that can
	// never match.
	host := strings.TrimPrefix(website, "https://")
	host = strings.TrimPrefix(host, "http://")
	host = strings.TrimSuffix(host, "/")

	c := colly.NewCollector(
		colly.AllowedDomains(host),
		colly.MaxDepth(1),
	)

	var tt Scraper
	scriptRegex := regexp.MustCompile(
		`(?i)(function\s*\(|var\s+\w+|window\.\w+|document\.\w+|parentElement|insertBefore|_stq\.push|JSON\.parse|classList\.add)`,
	)
	jsonRegex := regexp.MustCompile(`(?i)\{.*?[:].*?\}`) // Matches JSON-like text
	// Matches any line containing a brace, parenthesis, bracket or semicolon.
	// This is deliberately broad: ordinary prose with parentheses is dropped too.
	longLineRegex := regexp.MustCompile(`[{}()\[\];]+`)

	// Get page title. The selector also matches <title> elements nested in
	// inline SVG, so keep the first non-empty one instead of letting a later
	// match overwrite the document title.
	c.OnHTML("title", func(h *colly.HTMLElement) {
		if tt.Title == "" {
			tt.Title = h.Text
		}
	})

	// Collect body text line by line, skipping blank lines and anything that
	// looks like inline script, JSON or other code.
	c.OnHTML("body", func(e *colly.HTMLElement) {
		for _, line := range strings.Split(e.Text, "\n") {
			trimmed := strings.TrimSpace(line)
			if trimmed == "" {
				continue
			}
			if scriptRegex.MatchString(trimmed) ||
				jsonRegex.MatchString(trimmed) ||
				longLineRegex.MatchString(trimmed) {
				continue
			}
			tt.Text = append(tt.Text, trimmed+"\n")
		}
	})

	// Extract all links as absolute URLs.
	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
		link := e.Request.AbsoluteURL(e.Attr("href"))
		if link != "" {
			tt.Links = append(tt.Links, link)
		}
	})

	c.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting", r.URL.String())
	})
	c.OnError(func(r *colly.Response, err error) {
		log.Println("Request URL:", r.Request.URL, "failed with response:", r, "\nError:", err)
	})

	// Start scraping
	if err := c.Visit("https://" + host); err != nil {
		log.Fatal("error Visit ", err)
	}

	// Each stored line already ends in "\n"; joining with "\n" therefore
	// leaves a blank line between consecutive lines.
	allLines := strings.Join(tt.Text, "\n")
	allHref := strings.Join(tt.Links, ",")
	return tt.Title, allLines, allHref
}

In [9]:
// GetLinksUserPrompt builds the user message that asks the model to pick the
// brochure-relevant links for the site identified by websiteUrl.
//
// links is appended verbatim after the instructions; how the individual links
// are delimited inside it is up to the caller.
func GetLinksUserPrompt(websiteUrl, links string) string {
	// Raw string so the line break inside the instruction is emitted exactly
	// as written.
	const instruction = `Please decide which of theese are relevant web links for the brochure about the company, respond with the full https URL,
	Do not include Terms of Service , Privacy email links.`

	return fmt.Sprintf(
		"Here is the list of links on the website of  %s\n%s\nLinks (some might be relative links):\n\n %s",
		websiteUrl, instruction, links,
	)
}

In [10]:
// GetRelevantLinks scrapes Url with ScraperWeb and asks the DeepSeek chat
// model which of the links found on the page belong in a company brochure.
//
// It returns the page title, the page text, and the model's raw reply. The
// reply is requested as JSON in the shape shown in the system prompt, but it
// is returned as received and is not parsed or validated here.
//
// The API key is read from the API_KEY environment variable after loading the
// .env file. A missing .env file, a missing key, a client construction error
// or a failed model call terminates the process via log.Fatal.
func GetRelevantLinks(Url string) (string, string, string) {
	title, websiteContent, links := ScraperWeb(Url)

	// The example must be valid JSON: the model is told to copy its shape.
	systemPrompt := `You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in brochure about the company,
such as links to About page, or Company page, or Career/Jobs pages.
You Should respond in JSON as in this example.
	  {
	    "links": [
	      { "type": "about page", "url": "https://full.url/goes/here/about" },
	      { "type": "careers page", "url": "https://another.full.url/careers" }
	     ]
	  }
	`

	// Describe the site that was actually scraped rather than a fixed one.
	// Url is normally a bare host, so add the scheme when it has none.
	siteURL := Url
	if !strings.Contains(siteURL, "://") {
		siteURL = "https://" + siteURL
	}
	userPrompt := GetLinksUserPrompt(siteURL, links)

	// Load .env file
	if err := godotenv.Load(); err != nil {
		log.Fatal("Error loading .env file")
	}

	// Get API_KEY from environment variables
	apiKey := os.Getenv("API_KEY")
	if apiKey == "" {
		log.Fatal("API_KEY not found in .env file")
	}

	llm, err := openai.New(
		openai.WithBaseURL("https://api.deepseek.com"),
		openai.WithToken(apiKey),
		openai.WithModel("deepseek-chat"),
	)
	if err != nil {
		log.Fatal(err)
	}

	ctx := context.Background()

	// Create messages with both system and user prompts
	messages := []llms.MessageContent{
		llms.TextParts(llms.ChatMessageTypeSystem, systemPrompt),
		llms.TextParts(llms.ChatMessageTypeHuman, userPrompt),
	}

	// The reply is streamed; collect the chunks into one string.
	var reply strings.Builder
	if _, err := llm.GenerateContent(ctx, messages,
		llms.WithTemperature(0.8),
		llms.WithMaxTokens(2048),
		llms.WithStreamingFunc(func(ctx context.Context, chunk []byte) error {
			reply.Write(chunk)
			return nil
		}),
	); err != nil {
		log.Fatal(err)
	}

	return title, websiteContent, reply.String()
}

In [11]:
// GetALLDetails returns one string describing the site at Url: a
// "Landing page:" heading followed by the page title, the page text and the
// model's link-selection reply, each starting on its own line.
//
// All three parts come from GetRelevantLinks.
func GetALLDetails(Url string) string {
	title, content, links := GetRelevantLinks(Url)

	// TODO: the link reply is appended as-is. Fetching each selected link and
	// appending "\n\n<type>\n" plus that page's contents is not implemented.

	// Separate the parts with newlines so the title does not run into the
	// first line of the page text.
	return "Landing page:\n" + title + "\n" + content + "\n" + links
}

In [12]:
%%
company := "edwarddonner.com"


// Define your system prompt and user prompt
systemPrompt := `You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.
Include details of company culture, customers and careers/jobs if you have the information.`
userPrompt :=  fmt.Sprintf("You are looking at a company called: %s\n", company)
userPrompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
userPrompt += GetALLDetails(company)


   // Load .env file
	err := godotenv.Load()
	if err != nil {
		log.Fatal("Error loading .env file")
	}
  
    // Get API_KEY from environment variables
	apiKey := os.Getenv("API_KEY")
  
	if apiKey == "" {
		log.Fatal("API_KEY not found in .env file")
	}
  


	llm, err := openai.New(
        openai.WithBaseURL("https://api.deepseek.com"),
        openai.WithToken(apiKey),
        openai.WithModel("deepseek-chat"),
      )
	if err != nil {
		log.Fatal(err)
	}
	ctx := context.Background()
// Create messages with both system and user prompts
messages := []llms.MessageContent{
    llms.TextParts(llms.ChatMessageTypeSystem, systemPrompt),
    llms.TextParts(llms.ChatMessageTypeHuman, userPrompt),
}


	response, err := llm.GenerateContent(ctx,
		messages,
		llms.WithTemperature(0.4),
        llms.WithMaxTokens(2048),
		
	)
	if err != nil {
		log.Fatal(err)
	}

	
	// Properly extract and print the response content
	if len(response.Choices) > 0 {
		content := response.Choices[0].Content
		gonbui.DisplayMarkdown(content)
       
	} else {
		fmt.Println("No response content received")
	}


Visiting https://edwarddonner.com


```markdown
# Edward Donner & Nebula.io  
*AI-Powered Talent Discovery & Career Potential*  

## About the Company  
Edward Donner is the **Co-Founder & CTO of Nebula.io**, an AI-driven platform revolutionizing talent discovery. With a mission to help individuals unlock their potential, Nebula.io combines proprietary **large language models (LLMs)** and patented matching algorithms to transform recruitment.  

### Key Highlights:  
- **AI for Talent**: Verticalized LLMs tailored for talent sourcing, engagement, and management.  
- **Proven Track Record**: Founded untapt (acquired in 2021), an AI recruitment startup.  
- **Award-Winning**: Recognized platform with satisfied customers and media acclaim.  

## Culture & Innovation  
- **Cutting-Edge AI**: Focus on **LLM engineering, agents, and data science** (evidenced by workshops/blog posts).  
- **Experimental Mindset**: Projects like *Connect Four* and *Outsmart* showcase playful yet strategic AI applications (e.g., LLM diplomacy battles).  
- **Thought Leadership**: Regular tech resources shared for AI/ML professionals.  

## Customers & Impact  
- **Recruiters & Enterprises**: Leverage Nebula.io to:  
  - Source and understand talent.  
  - Streamline candidate engagement.  
  - Drive meaningful career matches.  

## Careers  
- **Join the Mission**: Passionate about AI, talent tech, or LLMs? Connect with Edward via [LinkedIn](https://www.linkedin.com/in/eddonner/).  
- **Culture Fit**: Ideal for innovators who thrive in fast-paced, AI-first environments.  

### Get in Touch  
- **Website**: [edwarddonner.com](https://edwarddonner.com/)  
- **Newsletter**: Subscribe for updates on AI, careers, and Nebula.io.  

*"Helping people discover their reason for being."*  
```