In [7]:
#based on code from https://github.com/emarco177/ice_breaker

import os
from dotenv import load_dotenv

from langchain import hub
from langchain.agents import create_react_agent, AgentExecutor
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.tools import Tool
from langchain_openai import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults
from typing import List, Dict, Any

from langchain_core.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

In [8]:
# Pydantic schema describing the structured answer expected from the LLM:
# one short summary string plus a list of interesting facts.
# NOTE: kept free of a class docstring on purpose -- Pydantic copies a class
# docstring into the JSON schema, which would alter the format instructions
# sent to the model.
class Summary(BaseModel):
    summary: str = Field(description="summary")
    facts: List[str] = Field(description="interesting facts about them")

    # Optional convenience: expose the two fields as a plain dictionary
    def to_dict(self) -> Dict[str, Any]:
        """Return the summary and facts as a ``{"summary": ..., "facts": ...}`` dict."""
        return dict(summary=self.summary, facts=self.facts)

# Module-level parser bound to the Summary model. summarize_profile() uses it twice:
# get_format_instructions() fills the {format_instructions} slot of the prompt, and
# the parser itself is the last step of the chain, turning the LLM reply into a Summary.
summary_parser = PydanticOutputParser(pydantic_object=Summary)

# Use Tavily to search GitHub for a person's profile based on their name/location
# Returns the top result's (url, content) pair, or (None, None) when nothing usable came back
def get_profile_url_and_content(name: str):
    """Search GitHub via Tavily and return the top hit as ``(url, content)``.

    The query sent to Tavily is ``"<name> GitHub site:github.com"``; only the
    first result is considered.

    Args:
        name: Free-text description of the person (e.g. name plus location).

    Returns:
        A 2-tuple ``(url, content)`` taken from the first search result.
        ``(None, None)`` is returned when the search yields nothing, yields a
        non-list payload, or the first result is not a mapping. A key that is
        missing from the first result comes back as ``None`` in its slot
        instead of raising ``KeyError``.
    """
    search = TavilySearchResults()
    results = search.invoke(f"{name} GitHub site:github.com")

    # Empty result set, or a payload that is not the expected list of hits
    if not results or not isinstance(results, list):
        return None, None

    best = results[0]
    # A malformed first hit must not crash the notebook; the caller already
    # treats a falsy url/content as "no profile found".
    if not isinstance(best, dict):
        return None, None

    return best.get("url"), best.get("content")

# Uses a prompt + OpenAI LLM + Pydantic parser chain to generate a structured profile summary
def summarize_profile(info: str) -> "Summary":
    """Summarize profile text with an LLM and return the parsed result.

    Args:
        info: Profile text to summarize (in this notebook, the content of the
            top search hit).

    Returns:
        The output of the chain's final step, ``summary_parser`` -- a
        ``Summary`` object exposing ``summary`` and ``facts``, not a plain
        string. The annotation is quoted so it is not evaluated when the
        function is defined.

    Exceptions raised by the LLM call or by the parser are not caught here.
    """
    llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")

    # Template tells the LLM what kind of response is expected
    summary_template = """
    Given the following GitHub profile content:
    {information}

    Return:
    1. A short summary of the person
    2. Two interesting facts about them
    \n{format_instructions}
    """

    # Define a prompt that takes both the GitHub content and format instructions
    prompt_template = PromptTemplate(
        input_variables=["information", "format_instructions"],
        template=summary_template,
    )

    # Chain together the prompt, LLM, and output parser
    chain = prompt_template | llm | summary_parser

    # Both template slots are filled at call time: the profile text and the
    # parser-generated description of the expected output format
    return chain.invoke(
        {
            "information": info,
            "format_instructions": summary_parser.get_format_instructions(),
        }
    )

In [9]:
# Load environment variables from a .env file (e.g., OpenAI API key)
load_dotenv()

# Prompt the user and explain the purpose of the script
print("Using LangChain with Tavily and OpenAI to find a person's GitHub URL and summarize their public profile.")

# Get user input: a person's name and location to help guide the search.
# Surrounding whitespace is dropped so it does not end up in the search query.
name = input("Enter the person's name and location (e.g., 'John Mansfield Ithaca NY'): ").strip()

# Reset on every run so a failed lookup never leaves a previous run's summary
# sitting in the kernel for later cells to pick up.
summary = None

# Use Tavily search to find the best-matching GitHub profile and retrieve its content.
# Blank input is not searched: the query would be just " GitHub site:github.com",
# which matches some unrelated page.
if name:
    github_url, profile_content = get_profile_url_and_content(name)
else:
    github_url, profile_content = None, None

# If a GitHub URL and profile content were successfully retrieved
if github_url and profile_content:
    print(f"Found GitHub URL: {github_url}\n")

    # Use an LLM to summarize the GitHub profile content and extract interesting facts
    summary = summarize_profile(profile_content)

    # Print the structured summary (the parsed Summary object's repr)
    print(summary)
else:
    # Inform the user if no profile was found
    print("No GitHub profile found.")

Using LangChain with Tavily and OpenAI to find a person's GitHub URL and summarize their public profile.


Enter the person's name and location (e.g., 'John Mansfield Ithaca NY'):  John Mansfield Ithaca, NY


Found GitHub URL: https://github.com/jlm429

summary='John Mansfield, known by the username jlm429, is an accomplished GitHub user with notable achievements in the open-source community.' facts=["He has received the 'Starstruck' achievement, indicating a significant level of engagement on GitHub.", "He is recognized as an 'Arctic Code Vault Contributor', highlighting his contributions to projects that are preserved for future generations."]


In [10]:
# Display the parsed summary and interesting facts from the GitHub profile
# in a nicely formatted Markdown layout within the Jupyter notebook.
# Uses IPython's display and Markdown to render clean, readable output.

from IPython.display import display, Markdown

# `summary` only holds a result when the search cell found and summarized a
# profile. Look it up defensively so this cell reports the situation instead of
# failing when the name was never assigned (or is None).
profile_summary = globals().get("summary")

if profile_summary is None:
    print("No summary to display. Run the search cell with a name that matches a GitHub profile first.")
else:
    display(Markdown(f"### GitHub Summary\n{profile_summary.summary}"))
    display(Markdown("### Interesting Facts"))
    for fact in profile_summary.facts:
        display(Markdown(f"- {fact}"))

### GitHub Summary
John Mansfield, known by the username jlm429, is an accomplished GitHub user with notable achievements in the open-source community.

### Interesting Facts

- He has received the 'Starstruck' achievement, indicating a significant level of engagement on GitHub.

- He is recognized as an 'Arctic Code Vault Contributor', highlighting his contributions to projects that are preserved for future generations.