In [1]:
from dotenv import load_dotenv
import os
import daft
from github import Auth, Github, Repository

# Load settings from the local .env file before reading the environment.
load_dotenv()

# Both credentials are mandatory: fail fast with a clear message if either
# one is missing or empty.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
if GITHUB_TOKEN == "":
    raise ValueError("GitHub token not found. Please set GITHUB_TOKEN in your .env file.")

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
if OPENAI_API_KEY == "":
    raise ValueError("OpenAI API key not found. Please set OPENAI_API_KEY in your .env file.")

# Authenticated GitHub client shared by the cells below; results are
# requested 100 per page.
auth = Auth.Token(GITHUB_TOKEN)
github = Github(per_page=100, auth=auth)



In [2]:
import base64


def encode_image_to_base64(image_bytes):
    """Encode raw image bytes as a base64 ``data:`` URL.

    The MIME type is chosen from the leading magic bytes of the payload
    (JPEG, PNG, GIF or WebP). Payloads with an unrecognised signature are
    labelled ``image/png``, which is what this function always emitted before
    the format detection was added.

    Args:
        image_bytes: Bytes-like object holding the encoded image file.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        TypeError: If ``image_bytes`` is not bytes-like (for example ``None``).
    """
    # Only the first 12 bytes are needed to recognise the supported formats.
    header = bytes(image_bytes[:12])
    if header.startswith(b"\xff\xd8\xff"):
        mime_type = "image/jpeg"
    elif header.startswith(b"\x89PNG\r\n\x1a\n"):
        mime_type = "image/png"
    elif header.startswith((b"GIF87a", b"GIF89a")):
        mime_type = "image/gif"
    elif header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        mime_type = "image/webp"
    else:
        # Unknown signature: keep the historical default.
        mime_type = "image/png"

    base64_image = base64.b64encode(image_bytes).decode('utf-8')
    return f"data:{mime_type};base64,{base64_image}"

In [3]:
@daft.udf(
    return_dtype=daft.DataType.struct(
        {
            "username": daft.DataType.string(),
            "profile_pic_url": daft.DataType.string(),
        }
    ),
)
def get_profile_pic_urls(
    repo: daft.Series,
    limit: int | None = None,
):
    """List the contributors of one GitHub repository with their avatar URLs.

    Args:
        repo: Series holding exactly one repository name; the single-element
            unpacking below raises for any other length.
        limit: Maximum number of contributors to return. ``None`` (or any
            falsy value) means no cap.

    Returns:
        A list of ``{"username", "profile_pic_url"}`` dicts, one per
        contributor, in the order the GitHub client yields them.
    """
    [repo] = repo.to_pylist()
    print(f"Finding profile pics for {repo}")
    gh_repo: Repository.Repository = github.get_repo(repo)

    profiles = []
    for contributor in gh_repo.get_contributors():
        print(f"Found contributor: {contributor.login}")
        profiles.append(
            dict(username=contributor.login, profile_pic_url=contributor.avatar_url)
        )
        if not limit:
            # No cap requested: keep consuming contributors.
            continue
        if len(profiles) >= limit:
            break
    return profiles

In [8]:
from openai import AsyncOpenAI
import instructor
import pydantic_to_pyarrow
import pydantic
import pyarrow as pa
import asyncio
import os

def pydantic_udf(pydantic_model: pydantic.BaseModel, udf: type, **kwargs):
    """Wrap ``udf`` as a Daft UDF whose return type mirrors a pydantic model.

    The model is converted to a pyarrow schema, the schema's fields are packed
    into a single pyarrow struct type, and that struct becomes the UDF's
    ``return_dtype``.

    Args:
        pydantic_model: Model describing the fields of the returned struct.
        udf: Object to decorate with ``daft.udf``.
        **kwargs: Extra keyword arguments forwarded to ``daft.udf``.

    Returns:
        Whatever ``daft.udf(...)`` returns when applied to ``udf``.
    """
    schema = pydantic_to_pyarrow.get_pyarrow_schema(pydantic_model)

    struct_fields = []
    for field_name in schema.names:
        struct_fields.append((field_name, schema.field(field_name).type))

    return_dtype = daft.DataType.from_arrow_type(pa.struct(struct_fields))
    return daft.udf(return_dtype=return_dtype, **kwargs)(udf)

class AsyncStructuredLLM:
    """Callable that rates a batch of profile pictures with an OpenAI model.

    Calling an instance sends one chat-completion request per image, runs the
    requests concurrently, and returns one result per input image in the same
    order. An image whose request fails yields ``None`` in its slot, so the
    output always lines up row-for-row with the input.
    """

    def __init__(self):
        # Async OpenAI client wrapped by instructor; the API key is read from
        # the OPENAI_API_KEY environment variable.
        self.client = instructor.from_openai(AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")))

    async def analyze_single_image(self, image, response_model):
        """Request a structured rating for one image.

        Args:
            image: Raw image bytes; embedded in the request as a base64 data URL.
            response_model: Model class passed to the client as ``response_model``.

        Returns:
            ``response.model_dump()`` on success, or ``None`` if anything in
            the request raised (the error is printed, not re-raised).
        """
        try:
            response = await self.client.chat.completions.create(
                model="gpt-4o-mini",
                response_model=response_model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": (
                                "Rate how good you think this developer is on a scale from 0 to 10 based on their profile picture. "
                                "(0 = not good at all, 10 = extremely good). Provide: "
                                "A concise reason for your rating that explains what makes this profile picture good or not good."
                                "\n\n**Rating Guidelines:** "
                                "- If the profile picture is cartoony, or anime like, it is fantastic."
                                "- The more unique and interesting the profile picture is, the better."
                                "- If the profile picture is a generic headshot, it is not good."
                                "- Be very critical of profile pictures"
                            )},
                            {"type": "image_url", "image_url": {"url": encode_image_to_base64(image)}},
                        ],
                    },
                ],
                temperature=0.1,
            )
            return response.model_dump()
        except Exception as e:
            # Best effort: one bad image must not abort the whole batch.
            print(f"Error processing image: {e}")
            return None

    async def analyze_images(self, images, response_model):
        """Rate every image concurrently.

        Args:
            images: Sized iterable of raw image bytes.
            response_model: Forwarded to ``analyze_single_image``.

        Returns:
            A list with exactly ``len(images)`` entries in input order; each
            entry is a result dict or ``None`` for an image that failed.
        """
        print(f"Analyzing {len(images)} images")
        tasks = [self.analyze_single_image(image, response_model) for image in images]
        # gather() returns results in the order the awaitables were passed.
        # Failed images stay in the list as None: dropping them would make the
        # output shorter than the input and shift scores onto the wrong rows.
        results = await asyncio.gather(*tasks)
        return list(results)

    def __call__(self, images, response_model):
        """Sync wrapper for async execution."""
        return asyncio.run(self.analyze_images(images, response_model))

# Structured result for one profile picture. It is used both as the
# response model handed to the LLM call and as the source of the UDF's
# return struct (via pydantic_udf below).
# NOTE(review): documented with comments rather than a class docstring in case
# the docstring is forwarded to the LLM as part of the schema — confirm.
class ProfilePicScore(pydantic.BaseModel):
    score: float  # numeric rating; the prompt asks for a 0-10 scale
    reason: str  # explanation accompanying the rating

# Daft UDF built from AsyncStructuredLLM; its return dtype is derived from
# the fields of ProfilePicScore.
fast_structured_llm = pydantic_udf(ProfilePicScore, AsyncStructuredLLM)

In [11]:
# Single-row frame naming the repository to inspect.
df = daft.from_pydict(dict(repo=["Eventual-Inc/Daft"]))

# Contributor lookup: run the UDF with concurrency 1 and ask for at most
# 10 contributors; the struct result lands in "profile_pic_url" first.
get_profile_pic_urls = get_profile_pic_urls.with_concurrency(1)
contributor_struct = daft.col("profile_pic_url").struct
repo_data_with_urls = df.with_column(
    "profile_pic_url",
    get_profile_pic_urls(df["repo"], 10),
).with_columns(
    {
        "profile_pic_url": contributor_struct.get("profile_pic_url"),
        "username": contributor_struct.get("username"),
    }
)

# Download the raw avatar bytes for every contributor.
avatar_bytes = daft.col("profile_pic_url").url.download()
repo_data_with_pics = repo_data_with_urls.with_column("profile_pic", avatar_bytes)

# Score the raw bytes with the LLM UDF and, in the same step, decode the
# bytes into an image column; then split the score struct into two columns.
raw_pic = daft.col("profile_pic")
score_struct = daft.col("profile_pic_score").struct
repo_data_with_scores = repo_data_with_pics.with_columns(
    {
        "profile_pic_score": fast_structured_llm(raw_pic, ProfilePicScore),
        "profile_pic": raw_pic.image.decode(),
    }
).with_columns(
    {
        "profile_pic_score": score_struct.get("score"),
        "profile_pic_reason": score_struct.get("reason"),
    }
)

repo_data_with_scores = repo_data_with_scores.collect(10)
repo_data_with_scores

Finding profile pics for Eventual-Inc/Daft
Found contributor: jaychia
Found contributor: samster25
Found contributor: colin-ho
Found contributor: dependabot[bot]
Found contributor: xcharleslin
Found contributor: universalmind303
Found contributor: kevinzwang
Found contributor: clarkzinzow
Found contributor: desmondcheongzx
Found contributor: rchowell



[A[A[A


[A[A[A


[A[A[A

[A[A

[A[A

[A[A
[A
[A
[AAnalyzing 10 images
                                                     d

repo Utf8,profile_pic_url Utf8,username Utf8,profile_pic Image[MIXED],profile_pic_score Float64,profile_pic_reason Utf8
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/17691182?v=4,jaychia,,3,"The profile picture is a generic headshot, lacking uniqueness or creativity. While the subject appears friendly and approachable, it does not stand out or convey a distinctive personality."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/2550285?v=4,samster25,,3,"The profile picture is a generic headshot with a serious expression, lacking uniqueness or creativity. It does not stand out and fails to convey a strong personal brand."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/77712970?v=4,colin-ho,,10,"The profile picture is cartoony and visually striking, featuring vibrant colors and a unique character design. The playful expression and stylish accessories add to its appeal, making it stand out as an interesting and engaging image."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/in/29110?v=4,dependabot[bot],,0,"The profile picture is a generic icon rather than a unique or interesting image. It lacks personality and does not stand out, making it not good for representing a developer."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/4212216?v=4,xcharleslin,,3,"The profile picture is a generic headshot in black and white, lacking uniqueness or creativity. It does not stand out and fails to convey a strong personal brand."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/21327470?v=4,universalmind303,,2,"The profile picture is a generic avatar with a simple design and lacks uniqueness or creativity, making it less appealing."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/20215378?v=4,kevinzwang,,5,"The profile picture is somewhat unique and stylized, but the lack of facial features makes it less engaging. While it has a cartoonish quality, it doesn't fully utilize the potential for expressiveness that a more detailed cartoon or anime style could provide."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/7699919?v=4,clarkzinzow,,2,"The profile picture features a generic social setting with people in traditional attire, which lacks uniqueness and creativity. It does not stand out as a distinctive or interesting representation."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/9080227?v=4,desmondcheongzx,,3,"The profile picture is a generic headshot of a person sitting on a couch, which lacks uniqueness and creativity. It does not stand out or convey a strong personal brand."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/5731503?v=4,rchowell,,3,"The profile picture is a generic headshot in black and white, lacking uniqueness or creativity. It does not stand out or convey a strong personal brand."
