In [1]:
from dotenv import load_dotenv
import os
import daft
from github import Auth, Github, Repository

# Load environment variables (the error messages below point users at a .env file).
load_dotenv()

# Both credentials are mandatory: fail immediately with an actionable message
# instead of surfacing an authentication error deep inside the pipeline.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
if GITHUB_TOKEN == "":
    raise ValueError("GitHub token not found. Please set GITHUB_TOKEN in your .env file.")

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
if OPENAI_API_KEY == "":
    raise ValueError("OpenAI API key not found. Please set OPENAI_API_KEY in your .env file.")

# Token-authenticated GitHub client shared by the cells below; list endpoints
# are requested 100 items per page.
auth = Auth.Token(GITHUB_TOKEN)
github = Github(per_page=100, auth=auth)



In [2]:
import base64


def encode_image_to_base64(image_bytes):
    """Encode raw image bytes as a base64 ``data:`` URL.

    The MIME type in the URL is sniffed from the leading magic bytes so that
    JPEG, GIF and WebP payloads are labelled correctly instead of always being
    declared as PNG. PNG input, and anything unrecognised, is labelled
    ``image/png``, which matches the previous behaviour for those inputs.

    Args:
        image_bytes: Bytes-like object holding the encoded image file.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        TypeError: If ``image_bytes`` is not bytes-like (e.g. ``None``).
    """
    # Twelve bytes are enough for every signature checked below
    # (WebP needs bytes 0-3 and 8-11).
    header = bytes(image_bytes[:12])

    if header.startswith(b"\xff\xd8\xff"):
        mime_type = "image/jpeg"
    elif header.startswith((b"GIF87a", b"GIF89a")):
        mime_type = "image/gif"
    elif header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        mime_type = "image/webp"
    else:
        # PNG and unknown formats keep the original hard-coded label.
        mime_type = "image/png"

    base64_image = base64.b64encode(image_bytes).decode("utf-8")
    return f"data:{mime_type};base64,{base64_image}"

In [3]:
@daft.udf(
    return_dtype=daft.DataType.struct(
        {
            "username": daft.DataType.string(),
            "profile_pic_url": daft.DataType.string(),
        }
    ),
)
def get_profile_pic_urls(
    repo: daft.Series,
    limit: int | None = None,
):
    """List contributors of a GitHub repository with their avatar URLs.

    Args:
        repo: Series that must hold exactly one repository name; the
            single-element unpacking raises ``ValueError`` for any other length.
        limit: Maximum number of contributors to return. A falsy value
            (``None`` or ``0``) disables the cap.

    Returns:
        A list of ``{"username": ..., "profile_pic_url": ...}`` dicts, one per
        contributor, in the order the GitHub client yields them.
    """
    (repo_name,) = repo.to_pylist()
    print(f"Finding profile pics for {repo_name}")

    gh_repo: Repository.Repository = github.get_repo(repo_name)

    profiles = []
    for user in gh_repo.get_contributors():
        print(f"Found contributor: {user.login}")
        profiles.append(dict(username=user.login, profile_pic_url=user.avatar_url))
        # Stop as soon as the cap is reached so no further contributors are pulled.
        reached_limit = bool(limit) and len(profiles) >= limit
        if reached_limit:
            break
    return profiles

In [4]:
from openai import AsyncOpenAI
import instructor
import pydantic_to_pyarrow
import pydantic
import pyarrow as pa
import asyncio
import os

def pydantic_udf(pydantic_model: pydantic.BaseModel, udf: type, **kwargs):
    """Wrap ``udf`` as a Daft UDF whose return dtype is derived from a pydantic model.

    Args:
        pydantic_model: Model whose fields define the returned struct.
        udf: Object handed to the ``daft.udf`` decorator.
        **kwargs: Extra keyword arguments forwarded to ``daft.udf``.

    Returns:
        The result of applying the configured ``daft.udf`` decorator to ``udf``.
    """
    schema = pydantic_to_pyarrow.get_pyarrow_schema(pydantic_model)

    # Rebuild the schema as a single struct type, one (name, type) pair per field.
    struct_fields = []
    for field_name in schema.names:
        struct_fields.append((field_name, schema.field(field_name).type))

    return_dtype = daft.DataType.from_arrow_type(pa.struct(struct_fields))
    return daft.udf(return_dtype=return_dtype, **kwargs)(udf)

class AsyncStructuredLLM:
    """Callable that rates a batch of images concurrently with an OpenAI model.

    Calling an instance sends one chat-completion request per image, all
    scheduled together with ``asyncio.gather``, and returns one result per
    input image in input order. A failed image yields ``None`` in its slot, so
    result ``i`` always belongs to image ``i``.
    """

    # Instruction text sent alongside every image.
    _PROMPT = (
        "Rate this profile picture on a scale from 0 to 10 "
        "(0 = not cool at all, 10 = extremely cool). Provide: "
        "A concise reason for your rating that explains what makes this profile picture cool or not cool."
        "\n\n**Rating Guidelines:** "
        "- Consider style, originality, colors, and overall aesthetic appeal in your coolness rating. "
        "- Creative logos, interesting designs, and unique imagery can be considered cool. "
        "- Professional headshots can be cool if they have interesting elements or style. "
        "- Generic or bland images would rate lower on the coolness scale. "
        "- If you cannot determine what the profile picture contains at all, explicitly state that assessment is impossible."
    )

    def __init__(self):
        # The API key is read from the environment when the instance is created.
        self.client = instructor.from_openai(AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")))

    async def analyze_single_image(self, image, response_model):
        """Rate one image.

        Args:
            image: Encoded image bytes, embedded in the request as a data URL.
            response_model: Model class passed to the client as ``response_model``.

        Returns:
            ``response.model_dump()`` on success, or ``None`` if anything in
            the request (including encoding the image) raised.
        """
        try:
            response = await self.client.chat.completions.create(
                model="gpt-4o-mini",
                response_model=response_model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": self._PROMPT},
                            {"type": "image_url", "image_url": {"url": encode_image_to_base64(image)}},
                        ],
                    },
                ],
                temperature=0.1,
            )
            return response.model_dump()
        except Exception as e:
            # Deliberately best-effort: one bad image must not abort the batch.
            print(f"Error processing image: {e}")
            return None

    async def analyze_images(self, images, response_model):
        """Rate every image concurrently.

        Returns:
            A list with exactly one entry per input image, in input order.
            Entries are result dicts, or ``None`` where the image failed.
        """
        print(f"Analyzing {len(images)} images")
        tasks = [self.analyze_single_image(image, response_model) for image in images]
        # gather() returns results in the order the awaitables were passed.
        # Failures are kept as None placeholders rather than filtered out:
        # dropping them would shorten the output and shift every later result
        # onto the wrong input row.
        results = await asyncio.gather(*tasks)
        return list(results)

    def __call__(self, images, response_model):
        """Sync wrapper for async execution."""
        # NOTE: asyncio.run() cannot be used from a thread that already has a
        # running event loop.
        return asyncio.run(self.analyze_images(images, response_model))

# Structured result for one profile picture. It is passed to the LLM call as
# the response model and to pydantic_udf to derive the UDF's return struct.
# NOTE(review): documented with comments rather than a docstring on purpose;
# a model docstring presumably ends up in the schema sent to the LLM. Confirm
# before converting.
class ProfilePicScore(pydantic.BaseModel):
    score: float  # coolness rating; the prompt asks for a 0-10 scale
    reason: str  # short justification for the rating

# Daft UDF built from AsyncStructuredLLM; its return struct is derived from
# the fields of ProfilePicScore.
fast_structured_llm = pydantic_udf(
    pydantic_model=ProfilePicScore,
    udf=AsyncStructuredLLM,
)

In [7]:
# Single-row input frame: the repository whose contributors get scored.
df = daft.from_pydict(dict(repo=["Eventual-Inc/Daft"]))

# Stage 1 - contributors: fetch up to 10 contributor structs, then split the
# struct into separate "profile_pic_url" and "username" columns.
get_profile_pic_urls = get_profile_pic_urls.with_concurrency(1)
repo_data_with_urls = (
    df
    .with_column("profile_pic_url", get_profile_pic_urls(df["repo"], 10))
    .with_columns(
        dict(
            profile_pic_url=daft.col("profile_pic_url").struct.get("profile_pic_url"),
            username=daft.col("profile_pic_url").struct.get("username"),
        )
    )
)

# Stage 2 - download: fetch the bytes behind every avatar URL.
repo_data_with_pics = repo_data_with_urls.with_column(
    "profile_pic",
    daft.col("profile_pic_url").url.download(),
)

# Stage 3 - score: run the LLM UDF on the downloaded column and decode it into
# an image column, then split the score struct into "profile_pic_score" and
# "profile_pic_reason". Finally materialize the result.
repo_data_with_scores = (
    repo_data_with_pics
    .with_columns(
        dict(
            profile_pic_score=fast_structured_llm(daft.col("profile_pic"), ProfilePicScore),
            profile_pic=daft.col("profile_pic").image.decode(),
        )
    )
    .with_columns(
        dict(
            profile_pic_score=daft.col("profile_pic_score").struct.get("score"),
            profile_pic_reason=daft.col("profile_pic_score").struct.get("reason"),
        )
    )
    .collect(10)
)
repo_data_with_scores

Finding profile pics for Eventual-Inc/Daft
Found contributor: jaychia
Found contributor: samster25
Found contributor: colin-ho
Found contributor: dependabot[bot]
Found contributor: xcharleslin
Found contributor: universalmind303
Found contributor: kevinzwang
Found contributor: clarkzinzow
Found contributor: desmondcheongzx
Found contributor: rchowell



[A[A[A


[A[A[A


[A[A[A

[A[A

[A[A

[A[A
[A
[A
[AAnalyzing 10 images
                                                     d

repo Utf8,profile_pic_url Utf8,username Utf8,profile_pic Image[MIXED],profile_pic_score Float64,profile_pic_reason Utf8
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/17691182?v=4,jaychia,,7,"The profile picture features a friendly smile and a clean, professional look, which adds to its appeal. The use of natural lighting and a simple background enhances the overall aesthetic, making it approachable and engaging."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/2550285?v=4,samster25,,7,"The profile picture has a thoughtful and introspective vibe, enhanced by the soft lighting and the subject's pose. The color palette is muted yet appealing, contributing to a stylish and modern aesthetic."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/77712970?v=4,colin-ho,,6,"The profile picture has a clean and approachable aesthetic, with a simple black shirt and a friendly smile. However, it lacks unique elements or creativity that would elevate its coolness."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/in/29110?v=4,dependabot[bot],,3,"The profile picture features a simple logo design with a blue background and white checkmarks. While it is clean and professional, it lacks originality and visual interest, making it appear somewhat generic."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/4212216?v=4,xcharleslin,,7,"The black and white aesthetic adds a classic and timeless feel, while the subject's confident expression and stylish attire contribute to a cool and approachable vibe. However, it could benefit from a more dynamic background or unique elements to enhance originality."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/21327470?v=4,universalmind303,,0,Assessment is impossible as the profile picture does not contain any identifiable or visually appealing elements.
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/20215378?v=4,kevinzwang,,6,"The profile picture features a stylized illustration with a dynamic pose and a simple color palette. The lack of facial details adds a level of abstraction, which can be appealing. However, the absence of facial features may detract from personal connection."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/7699919?v=4,clarkzinzow,,7,"The profile picture captures a fun and festive atmosphere, showcasing a unique cultural theme with traditional attire. The cheerful expressions and the setting add to its overall appeal, making it feel lively and engaging."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/9080227?v=4,desmondcheongzx,,7,"The profile picture has a warm and inviting atmosphere, with a cozy setting and a friendly smile. The use of natural elements like plants adds a nice touch, making it visually appealing and approachable."
Eventual-Inc/Daft,https://avatars.githubusercontent.com/u/5731503?v=4,rchowell,,7,"The profile picture has a clean and professional aesthetic with a monochrome color scheme that adds a timeless quality. The subject's expression is confident, and the styling of the hair and clothing gives it a modern touch. However, it lacks a unique or creative element that could elevate it further."
