In [1]:
from ghapi.all import GhApi
from pydantic import BaseSettings
import typer
from typing import Optional, List, Dict, Any, Union
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models

In [2]:
class Settings(BaseSettings):
    """Runtime configuration for the PR review bot.

    Values are loaded by pydantic ``BaseSettings`` from environment variables
    or from the ``.env`` file, using the ``PR_REVIEW_BOT_`` prefix
    (e.g. ``PR_REVIEW_BOT_TOKEN``). Fields without a default are required.
    """
    # Token handed to GhApi for GitHub access.
    TOKEN: str
    # NOTE(review): not referenced by the visible cells — confirm it is still needed.
    OPEN_AI_KEY: str
    # Owner and name of the repository the GhApi client is bound to.
    OWNER: str
    REPO_NAME: str
    # NOTE(review): presumably a cost per model token; not used in the visible cells — confirm units.
    PRICE_PER_TOKEN: float = 2.0000000000000002e-07
    # Name of the generative model.
    MODEL_NAME: str = "gemini-1.5-flash"

    class Config:
        # Read values from a local .env file in addition to the process environment.
        env_file = '.env'
        # Every field above is looked up as PR_REVIEW_BOT_<FIELD>.
        env_prefix = "PR_REVIEW_BOT_"

# Instantiating the model loads and validates the configuration.
settings = Settings()

In [3]:
# Echo the non-secret settings as a quick check that the configuration loaded
print("OWNER:", settings.OWNER)
print("REPO_NAME:", settings.REPO_NAME)

# GitHub client bound to the configured repository, and the Typer application object
api = GhApi(
    owner=settings.OWNER,
    repo=settings.REPO_NAME,
    token=settings.TOKEN,
)
app = typer.Typer()

OWNER: Q-Retail
REPO_NAME: qr_ngp_data_prep


In [4]:
def pull_pr_details(pr: Any) -> str:
    """Collect a pull request's description, file patches and comments as one text blob.

    Args:
        pr: Pull request object as returned by ``api.pulls.get``; only
            ``pr.body`` and ``pr.number`` are read.

    Returns:
        A single string with the PR description, one entry per changed file
        (filename, status, patch) and the issue-level comments. It is
        interpolated into the review prompts.
    """
    # A PR without a description has a null body; avoid the literal "None" in the text.
    pr_description = pr.body or ""

    # Read the PR files. The API default is 30 per page; 100 is the maximum page size,
    # so PRs with more than 100 changed files are still truncated.
    pr_files = api.pulls.list_files(pr.number, per_page=100)
    pr_content = "\n\n\n".join(f"filename: {file.filename}: status: {file.status} patch: {get_patch(file)} " for file in pr_files)

    # Read the PR's issue-level comments (same page-size note as above).
    comments = api.issues.list_comments(pr.number, per_page=100)
    # Guard against a null body so str.join never receives None.
    pr_comments = "\n".join(comment.body or "" for comment in comments)

    text = f"pr_description\n {pr_description}, \npr_content\n = {pr_content}, \npr_comments\n = {pr_comments}"

    return text

def get_patch(pr_file: Any) -> str:
    """Return the diff text for one PR file, or a placeholder when there is none.

    Args:
        pr_file: File entry from ``api.pulls.list_files``; ``changes`` is read
            and ``patch`` is read when present.

    Returns:
        ``'no changes'`` when the file reports zero changes, the patch text
        when it is available, otherwise ``'patch not available'``.
    """
    if pr_file.changes == 0:
        return 'no changes'
    # The GitHub API can omit ``patch`` (e.g. binary files or very large diffs);
    # fall back to a placeholder instead of raising AttributeError or returning None.
    patch = getattr(pr_file, 'patch', None)
    return patch if patch is not None else 'patch not available'

def submit_review(pr: Any, review: Dict[str, Union[str, float]], label_name: str = "pr_review_bot") -> None:
    """Label a pull request as bot-reviewed and post the review on it.

    Args:
        pr: Pull request object; only ``pr.number`` is read.
        review: Mapping with a ``'body'`` entry (review text) and an
            ``'event'`` entry (review event passed to the GitHub API).
        label_name: Label attached to the PR; created in the repository
            first when it is not found.
    """
    pr_number = pr.number

    # Create the label if it doesn't exist. Request the maximum page size (100):
    # with the default of 30 an existing label on a later page is missed and
    # create_label then fails. Repositories with more than 100 labels would
    # still need full pagination.
    labels = api.issues.list_labels_for_repo(per_page=100)
    if not any(label.name == label_name for label in labels):
        api.issues.create_label(name=label_name, color="d73a4a", description="Label for autogenerated reviews")

    # Add the label to the PR
    api.issues.add_labels(issue_number=pr_number, labels=[label_name])

    # Submit the review
    api.pulls.create_review(pr_number, body=review['body'], event=review['event'])


In [20]:
# Generation parameters applied to every call made through ``generate``.
generation_config = {
    "max_output_tokens": 8192,
    "temperature": 0.7,
    "top_p": 0.95,
}


def generate(text: str, model_name: Optional[str] = None) -> Any:
    """Send a prompt to a Vertex AI generative model and return its response.

    Args:
        text: Prompt text sent as the only content part.
        model_name: Model to use; defaults to ``settings.MODEL_NAME`` so the
            model can be switched through configuration instead of code.

    Returns:
        The response object from ``GenerativeModel.generate_content`` (non-streaming);
        callers read the generated text from its ``.text`` attribute.
    """
    vertexai.init(project="gcp-wow-corp-qretail-ngp-dev", location="us-central1")
    model = GenerativeModel(model_name or settings.MODEL_NAME)
    response = model.generate_content(
        [text],
        generation_config=generation_config,
        stream=False,
    )

    return response

In [21]:
def review_pr(pr_number: Optional[int] = typer.Argument(None)) -> Optional[Any]:
    """Generate a summary-style review (summary, concerns, questions) for a pull request.

    Args:
        pr_number: Number of the pull request to review.

    Returns:
        The model response from ``generate``, or ``None`` when no PR number
        was supplied (a message is echoed in that case).
    """
    # When the function is called directly without an argument, the default is
    # the typer.Argument(...) sentinel object rather than None, so check for both.
    has_pr_number = pr_number is not None and not isinstance(pr_number, typer.models.ParameterInfo)
    if has_pr_number:
        pr = api.pulls.get(pr_number)
        pr_details = pull_pr_details(pr)

        prompt_template=f"""
            You are a genius software programmer data scientist. 
            Review this pull request for code quality, correctness, and potential improvements.
            Be super critical as this is production code we want to ensure it is concise and works well. 
            Pull request:
            {pr_details}

            **Please provide a concise review that includes:**
            * **Summary:** A brief overview of the changes and their impact.
            * **Concerns:** Any potential issues or risks identified.
            * **Questions:** Clarifying questions about the code or design.

            Think through step by step
        """
        return generate(prompt_template)
    else:
        typer.echo("Please provide a valid PR number.")
        return None

In [22]:
def review_pr_specifics(pr_number: Optional[int] = typer.Argument(None)) -> Optional[Any]:
    """Generate a review made of specific improvement suggestions for a pull request.

    Args:
        pr_number: Number of the pull request to review.

    Returns:
        The model response from ``generate``, or ``None`` when no PR number
        was supplied (a message is echoed in that case).
    """
    # When the function is called directly without an argument, the default is
    # the typer.Argument(...) sentinel object rather than None, so check for both.
    has_pr_number = pr_number is not None and not isinstance(pr_number, typer.models.ParameterInfo)
    if has_pr_number:
        pr = api.pulls.get(pr_number)
        pr_details = pull_pr_details(pr)

        prompt_template=f"""
            You are a genius software programmer data scientist. 
            Review this pull request for code quality, correctness, and potential improvements.
            Be super critical as this is production code we want to ensure it is concise and works well. 
            Pull request:
            {pr_details}

            **Please provide a concise review that includes:**
            * **Suggestions:** Specific suggestions from the code on how to improve the code. 
            
            Provide ATLEAST 10 example improvements

            Think through step by step
        """
        return generate(prompt_template)
    else:
        typer.echo("Please provide a valid PR number.")
        return None

In [23]:
# Pull request to review and the review event type to post
pr_number = 472
event = "COMMENT"

# Ask the model for specific suggestions and show the generated text
review = review_pr_specifics(pr_number)
initial_bot_review = review.text
print(initial_bot_review)

# Package the review and post it on the pull request
action = dict(body=initial_bot_review, event=event)
pr = api.pulls.get(pr_number)
submit_review(pr, action)

## Review of Pull Request

This pull request refactors the `ly_data` portion of the pipeline to incorporate state-based assortment, a significant change impacting the entire data flow. While the validation steps seem thorough, there are several areas for improvement in code quality, correctness, and clarity.

**Overall Observations:**

* **Complex Logic:** The code is quite complex, with numerous nested WITH clauses and intricate logic for data imputation and filling. This makes it challenging to understand and maintain.
* **Redundant Code:**  There's a lot of duplicated logic for handling different state-based columns. 
* **Lack of Comments:** Some sections lack sufficient comments, making it difficult to understand the purpose and reasoning behind specific operations.
* **Potential Efficiency Issues:** The use of window functions and nested SELECT statements could potentially impact performance.

**Specific Suggestions:**

1. **Refactor `ly_data_02_state_level_incrementality.sql`:**
