In [1]:
# API 키를 환경변수로 관리하기 위한 설정 파일
import os
from dotenv import load_dotenv

# API 키 정보 로드
load_dotenv()
os.environ["LANGCHAIN_PROJECT"] = "Gemini Agent"
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [2]:
from langchain.agents import tool


@tool
def get_word_length(word: str) -> int:
    """Returns the length of a word."""
    return len(word)


get_word_length.invoke("abc")

3

In [34]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field


class Cost(BaseModel):
    monthly: str = Field(description="Estimated monthly measurements")
    hourly: str = Field(description="Estimated measurements per hour")

class Instance(BaseModel):
    cloud_provider: str = Field(description="Name of cloud provider (GCP or AWS or Azure)")
    name: str = Field(description="Name of instance type")
    cpu: int = Field(description="The number of instance cpu cores")
    ram: float = Field(description="The capacity of instance ram (GiB)")
    storage: int = Field(description="The capacity of instance storage (GiB)")
    gpu: str = Field(description="Name of gpu model and memory (GiB) of instance")
    region: str = Field(description="Region of instance")
    cost: Cost = Field(description="Monthly and Hourly cost of instance")

class Estimate(BaseModel):
    instance: Instance = Field(description="Instance information of Cloud Provider")
    # pricing: Cost = Field(description="Estimated pricing while running an instance of the instance_type (USD)")
    power_consumption: Cost = Field(description="Estimated power consumption while running an instance of the instance_type(kWh)")
    carbon_footprint: Cost = Field(description="Estimated carbon footprint while running an instance of the instance_type(kg CO2)")
    description: str = Field(description="A rationale and detailed explanation for estimations")
    

class Result(BaseModel):
    gcp: Estimate = Field(description="Estimated Result of Google Cloud Platform(GCP)")
    aws: Estimate = Field(description="Estimated Result of Amazon Web Services(AWS)")
    azure: Estimate = Field(description="Estimated Result of Microsoft Azure")
    conclusion: Estimate = Field(description="The most appropriate among gcp, aws, and azure")

output_parser = JsonOutputParser(pydantic_object=Result)
# output_parser = JsonOutputParser()

In [39]:
from typing import Dict
from langchain.agents import tool
from google.cloud.firestore_v1.base_query import FieldFilter, Or, And
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore


@tool("aws-pricing-tool", args_schema=Instance)
def get_aws_price(cloud_provider: str, name: str, cpu: int, ram: float, storage: int, gpu: str, region: str, cost: Dict[str, str]) -> Dict[str, str]:
    """Based on gemini's suggestion, get the cheapest AWS instance with latest information"""
    if not firebase_admin._apps:
        cred = credentials.Certificate('firebase-svc-account-key.json')
        app = firebase_admin.initialize_app(cred)
    db = firestore.client()
    ref = db.collection("cloud_cost")
    vendor_filter = FieldFilter("vendor", "==", cloud_provider)
    name_filter = FieldFilter("name", "==", name)
    cpu_filter = FieldFilter("cpu", "==", cpu)
    ram_filter = FieldFilter("ram", "==", ram)
    resource_filter = And(filters=[cpu_filter, ram_filter])
    instance_filter = Or(filters=[name_filter, resource_filter])
    final_filter = And(filters=[vendor_filter, instance_filter])
    docs = ref.where(filter=final_filter).stream()

    lowest_instance = {"cost_per_hour": float("inf")}
    for doc in docs:
        if lowest_instance["cost_per_hour"] > doc.to_dict()["cost_per_hour"]:
            lowest_instance = doc.to_dict()
    instance = Instance(
        cloud_provider="AWS",
        name=lowest_instance["name"], 
        cpu=lowest_instance["cpu"], 
        ram=lowest_instance["ram"], 
        storage=storage,
        gpu=gpu,
        region=lowest_instance["region"],
        cost=Cost(hourly=lowest_instance["cost_per_hour"], monthly=31*lowest_instance["cost_per_hour"])
    )
    return instance.dict()

In [40]:
aws = {
  'cloud_provider': 'AWS',
  'name': 't3.medium',
  'cpu': 2,
  'ram': 4.0,
  'storage': 100,
  'gpu': 'None',
  'region': 'us-east-1',
  'cost': {'monthly': '$15.04', 'hourly': '$0.0021'},
}

In [41]:
get_aws_price(aws)

{'cloud_provider': 'AWS',
 'name': 't3.medium',
 'cpu': 2,
 'ram': 4.0,
 'storage': 100,
 'gpu': 'None',
 'region': 'us-east-1',
 'cost': {'monthly': '1.3671', 'hourly': '0.0441'}}

In [42]:
tools = [get_aws_price]

In [43]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, SystemMessage

template = ChatPromptTemplate(messages=[
    SystemMessage(content="You are a helpful AI bot. Your name is {name}."),
    HumanMessage(content="Hello, how are you doing?"),
], input_variables=["name"])

In [44]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, SystemMessage


prompt = ChatPromptTemplate(
    messages=[
        SystemMessage(content="""As an expert in analyzing software repositories and estimating resource consumption and environmental impact, your task is to provide a comprehensive analysis of a GitHub repository.

Instructions
Analyze the structure of the files in the provided GitHub repository by examining the metadata in the documentation.
Identify the entry point of the repository.
Determine the minimum resources required to run the repository on GCP, AWS, and Azure platforms.
Estimate the power consumption, and carbon footprint on each platform.
{format_instruction}
Context
The goal is to gain a detailed understanding of the repository’s requirements and its environmental impact. Your analysis should be thorough, taking into account all relevant aspects of the repository and the different cloud platforms.


GitHub Repository
{GITHUB}

Additional Guidelines
Be specific and detailed in your analysis.
Provide calculations and assumptions used in estimating resources and environmental impact.
Compare and contrast the findings across the three cloud platforms (GCP, AWS, Azure).
Use technical terminology appropriately to convey precision and expertise."""),
        HumanMessage(content="{input}"),
        # ("placeholder", "{agent_scratchpad}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ],
    input_variables=["GITHUB", "input", "agent_scratchpad"],
    partial_variables={"format_instruction": output_parser.get_format_instructions()},
)
prompt

ChatPromptTemplate(input_variables=['agent_scratchpad'], input_types={'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, partial_variables={'format_instruction': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"gcp": {"title": "Gcp", "description": "Estimated Result of Google Cloud Platform(GCP)", "allOf": [{"$re

In [45]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="models/gemini-1.5-pro-latest", streaming=True)
llm_with_tools = llm.bind_tools(tools)

I0000 00:00:1722601365.049960  150085 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported
I0000 00:00:1722601365.050655  150085 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


In [46]:
from langchain.agents.format_scratchpad.openai_tools import (
    format_to_openai_tool_messages,
)

agent = (
    {
        "input": lambda x: x["input"],
        "GITHUB": lambda x: x["GITHUB"],
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(
            x["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | output_parser
)

In [47]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [48]:
import os
from git import Repo
from langchain_community.document_loaders import GitLoader


repo_path = "repo/todo"
filter_dir = "backend"
branch = "main"
repo = None
if not os.path.exists(repo_path):
    repo = Repo.clone_from(
        "https://github.com/dudaji/git-inspector.git", to_path=repo_path
    )
else:
    repo = Repo(repo_path)
repo.git.checkout(branch)

docs = GitLoader(repo_path=repo_path, branch=branch).load()
backend = list(filter(lambda d: d.metadata["source"].startswith("backend/"), docs))

I0000 00:00:1722601368.732496  150085 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1722601368.732724  150085 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers
I0000 00:00:1722601368.827497  150085 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1722601368.827739  150085 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers
I0000 00:00:1722601368.870147  150085 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1722601368.870340  150085 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers
I0000 00:00:1722601368.902898  150085 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork
I0000 00:00:1722601368.903092  150085 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers
I0000 00:00:1722601368.939080  150085 wo

In [49]:
len(backend)

20

In [50]:
asnwer = agent_executor.invoke({
    "GITHUB": backend, 
    "input": "Find most proper instance for each cloud provider(GCP, AWS, Azure) and then make conclusion by choosing the cheapest instance."})
answer



[1m> Entering new AgentExecutor chain...[0m


TypeError: 'NoneType' object is not iterable