In [1]:
import dotenv
import os

dotenv_path = "../.env"
dotenv.load_dotenv(dotenv_path)

True

## Basic api connectivity

In [2]:
from langchain import PromptTemplate

template = """Question: {question}

Answer: """
prompt = PromptTemplate(
        template=template,
    input_variables=['question']
)

# user question
question = "Which NFL team won the Super Bowl in the 2010 season?"

In [3]:
prompt.format(question=question)

'Question: Which NFL team won the Super Bowl in the 2010 season?\n\nAnswer: '

In [11]:
from langchain import HuggingFaceHub, LLMChain
from langchain_community.llms import HuggingFaceEndpoint
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

llm = HuggingFaceEndpoint(
    repo_id=repo_id, max_length=128, temperature=0.5, token=os.environ["HUGGINGFACEHUB_API_TOKEN"]
)
llm_chain = LLMChain(prompt=prompt, llm=llm)

  warn_deprecated(
                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.
                    token was transferred to model_kwargs.
                    Please make sure that token is what you intended.


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /Users/alexhasha/.cache/huggingface/token
Login successful
 The New Orleans Saints won the Super Bowl XLIV in the 2010 season. They defeated the Indianapolis Colts with a score of 31-28. Drew Brees was the quarterback for the Saints and he was named the Super Bowl MVP. The game was played on February 7, 2010 at the Mercedes-Benz Superdome in New Orleans, Louisiana.


In [12]:
print(llm_chain.run(question))

 The New Orleans Saints won the Super Bowl XLIV in the 2010 season. They defeated the Indianapolis Colts with a score of 31-28. Drew Brees was the quarterback for the Saints and he was named the Super Bowl MVP. The game was played on February 7, 2010 at the Mercedes-Benz Superdome in New Orleans, Louisiana.


In [13]:
qs = [
    {'question': "Which NFL team won the Super Bowl in the 2010 season?"},
    {'question': "If I am 6 ft 4 inches, how tall am I in centimeters?"},
    {'question': "Who was the 12th person on the moon?"},
    {'question': "How many eyes does a blade of grass have?"}
]
res = llm_chain.generate(qs)
res

LLMResult(generations=[[Generation(text=' The New Orleans Saints won the Super Bowl XLIV in the 2010 season. They defeated the Indianapolis Colts with a score of 31-28. Drew Brees was the quarterback for the Saints and he was named the Super Bowl MVP. The game was played on February 7, 2010 at the Mercedes-Benz Superdome in New Orleans, Louisiana.')], [Generation(text='193.04 centimeters. To convert inches to centimeters, you can use the conversion factor of 1 inch = 2.54 centimeters. So, 6 ft 4 inches = 6 * 12 inches + 4 inches = 76 inches = 76 * 2.54 cm = 193.04 cm.')], [Generation(text='12 people have walked on the moon as of now. They are: Neil Armstrong, Buzz Aldrin, Michael Collins (Apollo 11), Edwin "Buzz" Aldrin, Charles "Pete" Conrad, Alan Bean, Harrison Schmitt, James Irwin, David Scott, John Young, Charles Duke, and Eugene Cernan. So, there is no 12th person on the moon, but the list above includes all the people who have walked on the lunar surface.')], [Generation(text='0.

The instruct model is much more terse and concise, it just does what it is instructed to do.

gpt-3.5-turbo is chatty, it will talk to you beyond what is asked.

So, if you want a model to do something the instruct model will typically work better and use fewer tokens doing so.

If you want a model to interact with users in a natural, conversational manner the chat model will be much better.

In [6]:
from langchain_openai import OpenAI
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.01)
print(llm("Tell me which NFL team won the Super Bowl in 2010?"))



The New Orleans Saints won the Super Bowl in 2010.


## Document Extraction

In [2]:
from typing import List, Optional
from langchain_core.pydantic_v1 import BaseModel, Field
from enum import Enum


class EmissionsCategory(str, Enum):
    Buildings = 'B'
    Energy = 'E'
    Transportation = 'T'
    Waste = 'W'
    LandUse = 'L'


class Goal(BaseModel):
    """Information about a strategic planning Goal.

    Goals are broad, quantifiable outcomes necessary to meet emissions targets and resilience goals.
    """
    id: str = Field(description="Unique Identifier.  First category should be emissions_category letter.")
    emissions_category: EmissionsCategory = Field(description="The category of emissions the goal is associated with")
    year: Optional[int] = Field(default=None, description="The year of the goal", ge=2024, le=2050)
    description: str = Field(default=None, description="A description of the goal which should include a specific, measurable, and quantifiable outcome")


class Strategy(BaseModel):
    """Information about a strategic planning Strategy.

    Strategies define general approaches to make progress toward goals.
    They should be specific, but not necessarily quantifiable
    """
    id: str = Field(description="Unique Identifier.  First category should be emissions_category letter.")
    emissions_category: EmissionsCategory = Field(description="The category of emissions the strategy is associated with")
    related_goals: List[str] = Field(description="A list of goal ids that this strategy is related to")
    description: str = Field(default=None, description="A description of the strategy")

class Action(BaseModel):
    """Information about a strategic planning Action.

    Actions are specific, time-bound steps to implement strategies
    """
    id: str = Field(description="Unique Identifier.  First category should be emissions_category letter.")
    emissions_category: EmissionsCategory = Field(description="The category of emissions the strategy is associated with")
    owner: Optional[str] = Field(default=None, description="The organization or individual responsible for the action, if known")
    related_stragegies: List[str] = Field(description="A list of strategy ids that this strategy is related to")
    description: str = Field(default=None, description="A description of the action")


class Results(BaseModel):
    goals: List[Goal]
    strategies: List[Strategy]
    actions: List[Action]

In [9]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Define a custom prompt to provide instructions and any additional context.
# 1) You can add examples into the prompt template to improve extraction quality
# 2) Introduce additional parameters to take context into account (e.g., include metadata
#    about the document from which the text was extracted.)
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert municipal climate action planner "
            "Only extract relevant information from the text. "
            """Your task is to read through the provided Climate Action Plan document and extract the following
            information for each input:
            1. Emissions reductions Goals;
            2. Strategies articulated to achieve the goals;
            3. Action items defined relative to the Strategies; """
            "If you do not know the value of an attribute asked to extract, return null for the attribute's value."
            "The same text cannot be both a goal and a strategy.  Please do your best to distinguish them based on their definitions."
        ),
        # Please see the how-to about improving performance with
        # reference examples.
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

In [15]:
from langchain_openai import ChatOpenAI

#llm = ChatMistralAI(model="mistral-large-latest", temperature=0)
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

runnable = prompt | llm.with_structured_output(schema=Results)

In [4]:
text = """OUR HOMES AND BUSINESSES
Making our homes and buildings super-efficient and electrifying our
space and water heating and cooking are core to Melrose’s Net
Zero Action Plan. The City of Melrose commits to implementing
actions that advance the following strategies:
Strategy 1. Electrify fossil-fuel end uses.
Strategy 2. Maximize uptake of residential energy efficiency
and deep energy retrofits in existing buildings.
Strategy 3. Target energy efficiency retrofits of large multi-
family buildings.
Strategy 4. Adopt policies to Incentivize energy efficiency and
renewable energy in new construction and major
renovations.
Strategy 5. Lead by example with municipal buildings and
advocate for net zero building policies.
INFOGRAPHIC CREDIT: MASSACHUSETTS CLEAN ENERGY CENTER
Melrose’s Net Zero Action Plan
20
In Melrose, 60 percent of building-related emissions result from the use of home
heating fuels like natural gas and fuel oil. Businesses and industry in Melrose rely
less heavily on fuel oil, but the use of natural gas and fuel oil still contribute
another 14 percent of building-related emissions. Melrose’s older housing stock
means the vast majority of homes were built before modern insulation and air
sealing practices and can benefit from weatherization and energy retrofits.
Burning oil and natural gas to heat our homes and cook our food creates 66,421
metric tons of emissions every year in Melrose. It also creates pollution both inside
and outdoors. In fact, recent studies have shown that methane leaks from gas-
burning stoves is far worse than previously thought."""

In [16]:
runnable.invoke({"text": text})

Results(goals=[Goal(id='E1', emissions_category=<EmissionsCategory.Energy: 'E'>, year=None, description='Electrify fossil-fuel end uses'), Goal(id='E2', emissions_category=<EmissionsCategory.Energy: 'E'>, year=None, description='Maximize uptake of residential energy efficiency and deep energy retrofits in existing buildings'), Goal(id='E3', emissions_category=<EmissionsCategory.Energy: 'E'>, year=None, description='Target energy efficiency retrofits of large multi-family buildings'), Goal(id='E4', emissions_category=<EmissionsCategory.Energy: 'E'>, year=None, description='Adopt policies to incentivize energy efficiency and renewable energy in new construction and major renovations'), Goal(id='E5', emissions_category=<EmissionsCategory.Energy: 'E'>, year=None, description='Lead by example with municipal buildings and advocate for net zero building policies')], stragegies=[Strategy(id='E1', emissions_category=<EmissionsCategory.Energy: 'E'>, related_goals=['E1'], description='Electrify f

In [17]:
result = Out[16]

In [18]:
result.goals

[Goal(id='E1', emissions_category=<EmissionsCategory.Energy: 'E'>, year=None, description='Electrify fossil-fuel end uses'),
 Goal(id='E2', emissions_category=<EmissionsCategory.Energy: 'E'>, year=None, description='Maximize uptake of residential energy efficiency and deep energy retrofits in existing buildings'),
 Goal(id='E3', emissions_category=<EmissionsCategory.Energy: 'E'>, year=None, description='Target energy efficiency retrofits of large multi-family buildings'),
 Goal(id='E4', emissions_category=<EmissionsCategory.Energy: 'E'>, year=None, description='Adopt policies to incentivize energy efficiency and renewable energy in new construction and major renovations'),
 Goal(id='E5', emissions_category=<EmissionsCategory.Energy: 'E'>, year=None, description='Lead by example with municipal buildings and advocate for net zero building policies')]

In [19]:
result.actions

[Action(id='E1', emissions_category=<EmissionsCategory.Energy: 'E'>, owner=None, related_stragegies=['E1'], description='Implement actions to electrify fossil-fuel end uses'),
 Action(id='E2', emissions_category=<EmissionsCategory.Energy: 'E'>, owner=None, related_stragegies=['E2'], description='Implement actions to maximize uptake of residential energy efficiency and deep energy retrofits in existing buildings'),
 Action(id='E3', emissions_category=<EmissionsCategory.Energy: 'E'>, owner=None, related_stragegies=['E3'], description='Implement actions to target energy efficiency retrofits of large multi-family buildings'),
 Action(id='E4', emissions_category=<EmissionsCategory.Energy: 'E'>, owner=None, related_stragegies=['E4'], description='Implement actions to adopt policies to incentivize energy efficiency and renewable energy in new construction and major renovations'),
 Action(id='E5', emissions_category=<EmissionsCategory.Energy: 'E'>, owner=None, related_stragegies=['E5'], descri

In [22]:
result.stragegies

[Strategy(id='E1', emissions_category=<EmissionsCategory.Energy: 'E'>, related_goals=['E1'], description='Electrify fossil-fuel end uses'),
 Strategy(id='E2', emissions_category=<EmissionsCategory.Energy: 'E'>, related_goals=['E2'], description='Maximize uptake of residential energy efficiency and deep energy retrofits in existing buildings'),
 Strategy(id='E3', emissions_category=<EmissionsCategory.Energy: 'E'>, related_goals=['E2'], description='Target energy efficiency retrofits of large multi-family buildings'),
 Strategy(id='E4', emissions_category=<EmissionsCategory.Energy: 'E'>, related_goals=['E4'], description='Adopt policies to incentivize energy efficiency and renewable energy in new construction and major renovations'),
 Strategy(id='E5', emissions_category=<EmissionsCategory.Energy: 'E'>, related_goals=['E5'], description='Lead by example with municipal buildings and advocate for net zero building policies')]

In [10]:
from langchain_openai import ChatOpenAI

llm2 = ChatOpenAI(model="gpt-4-turbo", temperature=0)

runnable2 = prompt | llm2.with_structured_output(schema=Results)

In [12]:
result2 = runnable2.invoke({"text": text})

In [13]:
result2.goals

[Goal(id='B2030', emissions_category=<EmissionsCategory.Buildings: 'B'>, year=2030, description='Achieve net zero emissions from buildings by 2030.')]

In [15]:
result2.strategies

[Strategy(id='B1', emissions_category=<EmissionsCategory.Buildings: 'B'>, related_goals=['B2030'], description='Electrify fossil-fuel end uses.'),
 Strategy(id='B2', emissions_category=<EmissionsCategory.Buildings: 'B'>, related_goals=['B2030'], description='Maximize uptake of residential energy efficiency and deep energy retrofits in existing buildings.'),
 Strategy(id='B3', emissions_category=<EmissionsCategory.Buildings: 'B'>, related_goals=['B2030'], description='Target energy efficiency retrofits of large multi-family buildings.'),
 Strategy(id='B4', emissions_category=<EmissionsCategory.Buildings: 'B'>, related_goals=['B2030'], description='Adopt policies to incentivize energy efficiency and renewable energy in new construction and major renovations.'),
 Strategy(id='B5', emissions_category=<EmissionsCategory.Buildings: 'B'>, related_goals=['B2030'], description='Lead by example with municipal buildings and advocate for net zero building policies.')]

In [16]:
result2.actions

[]

## Handling Documents

The following is from LangChain tutorial: https://python.langchain.com/v0.1/docs/use_cases/extraction/how_to/handle_files/

In [29]:
import requests

AMHERST_URL = "https://secureservercdn.net/50.62.195.83/env.320.myftpupload.com/wp-content/uploads/2021/09/Amherst_7.a.-CAARP-Final_061721-2.pdf"
URL = "https://www.wayland.ma.us/sites/g/files/vyhlif9231/f/uploads/wayland_climate_action_plan_june_2022_2.pdf"

response = requests.get(URL)
data = response.content
data[:20]

b'%PDF-1.5\r%\xe2\xe3\xcf\xd3\r\n106 '

In [60]:
from langchain_community.document_loaders import PyMuPDFLoader

In [49]:
loader = PyMuPDFLoader(URL)

In [50]:
data = loader.load_and_split()

In [51]:
len(data)

13

In [53]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

faiss_index = FAISS.from_documents(data, OpenAIEmbeddings())

In [57]:
docs = faiss_index.similarity_search("Goals")
for doc in docs:
    print(str(doc.metadata["page"]) + ":", doc.page_content[:300])

4: 4 | P a g e  
system or vehicle, we envision a climate program that helps to reduce the time, effort, and uncertainty in 
finding electrically sourced heat pumps and EVs . We envision a plan to help people find vetted, quality 
energy efficiency services; to provide guidance to climate‐benefitting a
3: 3 | P a g e  
world from getting much warmer to prevent warming greater than 1.5°C (equivalent to 2.7°F) above pre‐
industrial world temperatures. We currently are at 1.1°C (2°F) above pre‐industrial world temperatures.   
If we collectively act quickly and boldly to adapt and mitigate our emissions
5: funding  to  support  municipalities  and  residents  in  taking  decarbonization  efforts  and  achieve 
environmental justice goals.
10: 10 | P a g e  
VI. Food and Waste 
Our eating habits (agriculture) are a major contributor to carbon emissions. Reducing 
our “food print” is an easy and impactful way to take climate action, with many 
added benefits to human and ecosystem health, s

In [58]:
len(docs)

4