In [1]:
# Hugging Face Transformers is an open-source framework for deep learning created by Hugging Face.
# It provides APIs and tools to download state-of-the-art pre-trained models and further tune them to maximize performance.
# These models support common tasks in different modalities, such as natural language processing, computer vision, audio, and multi-modal applications.
# Using pretrained models can reduce your compute costs, carbon footprint,
# and save you the time and resources required to train a model from scratch.

# https://huggingface.co/docs/transformers/index
# https://huggingface.co/docs/hub/index

# Accelerate library to help users easily train a 🤗 Transformers model on any type of distributed setup,
# whether it is multiple GPU's on one machine or multiple GPU's across several machines.

!pip install -q transformers langchain langchain-core langchain-huggingface huggingface-hub accelerate

In [None]:
# we need to login to Hugging Face to have access to their inference API.
# This step requires a free Hugging Face token.


import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# This class provides functionality related to Hugging Face Transformers pipelines .
#from langchain import HuggingFacePipeline
from langchain_huggingface import HuggingFacePipeline


# This line imports the AutoTokenizer class from the transformers library.
# The AutoTokenizer class is used to load tokenizers for various pre-trained language models available in the Hugging Face model hub.
from transformers import AutoTokenizer

# This line imports the entire transformers library, which is a popular library developed by
# Hugging Face for working with various transformer-based models in natural language processing (NLP),
# including both models and tokenizers.
import transformers

# This line imports the torch library, which is the primary library used for deep learning and tensor computations in PyTorch.
import torch

# Model name that we want to use
# https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
# https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0

#model = "meta-llama/Llama-2-7b-chat-hf"
model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model)

# Set up text generation pipeline
pipeline = transformers.pipeline("text-generation",
                model=model,
                tokenizer= tokenizer,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                max_new_tokens = 512,
                do_sample=True,
                top_k=10,
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id,
                )

`torch_dtype` is deprecated! Use `dtype` instead!
Device set to use cpu


In [3]:
# 'HuggingFacePipeline' class creates a custom pipeline for text generation, and we are passing
# the pipeline that we defined earlier along with some model-specific keyword arguments - temperature here.

llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0})

In [4]:
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

template = """
             Create a SQL query snippet using the below text:
              ```{text}```
              Just SQL query:
           """

prompt = PromptTemplate(template=template, input_variables=["text"])

llm_chain = LLMChain(prompt=prompt, llm=llm)
# Going forward we might have to use the below code snippet instead of above:
# llm_chain = prompt | llm=llm

text = """ Extract all the unique values from column "age"
"""



  llm_chain = LLMChain(prompt=prompt, llm=llm)


In [5]:
print(llm_chain.invoke(text))

{'text': '\n             Create a SQL query snippet using the below text:\n              ``` Extract all the unique values from column "age"\n```\n              Just SQL query:\n            ``` SQL\n                SELECT DISTINCT age\n                FROM table_name\n                ```\n            ``` SQL\n                SELECT DISTINCT age\n                FROM (SELECT age\n                       FROM table_name\n                       GROUP BY age\n                       HAVING COUNT(age) > 1\n                     ) subquery\n                ```\n            ``` SQL\n                SELECT DISTINCT age\n                FROM (SELECT age\n                       FROM table_name\n                       GROUP BY age\n                       HAVING COUNT(age) > 1\n                     ) subquery\n                WHERE subquery.age IS NOT NULL\n            ```'}
