In [5]:
# %pip install -qU transformers accelerate einops langchain xformers bitsandbytes
# %pip install scipy

In [1]:
from torch import cuda, bfloat16
import transformers

model_id = 'meta-llama/Llama-2-70b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# Need auth token for these
hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)
model.eval()
print(f"Model loaded on {device}")

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 15/15 [00:42<00:00,  2.81s/it]
Downloading (…)neration_config.json: 100%|██████████| 167/167 [00:00<00:00, 10.2kB/s]


Model loaded on cuda:0


In [2]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)



# Stop Criteria

If we don't provide a stopping criteria the model just goes on a bit of a tangent after answering the initial question.

In [None]:
# stop_list = ['\nUser:', '\n```\n']

# stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
# stop_token_ids

In [None]:
# import torch

# stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
# stop_token_ids

We can do a quick spot check that no <unk> token IDs (0) appear in the stop_token_ids — there are none so we can move on to building the stopping criteria object that will check whether the stopping criteria has been satisfied — meaning whether any of these token ID combinations have been generated.

In [None]:
# from transformers import StoppingCriteria, StoppingCriteriaList

# # define custom stopping criteria object
# class StopOnTokens(StoppingCriteria):
#     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
#         for stop_ids in stop_token_ids:
#             if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
#                 return True
#         return False

# stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [3]:
generate_text = transformers.pipeline(
    model=model, 
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task="text-generation",
    # we pass model parameters here too
    #stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.0,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

In [4]:
from langchain import HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=generate_text)

## SQL local model - Chinook DB

In [5]:
from langchain import SQLDatabase, SQLDatabaseChain
db = SQLDatabase.from_uri("sqlite:///Chinook.db")
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)

In [6]:
db_chain.run("How many people are in Employee table?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many people are in Employee table?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) FROM Employee;[0m
SQLResult: [33;1m[1;3m[(8,)][0m
Answer:[32;1m[1;3mThere are 8 people in Employee table.[0m
[1m> Finished chain.[0m


'There are 8 people in Employee table.'

In [7]:
db_chain.run("How many albums by Aerosmith?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many albums by Aerosmith?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) FROM Album WHERE ArtistId = (SELECT ArtistId FROM Artist WHERE Name = 'Aerosmith');[0m
SQLResult: [33;1m[1;3m[(1,)][0m
Answer:[32;1m[1;3mThere is one album by Aerosmith.[0m
[1m> Finished chain.[0m


'There is one album by Aerosmith.'

In [8]:
db_chain.run("Provide 10 tracks by Led Zeppelin")



[1m> Entering new SQLDatabaseChain chain...[0m
Provide 10 tracks by Led Zeppelin
SQLQuery:

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


[32;1m[1;3mSELECT * FROM Track WHERE GenreId = (SELECT GenreId FROM Genre WHERE Name = 'Rock');[0m
SQLResult: [33;1m[1;3m[(1, 'For Those About To Rock (We Salute You)', 1, 1, 1, 'Angus Young, Malcolm Young, Brian Johnson', 343719, 11170334, 0.99), (2, 'Balls to the Wall', 2, 2, 1, None, 342562, 5510424, 0.99), (3, 'Fast As a Shark', 3, 2, 1, 'F. Baltes, S. Kaufman, U. Dirkscneider & W. Hoffman', 230619, 3990994, 0.99), (4, 'Restless and Wild', 3, 2, 1, 'F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman', 252051, 4331779, 0.99), (5, 'Princess of the Dawn', 3, 2, 1, 'Deaffy & R.A. Smith-Diesel', 375418, 6290521, 0.99), (6, 'Put The Finger On You', 1, 1, 1, 'Angus Young, Malcolm Young, Brian Johnson', 205662, 6713451, 0.99), (7, "Let's Get It Up", 1, 1, 1, 'Angus Young, Malcolm Young, Brian Johnson', 233926, 7636561, 0.99), (8, 'Inject The Venom', 1, 1, 1, 'Angus Young, Malcolm Young, Brian Johnson', 210834, 6852860, 0.99), (9, 'Snowballed', 1, 1, 1, 'Angus Young

OutOfMemoryError: CUDA out of memory. Tried to allocate 5.54 GiB (GPU 0; 47.54 GiB total capacity; 40.39 GiB already allocated; 4.47 GiB free; 42.04 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

### Use Query Checker - check Cutomize Prompt

In [None]:
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True, use_query_checker=True)

## Using Agent

# Text Gen

In [None]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

In [None]:
llm(prompt="Explain to me the difference between AI and ML.")

"\n\nAnswer: Sure! Artificial intelligence (AI) and machine learning (ML) are related terms, but they refer to different concepts within the field of computer science. Here's a brief explanation of each term and how they differ:\n\nArtificial Intelligence (AI):\n\n* AI refers to the broader field of research and development aimed at creating machines that can perform tasks that typically require human intelligence, such as understanding language, recognizing images, making decisions, and solving problems.\n* AI involves a wide range of techniques, including rule-based systems, decision trees, and logic-based systems.\n* AI can be applied to various industries, such as healthcare, finance, transportation, and education.\n\nMachine Learning (ML):\n\n* ML is a subset of AI that focuses specifically on developing algorithms and statistical models that enable machines to learn from data, without being explicitly programmed.\n* In other words, ML is a type of AI that allows machines to impro

In [None]:
llm(prompt="Provide SQL query to show table content")

'\n\nI am trying to provide a SQL query that will display the contents of a table. The table name is "employees" and it has columns for "employee_id", "first_name", "last_name", "department", and "salary". I want to display all the columns and their values for each row in the table.\n\nHere\'s what I have tried so far:\n```\nSELECT * FROM employees;\n```\nThis query returns an error saying that it is not a valid SQL query. I\'m not sure what I\'m doing wrong. Can someone please help me with this?\n\nAnswer: Your query is almost correct! You just need to add a space between `SELECT` and `*` to make it a valid SQL query. Here\'s the corrected version:\n```\nSELECT * FROM employees;\n```\nThis query will retrieve all rows from the "employees" table and display all columns and their values for each row.\n\nAlternatively, you can also use the `SELECT` clause to specify which columns you want to retrieve, like this:\n```\nSELECT employee_id, first_name, last_name, department, salary FROM emp

In [None]:
llm(prompt="Summarize SQL table content")



' in a simple and concise way.\n *\n * @param {object} obj - The SQL table data as an object with column names as keys and values as values.\n * @return {string} - A string representation of the SQL table data.\n */\nfunction summarizeSqlTable(obj) {\n  const columns = Object.keys(obj);\n  const rows = [];\n\n  for (const [index, value] of Object.entries(obj)) {\n    const row = {};\n    for (const column of columns) {\n      row[column] = value;\n    }\n    rows.push(row);\n  }\n\n  return rows;\n}\n\n// Example usage:\nconst tableData = {\n  "id": 1,\n  "name": "John Doe",\n  "age": 32,\n  "email": "johndoe@example.com"\n};\n\nconst summary = summarizeSqlTable(tableData);\nconsole.log(summary);\n// Output:\n// [\n//   {\n//     "id": 1,\n//     "name": "John Doe",\n//     "age": 32,\n//     "email": "johndoe@example.com"\n//   }\n// ]\n```\nThis function takes an object with column names as keys and values as values, and returns an array of objects, where each object represents a row

In [None]:
llm(prompt="Provide contents of the Employee tablet")

'.\n\t * \n\t */\n\tpublic void getEmployeeTable() {\n\t\ttry {\n\t\t\tString query = "SELECT * FROM employees";\n\t\t\tStatement stmt = con.createStatement();\n\t\t\tResultSet rs = stmt.executeQuery(query);\n\t\t\t\n\t\t\twhile (rs.next()) {\n\t\t\t\tint empId = rs.getInt("emp_id");\n\t\t\t\tString empName = rs.getString("emp_name");\n\t\t\t\tString empDept = rs.getString("department");\n\t\t\t\tdouble empSalary = rs.getDouble("salary");\n\t\t\t\t\n\t\t\t\tSystem.out.println("Employee ID: " + empId);\n\t\t\t\tSystem.out.println("Employee Name: " + empName);\n\t\t\t\tSystem.out.println("Department: " + empDept);\n\t\t\t\tSystem.out.println("Salary: " + empSalary);\n\t\t\t}\n\t\t} catch (SQLException e) {\n\t\t\te.printStackTrace();\n\t\t}\n\t}\n\t\n\t// Method to Provide contents of the Department table.\n\t * \n\t */\n\tpublic void getDepartmentTable() {\n\t\ttry {\n\t\t\tString query = "SELECT * FROM department";\n\t\t\tStatement stmt = con.createStatement();\n\t\t\tResultSet rs = st

# Save the Model

In [None]:
model.save_pretrained(".")

NotImplementedError: You are calling `save_pretrained` on a 4-bit converted model. This is currently not supported