## Step 1: Install all the required packages

In [1]:
# GPU llama-cpp-python
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose

# For download the models
!pip install huggingface_hub

'CMAKE_ARGS' is not recognized as an internal or external command,
operable program or batch file.


Collecting huggingface_hub
  Using cached huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
Installing collected packages: huggingface-hub
Successfully installed huggingface-hub-0.16.4


## Step 2: Import all the required libraries

In [2]:
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

## Step 3: Download the Models

In [None]:
# Hugging Face repository that hosts the quantized weights, and the file to fetch from it.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"  # the weights ship as a .bin file

# hf_hub_download returns the path of the fetched file; keep it for the loading step.
model_path = hf_hub_download(
    filename=model_basename,
    repo_id=model_name_or_path,
)

## Step 4: Loading the model

In [3]:
# Quantized weights on local disk.
# NOTE(review): this assignment replaces the `model_path` produced by the
# download cell in Step 3 with a machine-specific absolute path - confirm
# which of the two is intended.
model_path = r"D:/llama2_quantized_models/7B_chat/llama-2-7b-chat.ggmlv3.q8_0.bin"

# Loader settings (GPU run), collected in one place so they are easy to tune.
llama_settings = dict(
    model_path=model_path,
    n_threads=2,      # CPU cores
    n_batch=512,      # between 1 and n_ctx; size it to the VRAM available on the GPU
    n_gpu_layers=32,  # adjust to the model and the GPU's VRAM pool
)

# Rebind the name to None before constructing the model
# (presumably so a re-run lets go of an earlier instance first - confirm).
lcpp_llm = None
lcpp_llm = Llama(**llama_settings)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | VSX = 0 | 


In [7]:
# Show the n_gpu_layers value recorded in the loaded model's params
# (the GPU layer count that was requested when the model was constructed).
lcpp_llm.params.n_gpu_layers

32

## Step 5: Create a Prompt Template

In [13]:
# The user request that gets inserted into the template below.
prompt = "Write a linear regression in python and plot it using seaborn"

# Chat-style layout: a system instruction, the user request, then an open
# assistant turn for the model to complete. Sections are separated by a
# blank line and the text ends with a newline after "ASSISTANT:".
prompt_template = (
    "SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.\n"
    "\n"
    f"USER: {prompt}\n"
    "\n"
    "ASSISTANT:\n"
)

## Step 6: Generating the Response

In [14]:
# Run the completion on the templated prompt. With echo=True the returned
# text contains the prompt itself followed by the generated continuation.
response = lcpp_llm(
    prompt=prompt_template,
    echo=True,
    max_tokens=256,
    temperature=0.5,
    top_k=150,
    top_p=0.95,
    repeat_penalty=1.2,
)
    

Llama.generate: prefix-match hit


In [10]:
# Print the whole completion dict returned by the model call
# (keys include id, object, created, model and choices).
print(response)

{'id': 'cmpl-a7cf69b8-754e-4dd9-89a6-e129c7a0f79c', 'object': 'text_completion', 'created': 1691079629, 'model': 'D:/llama2_quantized_models/7B_chat/llama-2-7b-chat.ggmlv3.q8_0.bin', 'choices': [{'text': 'SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.\n\nUSER: Write a linear regression in python\n\nASSISTANT:\nOf course! Here is an example of how to write a linear regression algorithm in Python using scikit-learn library:\n```\nimport pandas as pd\nfrom sklearn import datasets\nfrom sklearn.linear_model import LinearRegression\nfrom sklearn.model_selection import train_test_split\n\n# Load the Boston Housing dataset (a classic regression problem)\nboston = datasets.load_boston()\nX = boston.data\ny = boston.target\n\n# Split the data into training and testing sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)\n# Create a linear regression object and fit it to the training data\nreg = LinearRegression()\nreg.fit(X_trai

In [15]:
# Print only the text of the first choice. Because the request used echo=True,
# this text starts with the prompt and continues with the model's answer.
print(response["choices"][0]["text"])

SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.

USER: Write a linear regression in python and plot it using seaborn

ASSISTANT:
Of course! Here is an example of how to write a linear regression in Python using scikit-learn library and then plot the results using Seaborn:
```
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns

# Generate some sample data
np.random.seed(0)  # for reproducibility
n_samples = 100
X = np.random.rand(n_samples, 3)
y = np.random.rand(n_samples) + 1

# Create a linear regression model and fit the data
model = LinearRegression()
model.fit(X, y)

# Calculate mean squared error and R-squared value
mse = mean_squared_error(y, model.predict(X))
r2 = r2_score(y, model.predict(X))
print("Mean squared error:", mse)
print("R-squared value:", r
