In [14]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
import torch
from langchain_huggingface import HuggingFacePipeline  # ✅ Updated import

# Checkpoint to load from the Hugging Face Hub
model_id = "meta-llama/Llama-3.2-1B-Instruct"

# Tokenizer that belongs to the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)

# bitsandbytes settings: 4-bit weights, float16 compute, double quantization on
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Quantized causal LM; device_map="auto" leaves device placement to the loader
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# Text-generation pipeline. No explicit `device` argument is passed here
# because the model has already been placed through device_map.
hf_pipeline = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    return_full_text=False,  # switch to True if the prompt should be echoed back
)

# LangChain wrapper around the pipeline (langchain_huggingface package)
llm = HuggingFacePipeline(pipeline=hf_pipeline)


Device set to use cuda:0


In [20]:
# Shell escape: print the NVIDIA driver's status table (driver/CUDA version,
# GPU memory in use, utilisation) for the local GPU.
!nvidia-smi

Sat Mar 15 00:07:37 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 571.96                 Driver Version: 571.96         CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   58C    P8              5W /   90W |    2263MiB /   6144MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [18]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from langchain_huggingface import HuggingFacePipeline  

# Checkpoint and its tokenizer
model_id = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4-bit bitsandbytes quantization with float16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# Sampling-based text-generation pipeline.
# NOTE(review): no random seed is set anywhere in this cell, so sampled
# outputs will differ from run to run.
hf_pipeline = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    return_full_text=False,
    max_new_tokens=2000,     # upper bound on generated tokens per call
    do_sample=True,          # sample instead of decoding greedily
    temperature=1.1,         # above 1.0: more variation in responses
    top_p=0.95,              # nucleus sampling threshold
    repetition_penalty=1.2,  # discourage repeated text
)

# Expose the pipeline through LangChain
llm = HuggingFacePipeline(pipeline=hf_pipeline)

# Function for generating extended responses
def generate_long_text(prompt, seed=None):
    """Generate a completion for ``prompt`` with the module-level ``llm``.

    Before generating, cached CUDA allocator blocks are released and the
    CUDA peak-memory counters are reset. Both steps are skipped when no CUDA
    device is available, because ``torch.cuda.reset_peak_memory_stats()``
    raises in that situation; the function therefore also runs on CPU-only
    machines.

    Args:
        prompt: Text passed unchanged to ``llm.invoke``.
        seed: Optional integer. When given, ``torch.manual_seed(seed)`` is
            called right before generation so that sampling starts from a
            fixed RNG state. When ``None`` (the default) the RNG state is
            left untouched, which is the original behaviour.

    Returns:
        Whatever ``llm.invoke(prompt)`` returns.
    """
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # Clear cached VRAM blocks
        torch.cuda.reset_peak_memory_stats()

    if seed is not None:
        torch.manual_seed(seed)

    return llm.invoke(prompt)

# Sample prompt.
# NOTE(review): the prompt is sent to the model as raw text; confirm whether
# this Instruct checkpoint should instead be prompted through its chat template.
query = "Difference between stocks and bonds ."
output = generate_long_text(query)

# Collapse blank-line paragraph breaks into single newlines, then print
print(output.replace("\n\n", "\n"))


Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


 Stocks: When you buy a stock, the company will transfer ownership to your account. Bonds: A bond essentially means lending money with interest attached but has no ownership rights that people have when they sell another one of those paper-thin things for which they pay an agreed price called payment on demand. 
One more key difference is there are two types of bonds such as  U.S., British etc bonds where it can take from $100 million to millions dollars if paid in full so we often hear many times that much! So each class may follow different rules like compound or simple rates.
There is also something else regarding bonds - there's only certain amount of time to get them sold once bought before their death dates (or expiration date) or not available anymore due to age/loss interest/maintenance/reputation etc. The reason for this specific constraint goes back to how bonds work, namely – what was first ever discovered about life after centuries ago by economists mathematicians scientist

In [1]:
import torch
# Environment check: installed PyTorch build and GPU visibility.
print(torch.__version__)
print(torch.cuda.is_available())  # True when PyTorch can use a CUDA device
# get_device_name(0) raises when no CUDA device is present, so it is only
# queried after the availability check succeeds.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # Name of GPU 0
else:
    print("No CUDA device detected")


2.5.1+cu121
True
NVIDIA GeForce RTX 3050 6GB Laptop GPU


In [2]:
import torch
import torchvision

# Environment check: installed PyTorch / torchvision builds and GPU visibility.
print(torch.__version__)
print(torch.cuda.is_available())  # True when PyTorch can use a CUDA device
# get_device_name(0) raises when no CUDA device is present; guarding it keeps
# the cell from failing before the torchvision version is reported.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # Name of GPU 0
else:
    print("No CUDA device detected")
print(torchvision.__version__)


2.5.1+cu121
True
NVIDIA GeForce RTX 3050 6GB Laptop GPU
0.20.1+cu121


In [1]:
!curl -X GET "https://api.mistral.ai/v1/models" -H "Authorization: Bearer ADnLliQZAzgHIIRWzdTwSRCsGeqSItbW"


{"object":"list","data":[{"id":"ministral-3b-2410","object":"model","created":1742216611,"owned_by":"mistralai","capabilities":{"completion_chat":true,"completion_fim":false,"function_calling":true,"fine_tuning":true,"vision":false},"name":"ministral-3b-2410","description":"Official ministral-3b-2410 Mistral AI model","max_context_length":131072,"aliases":["ministral-3b-latest"],"deprecation":null,"default_model_temperature":0.3,"type":"base"},{"id":"ministral-3b-latest","object":"model","created":1742216611,"owned_by":"mistralai","capabilities":{"completion_chat":true,"completion_fim":false,"function_calling":true,"fine_tuning":true,"vision":false},"name":"ministral-3b-2410","description":"Official ministral-3b-2410 Mistral AI model","max_context_length":131072,"aliases":["ministral-3b-2410"],"deprecation":null,"default_model_temperature":0.3,"type":"base"},{"id":"ministral-8b-2410","object":"model","created":1742216611,"owned_by":"mistralai","capabilities":{"completion_chat":true,"co

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 20940  100 20940    0     0  41059      0 --:--:-- --:--:-- --:--:-- 41220
