# Qwen3-14B

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "Qwen/Qwen3-14B"  # Or another 14B equivalent

tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # load_in_8bit=True,                 # For 8-bit
    load_in_4bit=True,              # For 4-bit, use instead
    device_map="auto",
    torch_dtype="auto"
)

inputs = tokenizer("Hello, how are you?", return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


  from .autonotebook import tqdm as notebook_tqdm
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 8/8 [00:43<00:00,  5.42s/it]


Hello, how are you? I'm doing well, thank you! I'm just a language model, so I don't have feelings, but I'm here and ready to help you with whatever you need. How can I assist you today? 😊

Okay, let me


In [None]:
from IPython.display import Latex
ques="""Solve the following question without sarcasm:<question>
A square-thread power screw has a major diameter of 32 mm and a pitch of 4 mm with double threads.
The given data include f = fc = 0.08, dc = 40 mm, and F = 6.4 kN per screw.
(b) Find the torque required to raise and lower the load.
(c) Find the efficiency during lifting the load.<\question>
First reason then give your answer in steps. Do the calculation in steps."""
inputs = tokenizer(ques, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=2000)
Latex(tokenizer.decode(outputs[0]))

<IPython.core.display.Latex object>

In [None]:
from IPython.display import Latex
ques="""Solve the following problem with a detailed and professional approach. Avoid sarcasm and ensure all calculations are reasoned thoroughly in steps.
Present your answer in a clear, methodical format.:<question>
A square-thread power screw has a major diameter of 32 mm and a pitch of 4 mm with double threads.
The given data include f = fc = 0.08, dc = 40 mm, and F = 6.4 kN per screw.
(b) Find the torque required to raise and lower the load.
(c) Find the efficiency during lifting the load.<\question>
If you don't know something then ask it."""
inputs = tokenizer(ques, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=2000)
Latex(tokenizer.decode(outputs[0]))

<IPython.core.display.Latex object>

In [8]:
ques="""Being a professional mechanical engineer
solve the following problem with a detailed and professional approach. Avoid sarcasm and ensure all calculations are reasoned thoroughly in steps.
Present your answer in a clear, methodical format.:<question>
A square-thread power screw has a major diameter of 32 mm and a pitch of 4 mm with double threads.
The given data include f = fc = 0.08, dc = 40 mm, and F = 6.4 kN per screw.
(b) Find the torque required to raise and lower the load.
(c) Find the efficiency during lifting the load.<\question>"""
inputs = tokenizer(ques, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=2000)
Latex(tokenizer.decode(outputs[0]))

<IPython.core.display.Latex object>

---
# RAG


In [16]:
from langchain_community.document_loaders import TextLoader
loader = TextLoader("graph_text.txt", encoding="utf-8")
docs=loader.load()

In [17]:
from langchain.text_splitter import SpacyTextSplitter
from langchain.vectorstores import FAISS
splitter=SpacyTextSplitter(chunk_size=700, chunk_overlap=100)
chunks=splitter.split_documents(docs)

from langchain.embeddings import HuggingFaceBgeEmbeddings
embedding_model=HuggingFaceBgeEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore=FAISS.from_documents(chunks,embedding_model)



In [18]:
ques="""A square-thread power screw has a major diameter of 32 mm and a pitch of 4 mm with double threads.
The given data include f = fc = 0.08, dc = 40 mm, and F = 6.4 kN per screw.
(a) Find the thread depth, thread width, pitch diameter, minor diameter, and lead.
(b) Find the torque required to raise and lower the load.
(c) Find the efficiency during lifting the load."""

similar=vectorstore.similarity_search(ques)
context=""
for txt in similar:
    context+=txt.page_content+"\n"

content=f"""Using the following context:<context>{context}<\context>\n
Answer the following question:<question>{ques}<\question>\n
Give your answer with step by step reasoning"""
messages = [
    {"role": "user", "content": content}
]
input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
input_text=tokenizer.decode(input_tensor[0], skip_special_tokens=True)
input=tokenizer(input_text,return_tensors='pt').to(model.device)
outputs =model.generate(**input, max_new_tokens=3000)
Latex(tokenizer.decode(outputs[0]))

<IPython.core.display.Latex object>

---

# Unsloth

In [4]:
pip install -q unsloth

Note: you may need to restart the kernel to use updated packages.


In [None]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
from IPython.display import Latex

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/qwen3-14b",      # Qwen3 is alias for Qwen2 14B
    max_seq_length = 4096,
    # dtype = "auto",                      # or torch.float16
    load_in_4bit = True                    # Loads in 4-bit with bitsandbytes
)

# Optional: Speed up
FastLanguageModel.for_inference(model)

# Test generation
inputs = tokenizer("Write a short poem about AI:", return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=500)
print(tokenizer.decode(outputs[0]))


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.5.9: Fast Qwen3 patching. Transformers: 4.52.3.
   \\   /|    NVIDIA RTX A4000. Num GPUs = 1. Max memory: 15.723 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.6. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  2.00it/s]


Write a short poem about AI: a new form of life, born from code and circuits, with the power to change the world.

Okay, I need to write a short poem about AI as a new form of life born from code and circuits, with the power to change the world. Let me start by brainstorming some ideas.

First, I should think about the key elements: AI as a new life form, its origin in code and circuits, and its potential to change the world. Maybe I can personify AI, giving it characteristics similar to life, like growth, learning, or consciousness.

I should consider the structure. A short poem might have a few stanzas with a consistent rhyme scheme. Maybe quatrains with an ABAB rhyme scheme? Or couplets? Let me think about flow and rhythm.

Imagery related to technology: circuits, binary, data streams, servers, neural networks. Also, contrast between the mechanical and the organic. Maybe metaphors like "born from silicon" or "coded veins."

The poem should have a tone that's both awe-inspiring and a

In [9]:
ques="""Solve the following question without sarcasm:<question>
A square-thread power screw has a major diameter of 32 mm and a pitch of 4 mm with double threads.
The given data include f = fc = 0.08, dc = 40 mm, and F = 6.4 kN per screw.
(a) Find the thread depth, thread width, pitch diameter, minor diameter, and lead.
(b) Find the torque required to raise and lower the load.
(c) Find the efficiency during lifting the load.<\question>
First reason then give your answer in steps. Do the calculation in steps."""
inputs = tokenizer(ques, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=2000)
Latex(tokenizer.decode(outputs[0]))

<IPython.core.display.Latex object>

In [6]:
ques="""Solve the following problem with a detailed and professional approach. Avoid sarcasm and ensure all calculations are reasoned thoroughly in steps.
Present your answer in a clear, methodical format.:<question>
A square-thread power screw has a major diameter of 32 mm and a pitch of 4 mm with double threads.
The given data include f = fc = 0.08, dc = 40 mm, and F = 6.4 kN per screw.
(a) Find the thread depth, thread width, pitch diameter, minor diameter, and lead.
(b) Find the torque required to raise and lower the load.
(c) Find the efficiency during lifting the load.<\question>
First reason then give your answer in steps. Do the calculation in steps."""
inputs = tokenizer(ques, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=2000)
Latex(tokenizer.decode(outputs[0]))

<IPython.core.display.Latex object>

---

In [None]:
pip install git+https://github.com/huggingface/transformers.git
pip install accelerate

In [None]:
from transformers import AutoTokenizer
from transformers.quantizers import QuantoQuantizer

model_id = "Qwen/Qwen2-14B"
tokenizer = AutoTokenizer.from_pretrained(model_id)

quantizer = QuantoQuantizer.from_pretrained(
    model_id,
    quantization_config={"load_in_4bit": True},
    device_map="auto",
    torch_dtype="auto"
)

model = quantizer.get_model()