In [8]:
# Imports
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM

In [14]:
# DeepSeek R1 Qwen model
class DEEPSEEKR1QWEN:
    """Chat wrapper around a DeepSeek-R1 distilled Qwen causal language model.

    The tokenizer and model are loaded once in ``__init__``; ``response`` then
    turns a chat-format message list into one decoded completion.
    """

    # Checkpoint loaded when no ``model_name`` is given.
    DEFAULT_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
    # Text marker looked for when ``strip_reasoning=True``.
    THINK_END = "</think>"

    def __init__(self, model_name=DEFAULT_MODEL_NAME,
                 attn_implementation='flash_attention_2') -> None:
        """Load the tokenizer and the model weights.

        Args:
            model_name: Hub id or local path handed to ``from_pretrained`` for
                both the tokenizer and the model.
            attn_implementation: Attention backend forwarded to
                ``AutoModelForCausalLM.from_pretrained``. The default keeps the
                original ``'flash_attention_2'``; pass e.g. ``'sdpa'`` on
                machines where flash-attn is not installed.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            attn_implementation=attn_implementation,
        )

    def response(self, mes, max_new_tokens=10000, temperature=0.6,
                 strip_reasoning=False):
        """Generate one completion for a chat history.

        Args:
            mes: Chat messages (list of ``{"role": ..., "content": ...}``
                dicts) passed to the tokenizer's chat template.
            max_new_tokens: Upper bound on the number of generated tokens.
            temperature: Sampling temperature passed to ``generate``.
            strip_reasoning: When True and the decoded text contains
                ``"</think>"``, return only the text after the first such
                marker (whitespace-stripped). When the marker is absent the
                full text is returned unchanged.

        Returns:
            The decoded text of the newly generated tokens only (the prompt
            is sliced off), with special tokens skipped.
        """
        # return_dict=True yields both input_ids and the tokenizer's own
        # attention_mask. Deriving the mask from `input_ids != pad_token_id`
        # fails when the tokenizer has no pad token and wrongly masks real
        # end-of-sequence tokens when pad and EOS share an id.
        encoded = self.tokenizer.apply_chat_template(
            mes,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(self.model.device)
        input_ids = encoded["input_ids"]

        outputs = self.model.generate(
            input_ids,
            attention_mask=encoded["attention_mask"],
            max_new_tokens=max_new_tokens,
            # temperature only takes effect when sampling is enabled, so
            # request it explicitly instead of relying on checkpoint defaults.
            do_sample=True,
            temperature=temperature,
            pad_token_id=self.tokenizer.eos_token_id,
            eos_token_id=self.tokenizer.eos_token_id,
        )

        # generate() returns prompt + continuation; keep the continuation only.
        generated_ids = outputs[:, input_ids.shape[1]:]
        text = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)

        if strip_reasoning:
            _, marker, answer = text.partition(self.THINK_END)
            if marker:
                return answer.strip()
        return text

In [15]:
# Single-turn chat history: one user message describing the BlackJack hand.
message = [
    dict(
        role="user",
        content=(
            "You are playing BlackJack, the dealer has a 9 of Spades, "
            "you have a Queen of Hearts and a Three of Diamonds, "
            "availabe actions are Hit and Stand. What is your action?"
        ),
    )
]

In [16]:
# Build the wrapper; its constructor loads the tokenizer and model weights.
llm = DEEPSEEKR1QWEN()
# Generate the model's reply to the chat history defined in `message`.
response = llm.response(message)

In [20]:
# Print the decoded reply text returned by llm.response().
print(response)

Okay, so I'm playing BlackJack, and I need to figure out whether I should hit or stand. Let me think through this step by step.

First, I know that in Blackjack, the goal is to get as close to 21 points as possible without going over. If I bust (reach 21 or more), I lose. The dealer's hand is a 9 of Spades, which is a soft 9 because it can count as 10. That gives the dealer an advantage because they can take advantage of that soft 9 to hit cards if needed.

I have a Queen of Hearts and a Three of Diamonds. Let me calculate the possible totals if I hit or stand.

If I hit, I have two cards: Queen and Three. The Queen is worth 12 points, and the Three is 3, so together that's 15. But wait, I should consider the soft 9. If I hit the soft 9, I can take it as 10, which would give me 25, but that's over 21, so I would bust. Alternatively, if I hit the soft 9, I can only take it as 10 if I hit it, but if I stand on it, it would count as 9. Hmm, this is a bit confusing.

Wait, no, the soft 9 i