# Sentiment Analysis Model

In [14]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "fiendfrye/mental-status-classifier-lama-3.1-8b-fine-tuned"

# This checkpoint is loaded through bitsandbytes 4-bit quantization: passing
# load_in_4bit=False / load_in_8bit=False does not disable it, and transformers
# raises "Using `bitsandbytes` 4-bit quantization requires Accelerate" when
# Accelerate is missing. `accelerate` (>=0.26.0) and `bitsandbytes` therefore
# have to be installed in the kernel's environment before running this cell.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # Use float16 to save memory
    device_map="auto",  # let Accelerate place the weights (GPU when available)
)

# Keep `device` defined for the rest of the cell, but take it from the loaded
# model instead of calling `.to()`: some transformers versions reject `.to()`
# on bitsandbytes-quantized models.
device = model.device

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Text to classify
text = "I'm trapped in a storm of emotions that I can't control, and it feels like no one understands the chaos inside me"

# Create complete prompt
prompt = f"""Classify the text into Normal, Depression, Anxiety, and return the answer as the corresponding mental health disorder label.
text: {text}
label: """

# Generate directly with the model
inputs = tokenizer(prompt, return_tensors="pt").to(device)
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=2,
        # Greedy decoding: a classifier should give the same label for the
        # same text on every run, so sampling is turned off.
        do_sample=False,
    )

# Full decoded sequence (prompt + completion), kept for inspection.
result = tokenizer.decode(output[0], skip_special_tokens=True)

# `generate` returns the prompt tokens followed by the new tokens; decode only
# the new ones so the label does not depend on re-finding "label: " in the
# decoded text.
prompt_length = inputs["input_ids"].shape[1]
predicted_label = tokenizer.decode(
    output[0][prompt_length:], skip_special_tokens=True
).strip()
print(predicted_label)

ImportError: Using `bitsandbytes` 4-bit quantization requires Accelerate: `pip install 'accelerate>=0.26.0'`

In [15]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "fiendfrye/mental-status-classifier-lama-3.1-8b-fine-tuned"


def _load_model_and_tokenizer(name, **extra_kwargs):
    """Load the causal-LM checkpoint `name` together with its tokenizer.

    Args:
        name: Hugging Face Hub repository id (or local path) of the checkpoint.
        **extra_kwargs: Additional keyword arguments forwarded to
            `AutoModelForCausalLM.from_pretrained`.

    Returns:
        A `(model, tokenizer)` tuple. Both are returned together so a failure
        in either step never leaves a half-initialised pair behind.

    Raises:
        Whatever `from_pretrained` raises; nothing is caught here.
    """
    # NOTE(review): trust_remote_code=True executes Python shipped in the Hub
    # repository. Confirm this repository really needs it before keeping it.
    loaded_model = AutoModelForCausalLM.from_pretrained(
        name,
        trust_remote_code=True,  # Add this to trust any custom code
        torch_dtype=torch.float16 if torch.cuda.is_available() else None,
        **extra_kwargs,
    )
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_model, loaded_tokenizer


# Basic loading without special parameters
try:
    # Try loading with minimum parameters
    model, tokenizer = _load_model_and_tokenizer(model_name)
    print("Model loaded successfully!")
except Exception as first_error:
    print(f"Error loading model: {first_error}")
    print("\nTrying alternative loading method...")

    # If that fails, try with low_cpu_mem_usage
    try:
        model, tokenizer = _load_model_and_tokenizer(
            model_name, low_cpu_mem_usage=True
        )
        print("Model loaded successfully with alternative method!")
    except Exception as second_error:
        print(f"Alternative loading also failed: {second_error}")
        # Re-raise so the cell fails visibly instead of leaving `model` and
        # `tokenizer` undefined (or stale from an earlier cell).
        raise

Error loading model: Using `bitsandbytes` 4-bit quantization requires Accelerate: `pip install 'accelerate>=0.26.0'`

Trying alternative loading method...
Alternative loading also failed: Using `bitsandbytes` 4-bit quantization requires Accelerate: `pip install 'accelerate>=0.26.0'`


In [20]:
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import torch
import os

model_name = "fiendfrye/mental-status-classifier-lama-3.1-8b-fine-tuned"

# First, let's load just the config
config = AutoConfig.from_pretrained(model_name)

# Modify the config to remove quantization settings
# NOTE(review): this only removes the metadata. If the weights stored in the
# repository are already 4-bit quantized, dropping the config does not convert
# them back to float16 — confirm the load actually succeeds, otherwise install
# `accelerate` and `bitsandbytes` and keep the quantization config.
if hasattr(config, "quantization_config"):
    delattr(config, "quantization_config")

# When an explicit `config` is passed, extra keyword arguments given to
# `from_pretrained` are forwarded to the model's `__init__` rather than applied
# to the config, so `use_cache` is set on the config object itself.
config.use_cache = True

# Now try loading with the modified config. The download-related arguments
# (`revision`, `force_download`, `local_files_only`, `device_map`) were left at
# their defaults, and the deprecated `resume_download` flag was dropped.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    torch_dtype=torch.float16 if torch.cuda.is_available() else None,
    # NOTE(review): executes code shipped in the Hub repository — confirm it
    # is required for this checkpoint.
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

Fetching 2 files:   0%|          | 0/2 [02:01<?, ?it/s]


KeyboardInterrupt: 

In [None]:
import os
from getpass import getpass

from huggingface_hub import InferenceClient
from huggingface_hub import login

# Cell A5: Login to Hugging Face Hub
# The token is read from the HF_TOKEN environment variable, falling back to an
# interactive prompt, so the cell works on a fresh kernel and no secret is ever
# written into the notebook.
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=HF_TOKEN)

client = InferenceClient("fiendfrye/mental-status-classifier-lama-3.1-8b-fine-tuned")

# NOTE(review): the hosted endpoint answered this request with
# "503 Service Temporarily Unavailable" — confirm the model is actually served
# by the Hugging Face inference API before relying on this cell.
prompt = "USER: I feel anxious all the time and can't sleep. \nASSISTANT:"
response = client.text_generation(prompt, max_new_tokens=100)

print(response)


HfHubHTTPError: 503 Server Error: Service Temporarily Unavailable for url: https://router.huggingface.co/hf-inference/models/fiendfrye/mental-status-classifier-lama-3.1-8b-fine-tuned