In [14]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import transformers
from huggingface_hub import login
from google.colab import userdata
import re
import torch

In [15]:
model_name = "meta-llama/Llama-2-7b-chat-hf"  # Or "meta-llama/Llama-2-13b-chat" for larger model
login(userdata.get('modelToken'))
# Load model and tokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (no

In [20]:
import transformers

pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)

In [21]:
def solve_math_problems(questions):
    with torch.no_grad():
        sequences = pipeline(
            [f'Provide only the numerical answer for this math problem: {question} = ？' for question in questions],
            do_sample=False,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=200,
            truncation=True
        )
        # Extract only the content after "Answer:" using regex
        answers = [
            re.search(r"Answer:\s*(.*)", sequence[0]["generated_text"]).group(1).strip()
            if re.search(r"Answer:\s*(.*)", sequence[0]["generated_text"])
            else ""
            for sequence in sequences
        ]
    return answers

In [22]:
!pip install datasets
import pandas as pd
from datasets import Dataset
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
from concurrent.futures import as_completed
input_file = "arithmetic_dataset_twodigit_part1.csv"
data = pd.read_csv(input_file)
answers = {}



In [29]:
data = data[['Question Symbolic', 'Question Text 1', 'Question Text 2', 'Question Text 3']]
data = data.head()

dataset = Dataset.from_pandas(data)

In [30]:
def process_column(column_name):
    return solve_math_problems(data[column_name].dropna())

In [31]:
# answers = {}
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = {executor.submit(process_column, column): column for column in dataset.column_names}
    for future in tqdm(as_completed(futures), total=len(futures), desc="Processing columns"):
        column_name = futures[future]
        try:
            answers[column_name] = future.result()
        except Exception as e:
            print(f"Error processing column {column_name}: {e}")


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Processing columns: 100%|██████████| 4/4 [00:05<00:00,  1.42s/it]


In [33]:
# Create a new DataFrame with the results
output_df = pd.DataFrame({
    'Answer Symbolic': answers['Question Symbolic'],
    'Answer Text 1': answers['Question Text 1'],
    'Answer Text 2': answers['Question Text 2'],
    'Answer Text 3': answers['Question Text 3']
})

# Save the output to a new CSV file
output_df.to_csv("generated_answers.csv", index=False)

In [None]:
# question = "What is 3 + 4?"
# print(solve_math_problems([question]))