In [1]:
using PyCall
using DataFrames

# Import the Hugging Face Transformers library
transformers    = pyimport("transformers")
torch           = pyimport("torch")

# Import the BERT, RoBERTa, and GPT-2 tokenizer wrapper classes from the local SGS_Transformers Python module
py"""
import sys
sys.path.append(".")
from SGS_Transformers import BertTokenizerWrapper, RobertaTokenizerWrapper, GPT2TokenizerWrapper
"""

# Sample sentences that will be run through the tokenizer
texts = [
    "Hello, how are you?",
    "Transformers are amazing!",
    "Let's tokenize this text.",
]

# Store the sentences in a one-column table; the column is named `text`
df = DataFrame(:text => texts)

"""
    create_batches(df, batch_size)

Split the rows of `df` into consecutive batches of at most `batch_size` rows.

Batches are taken in row order. The final batch holds whatever rows are left over and
may therefore be shorter than `batch_size`. Each batch is produced by `df[rows, :]`,
so `df` can be a `DataFrame` or any other object that supports `size(df, 1)` and
two-dimensional range indexing (for example a `Matrix`).

# Arguments
- `df`: the table whose rows are batched.
- `batch_size`: maximum number of rows per batch; must be positive.

# Returns
A vector of batches; it is empty when `df` has no rows.

# Throws
- `ArgumentError`: if `batch_size` is not positive.
"""
function create_batches(df, batch_size)
    # Reject non-positive sizes up front: a zero step makes the range constructor fail
    # with an unrelated message, and a negative step would silently yield no batches.
    batch_size > 0 ||
        throw(ArgumentError("batch_size must be positive, got $batch_size"))

    nrows = size(df, 1)
    # A comprehension lets Julia pick the element type instead of building `Vector{Any}`.
    return [
        df[start:min(start + batch_size - 1, nrows), :] for start in 1:batch_size:nrows
    ]
end

# Split the DataFrame into batches of at most 2 rows each
batches = create_batches(df, 2)

# Instantiate the RoBERTa tokenizer wrapper imported from SGS_Transformers
# (the BERT and GPT-2 wrappers are imported as well and could be used instead)
tokenizer = py"RobertaTokenizerWrapper"()




PyObject <SGS_Transformers.RobertaTokenizerWrapper object at 0x7faaabf84370>

In [2]:
# Visit every sentence of every batch and print it alongside its tokenization
for batch in batches, text in batch.text
    tokens = tokenizer.tokenize(text)
    println("Original Text: $text")
    println("Tokenized Tokens: $tokens")
end

Original Text: Hello, how are you?
Tokenized Tokens: ["Hello", ",", "Ġhow", "Ġare", "Ġyou", "?"]
Original Text: Transformers are amazing!
Tokenized Tokens: ["Transform", "ers", "Ġare", "Ġamazing", "!"]
Original Text: Let's tokenize this text.
Tokenized Tokens: ["Let", "'s", "Ġtoken", "ize", "Ġthis", "Ġtext", "."]


In [None]:
using CUDA

# Check that CUDA is usable AND that at least one GPU is present.
# `CUDA.has_cuda()` alone does not guarantee a device exists, in which case the
# `CUDA.device()` call below would throw; `CUDA.has_cuda_gpu()` covers both conditions.
if CUDA.has_cuda_gpu()
    println("CUDA is available")
    println("Device: ", CUDA.device())
else
    println("CUDA is not available")
end

In [None]:
using CUDA

"""
    gpu_example()

Round-trip a random matrix through the GPU.

Creates a 1000×1000 `Float32` matrix on the CPU, uploads it to the GPU, adds `1.0f0` to
every element on the device, and copies the result back to the host.

# Returns
A 1000×1000 `Array{Float32}` containing the input values plus one.
"""
function gpu_example()
    # Create a random tensor on the CPU
    cpu_tensor = rand(Float32, 1000, 1000)

    # Upload to the GPU. Constructing the device array directly from the host array
    # keeps both the same size; a separately sized buffer filled via `copyto!` would
    # leave every element beyond the copied prefix as padding in the result.
    gpu_tensor = CUDA.CuArray(cpu_tensor)

    # Perform the element-wise operation on the GPU
    gpu_result = gpu_tensor .+ 1.0f0

    # Move the result back to the CPU
    cpu_result = Array(gpu_result)

    return cpu_result
end

# Run the GPU round-trip example; `result` holds the array copied back to the CPU
result = gpu_example()
println("Result: done")

In [None]:
# Print CUDA.jl's version information report (useful when diagnosing the GPU setup)
CUDA.versioninfo()
