In [5]:
import torch
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelWithLMHead,
    AutoTokenizer,
    SummarizationPipeline,
    set_seed,
)

In [6]:
# Checkpoint selection: `model_size` is interpolated into the Hugging Face Hub
# repository name, so changing it here switches which checkpoint gets loaded.
model_size = "base"
model_id = (
    f"SEBIS/code_trans_t5_{model_size}_source_code_summarization_python"
)

In [7]:
# Build the summarization pipeline for the checkpoint named by `model_id`.
# Model and tokenizer are both loaded from the same repository id.
pipeline = SummarizationPipeline(
    # AutoModelForSeq2SeqLM replaces the deprecated AutoModelWithLMHead for
    # encoder-decoder checkpoints such as this T5 model.
    model=AutoModelForSeq2SeqLM.from_pretrained(model_id),
    # skip_special_tokens is passed through unchanged from the original call.
    # NOTE(review): it looks like a decode-time option rather than a loading
    # option -- confirm whether it has any effect here.
    tokenizer=AutoTokenizer.from_pretrained(model_id, skip_special_tokens=True),
    # Use the first GPU when CUDA is available; otherwise fall back to CPU (-1)
    # so the notebook still runs on machines without a GPU.
    device=0 if torch.cuda.is_available() else -1,
)

In [13]:
# Sample input for the summarizer: the source text of a Qt helper that renders
# SVG data into a QImage. In the cells shown, the snippet is only handed to the
# model as text and is never executed, so the Python 2 `unicode` name and the
# missing Qt imports inside it are harmless.
code_snippet = """def svg_to_image(string, size=None):
    if isinstance(string, unicode):
        string = string.encode('utf-8')
    renderer = QtSvg.QSvgRenderer(QtCore.QByteArray(string))
    if not renderer.isValid():
        raise ValueError('Invalid SVG data.')
    if size is None:
        size = renderer.defaultSize()
    image = QtGui.QImage(size, QtGui.QImage.Format_ARGB32)
    painter = QtGui.QPainter(image)
    renderer.render(painter)
    return image"""

In [20]:
# Sampling (do_sample=True) draws tokens at random, so seed the RNGs in this
# cell: the two summaries are then identical on every run and do not depend on
# which cells were executed before this one.
set_seed(42)

# Generate two sampled candidate summaries for the current `code_snippet`.
# NOTE(review): the model card for this checkpoint family appears to recommend
# pre-tokenized (space-separated) Python as input; raw source is passed here --
# confirm, as it may affect summary quality.
pipeline(
    [code_snippet],
    do_sample=True,
    temperature=0.7,  # values below 1.0 sharpen the token distribution
    top_p=0.5,  # nucleus sampling: keep the smallest token set covering 50% of the mass
    num_return_sequences=2,
    max_length=200,  # upper bound on generated length, in tokens
)

Your max_length is set to 200, but your input_length is only 113. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=56)


[[{'summary_text': 'How to make a slower timer ?'},
  {'summary_text': 'How to use TermQuery to do an AND query across a QTreeWidget ?'}]]

In [21]:
# Second sample input: a naive recursive Fibonacci function. The text is only
# passed to the model, never executed in this cell.
code_snippet = """
def fibonacci(n):
    if n == 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fibonacci(n-1) + fibonacci(n-2)
"""

# Sampling (do_sample=True) draws tokens at random, so seed the RNGs in this
# cell: the two summaries are then identical on every run and do not depend on
# which cells were executed before this one.
set_seed(42)

# Generate two sampled candidate summaries, using a higher temperature than
# the previous example.
pipeline(
    [code_snippet],
    do_sample=True,
    temperature=0.9,
    top_p=0.5,  # nucleus sampling: keep the smallest token set covering 50% of the mass
    num_return_sequences=2,
    max_length=200,  # upper bound on generated length, in tokens
)

Your max_length is set to 200, but your input_length is only 36. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=18)


[[{'summary_text': 'How to make a function that returns a list of functions its arguments ?'},
  {'summary_text': 'How to make a function that returns a bound to a list of a function ?'}]]

In [25]:
# Third sample input: a helper that hashes a voice "history" value (None, a
# voice name, or a dict of prompt arrays). The text is only passed to the
# model, never executed in this cell.
code_snippet = """
def history_to_hash(npz: Union[None, str, FullGeneration]):
    if npz is None:
        return get_md5_hex(b"None")
    if isinstance(npz, str):
        return get_hash_from_voice_name(npz)
    npz_str = json.dumps(
        {
            "semantic_prompt": npz["semantic_prompt"].tolist(),
            "coarse_prompt": npz["coarse_prompt"].tolist(),
            "fine_prompt": npz["fine_prompt"].tolist(),
        }
    )
    npz_as_str = npz_str.encode("utf-8")
    return get_md5_hex(npz_as_str)
"""

# Sampling is switched off here (do_sample=False), so a single summary is
# requested and the sampling knobs used in the earlier cells (temperature,
# top_p) are intentionally not passed.
pipeline([code_snippet], do_sample=False, num_return_sequences=1, max_length=200)

Your max_length is set to 200, but your input_length is only 172. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=86)


[{'summary_text': 'How to make a custom event with a masked object ?'}]