In [11]:
import pickle

from transformers import AutoFeatureExtractor, ASTForAudioClassification
#from datasets import load_dataset
import torch
from torch.nn import functional as F


In [27]:
# path = r'D:\cy101\cy101_Binary\shake\ball_basket\trial-1\audio.bin'
path = r'D:\cy101\cy101_Binary\shake\medicine_calcium\trial-1\audio.bin'

bin_file = open(path, 'rb')
data = pickle.load(bin_file)
bin_file.close()

print('data: ', data.shape)

data:  (92602,)


In [30]:
feature_extractor = AutoFeatureExtractor.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")
model = ASTForAudioClassification.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")
print('model.config.id2label: ', model.config.id2label)

sampling_rate = 16000
inputs = feature_extractor(data, sampling_rate=sampling_rate, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

predicted_class_ids = torch.argmax(logits, dim=-1).item()
predicted_label = model.config.id2label[predicted_class_ids]
print('predicted_label: ', predicted_label)

proba = F.softmax(logits, dim=1)[0]
print('proba: ', proba.shape)

labels_proba = {}
for class_id, p in enumerate(proba):
    label = model.config.id2label[class_id]
    labels_proba[label] = p.item()

model.config.id2label:  {0: 'Speech', 1: 'Male speech, man speaking', 2: 'Female speech, woman speaking', 3: 'Child speech, kid speaking', 4: 'Conversation', 5: 'Narration, monologue', 6: 'Babbling', 7: 'Speech synthesizer', 8: 'Shout', 9: 'Bellow', 10: 'Whoop', 11: 'Yell', 12: 'Battle cry', 13: 'Children shouting', 14: 'Screaming', 15: 'Whispering', 16: 'Laughter', 17: 'Baby laughter', 18: 'Giggle', 19: 'Snicker', 20: 'Belly laugh', 21: 'Chuckle, chortle', 22: 'Crying, sobbing', 23: 'Baby cry, infant cry', 24: 'Whimper', 25: 'Wail, moan', 26: 'Sigh', 27: 'Singing', 28: 'Choir', 29: 'Yodeling', 30: 'Chant', 31: 'Mantra', 32: 'Male singing', 33: 'Female singing', 34: 'Child singing', 35: 'Synthetic singing', 36: 'Rapping', 37: 'Humming', 38: 'Groan', 39: 'Grunt', 40: 'Whistling', 41: 'Breathing', 42: 'Wheeze', 43: 'Snoring', 44: 'Gasp', 45: 'Pant', 46: 'Snort', 47: 'Cough', 48: 'Throat clearing', 49: 'Sneeze', 50: 'Sniff', 51: 'Run', 52: 'Shuffle', 53: 'Walk, footsteps', 54: 'Chewing, m

predicted_label:  Coin (dropping)
proba:  torch.Size([527])


In [29]:
top_n = 10
[{k: v} for k, v in sorted(labels_proba.items(), key=lambda item: item[1])][-top_n:]

[{'Printer': 0.020859986543655396},
 {'Vehicle': 0.022946903482079506},
 {'Cash register': 0.026049446314573288},
 {'Music': 0.02659224532544613},
 {'Scissors': 0.02832096256315708},
 {'Rattle': 0.035546962171792984},
 {'Speech': 0.05043497681617737},
 {'Inside, small room': 0.05208072066307068},
 {'Single-lens reflex camera': 0.09221113473176956},
 {'Coin (dropping)': 0.17451658844947815}]

# sentence-transformers/paraphrase

In [3]:
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch

In [2]:
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('sentence-transformers/paraphrase-xlm-r-multilingual-v1')
embeddings = model.encode(sentences)
print(embeddings)

Downloading (…)31d34/.gitattributes:   0%|          | 0.00/345 [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)e4a1a31d34/README.md:   0%|          | 0.00/3.74k [00:00<?, ?B/s]

Downloading (…)a1a31d34/config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading (…)31d34/tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/550 [00:00<?, ?B/s]

Downloading (…)1a31d34/modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

[[ 0.11453559  0.07683486  0.02626469 ... -0.13231887 -0.00558196
   0.31623384]
 [ 0.00646675  0.16544811 -0.03636225 ...  0.18916449  0.20142542
   0.24428211]]


In [4]:

#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


# Sentences we want sentence embeddings for
sentences = ['This is an example sentence', 'Each sentence is converted']

# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-xlm-r-multilingual-v1')
model = AutoModel.from_pretrained('sentence-transformers/paraphrase-xlm-r-multilingual-v1')

# Tokenize sentences
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Compute token embeddings
with torch.no_grad():
    model_output = model(**encoded_input)

# Perform pooling. In this case, max pooling.
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

print("Sentence embeddings:")
print(sentence_embeddings)


Downloading (…)okenizer_config.json:   0%|          | 0.00/550 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (…)lve/main/config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Sentence embeddings:
tensor([[ 0.1145,  0.0768,  0.0263,  ..., -0.1323, -0.0056,  0.3162],
        [ 0.0065,  0.1654, -0.0364,  ...,  0.1892,  0.2014,  0.2443]])


In [16]:
# Convert embeddings back to sentences
decoded_sentences = [tokenizer.decode(ids) for ids in encoded_input['input_ids']]

print("Output sentences:")
print(decoded_sentences)

Output sentences:
['<s> This is an example sentence</s>', '<s> Each sentence is converted</s>']


In [8]:
for output in model_output:
    line = tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
#     line = tokenizer.decode(output)
    print(line)

TypeError: argument 'ids': Can't extract `str` to `Vec`

In [14]:
# Get the decoded sentence from the token IDs
decoded_sentence = tokenizer.decode(model_output['pooler_output'][0])

print(decoded_sentence)

TypeError: argument 'ids': 'float' object cannot be interpreted as an integer

In [13]:
model_output['pooler_output']

tensor([[ 0.1159,  0.0142, -0.1938,  ..., -0.0496, -0.0042, -0.0378],
        [ 0.1686, -0.0109, -0.2608,  ...,  0.0164, -0.0215, -0.0750]])

In [15]:
# Convert embeddings back to sentences
decoded_sentences = [tokenizer.decode(ids) for ids in encoded_input['input_ids']]

print("Output sentences:")
print(decoded_sentences)

Output sentences:
['<s> This is an example sentence</s>', '<s> Each sentence is converted</s>']


In [17]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('gpt2')
model = AutoModelForCausalLM.from_pretrained('gpt2')

# Sentences you want to generate
prompts = ["Once upon a time", "In a galaxy far, far away", "Today is a beautiful day"]

# Generate new sentences
generated_sentences = []
for prompt in prompts:
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    with torch.no_grad():
        output = model.generate(input_ids, max_length=50, num_return_sequences=1)
    generated_sentence = tokenizer.decode(output[0], skip_special_tokens=True)
    generated_sentences.append(generated_sentence)

print("Generated sentences:")
print(generated_sentences)


Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated sentences:
['Once upon a time, the world was a place of great beauty and great danger. The world was a place of great danger, and the world was a place of great danger. The world was a place of great danger, and the world was a', 'In a galaxy far, far away, the galaxy is a vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast,', 'Today is a beautiful day for the world.\n\nThe world is a beautiful place.\n\nThe world is a beautiful place.\n\nThe world is a beautiful place.\n\nThe world is a beautiful place.\n\nThe world is']


In [19]:
for i, s in enumerate(generated_sentences):
    print(i, s)

0 Once upon a time, the world was a place of great beauty and great danger. The world was a place of great danger, and the world was a place of great danger. The world was a place of great danger, and the world was a
1 In a galaxy far, far away, the galaxy is a vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast, vast,
2 Today is a beautiful day for the world.

The world is a beautiful place.

The world is a beautiful place.

The world is a beautiful place.

The world is a beautiful place.

The world is


In [9]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")  
model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")

In [10]:
# input text
sentence = "Remote work may also enhance work-life balance – because employees have more control over their work schedule, it’s easier for them to take care of personal errands in the morning or during lunch hour."

sentence = "paraphrase: " + sentence + " </s>"
encoding = tokenizer.encode_plus(sentence,padding=True, return_tensors="pt")
input_ids, attention_masks = encoding["input_ids"], encoding["attention_mask"]

outputs = model.generate(
    input_ids=input_ids, attention_mask=attention_masks,
    max_length=256,
    do_sample=True,
    top_k=120,
    top_p=0.95,
    early_stopping=True,
    num_return_sequences=5
)
 
for output in outputs:
    line = tokenizer.decode(output, skip_special_tokens=True,clean_up_tokenization_spaces=True)
    print(line)

Remote work may also improve work-life balance – because employees have more control over their work schedule, it’s easier for them to take care of personal errands in the morning or during lunch hour.
Remote work may also increase work-life balance – as employees have more control over their work schedule, it becomes easier for them to take care of personal errands in the morning or during lunch hour.
Remote work may also enhance work-life balance – because employees have more control over their work schedule, it’s easier for them to take care of personal errands in the morning or during lunch hour.
Remote work may also improve work-life balance – because employees have more control over their work schedule, it is easier for them to run personal errands in the morning or during lunch time.
Remote work may also improve work-life balance – because employees have more control over their work schedule, it’s easier for them to take care of personal errands in the morning or during lunchtim

In [8]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")  
model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws").to('cuda')

sentence = "Performing crush action on wood object of egg category with deformability: rigid, material: wood, transparency: opaque, color: green, size: small, hardness: hard."

text =  "paraphrase: " + sentence + " </s>"

encoding = tokenizer.encode_plus(text,pad_to_max_length=True, return_tensors="pt")
input_ids, attention_masks = encoding["input_ids"].to("cuda"), encoding["attention_mask"].to("cuda")


outputs = model.generate(
    input_ids=input_ids, attention_mask=attention_masks,
    max_length=256,
    do_sample=True,
    top_k=120,
    top_p=0.95,
    early_stopping=True,
    num_return_sequences=5
)

for i, output in enumerate(outputs):
    line = tokenizer.decode(output, skip_special_tokens=True,clean_up_tokenization_spaces=True)
    print(i, line)

0 Performing crushed action on the wood object of the egg category with deformability: rigid, material: wood, transparent: green, size: small, hardness: hard.
1 Conducting crush action on Wood object of the category Egg with deformability: rigid, material: wood, transparency: opaque, color: green, size: small, hardness: hard
2 Performing crush action on wood object of the Egg category with deformability: rigid material, transparency: opaque, color: green, size: small, hardness: hard.
3 Performs crush action on wood of the egg category with deformability: rigid, material: wood, transparency: opaque, color: green, size: small, hardness: hard.
4 Ample action on wood object of the Egg category with deformability: rigid, material: wood, transparency: opaque, color: green, size: small, hardness: hard.
