In [1]:
!pip install transformers datasets

[0m

In [2]:
import av
import numpy as np
import torch
from transformers import AutoImageProcessor, AutoTokenizer, VisionEncoderDecoderModel

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# load pretrained processor, tokenizer, and model
image_processor = AutoImageProcessor.from_pretrained("MCG-NJU/videomae-base")
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = VisionEncoderDecoderModel.from_pretrained("Neleac/timesformer-gpt2-video-captioning").to(device)

# load video
# NOTE: absolute, machine-specific path; adjust it for your environment.
video_path = "/Users/pradhammummaleti/Desktop/clips/pineapple.mp4"

# extract evenly spaced frames from video
# The container is opened in a `with` block so the file is closed even if decoding raises.
with av.open(video_path) as container:
    seg_len = container.streams.video[0].frames
    clip_len = model.config.encoder.num_frames
    # clip_len frame positions spread evenly over [0, seg_len); a set gives O(1) membership tests
    indices = set(np.linspace(0, seg_len, num=clip_len, endpoint=False).astype(np.int64))
    last_index = max(indices, default=-1)
    frames = []
    container.seek(0)
    for i, frame in enumerate(container.decode(video=0)):
        if i in indices:
            frames.append(frame.to_ndarray(format="rgb24"))
        if i >= last_index:
            # every requested frame has been seen; no need to decode the rest of the video
            break

# generate caption
gen_kwargs = {
    "min_length": 10,
    "max_length": 20,
    "num_beams": 8,
}
pixel_values = image_processor(frames, return_tensors="pt").pixel_values.to(device)
tokens = model.generate(pixel_values, **gen_kwargs)
# batch_decode returns one string per generated sequence; only the first is used here
caption = tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
print(caption) # e.g. "A woman is demonstrating how to cut a pineapple with a knife."

2023-12-18 14:59:01.078836: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


A woman is demonstrating how to cut a pineapple with a knife.


In [1]:
from audiodiffusion.audio_encoder import AudioEncoder

# Audio track to encode (absolute, machine-specific path).
audio_file = "/Users/pradhammummaleti/Desktop/clips/pineapple.wav"

# Load the pretrained encoder, then encode the single-file batch.
audio_encoder = AudioEncoder.from_pretrained("teticio/audio-encoder")
encoded_data = audio_encoder.encode(
    [audio_file]
)

In [8]:
# Display the result of audio_encoder.encode([...]) as the cell output.
encoded_data

tensor([[ 11.0744,  23.6762, -30.3259,  39.1872, -37.6647,  -8.0132,  -2.3028,
           8.2763, -40.9545,  16.5539,   7.0898,  11.5276, -11.0206,  22.1024,
          17.9133,  40.7175,  22.1279,   6.0712, -27.0478, -27.1478,   2.3527,
          21.3279,   3.4097,  32.3277, -40.1387,  72.7095, -22.4377,  42.6610,
          22.8109,  -1.1885,   3.2350,   9.1994,  -7.4176,  38.4983,  -1.9953,
           8.3511,  60.2911, -53.9132,  -0.6156,  42.5981,  17.7433, -32.4823,
          26.7388,  -3.0530,   8.2417,  -2.2541,  20.6977, -32.3582,  53.1745,
          -5.8590, -19.4883, -12.1383,  -6.0315, -64.6113,  -7.9678, -37.3708,
          -6.4381, -22.8694,   9.3857,  40.2284, -14.7982,  -0.2618, -20.8238,
          16.8162,  -0.6041, -23.8430, -43.4468,   4.3810, -12.0016,  -4.4224,
         -29.6774,  24.1884,  -5.6692, -19.5042,   0.1824,   7.9631,  30.2965,
          -1.3448,   9.5877,   8.0240,   4.8490, -37.8526,  -8.0223,  19.9402,
          29.3661, -10.6023,  11.2181,  23.5371,  -2

In [72]:
import os

import assemblyai as aai

# The API key is a secret and must never be stored in the notebook.
# Export ASSEMBLYAI_API_KEY in the environment before starting the kernel;
# a missing variable raises KeyError here rather than failing later inside the SDK.
aai.settings.api_key = os.environ["ASSEMBLYAI_API_KEY"]
transcriber = aai.Transcriber()

# Transcribe the local audio file (absolute, machine-specific path).
transcript = transcriber.transcribe("/Users/pradhammummaleti/Desktop/clips/pineapple.mp3")



In [79]:
from transformers import pipeline

# Build a Hugging Face summarization pipeline backed by the Falconsai/text_summarization model.
summarizer = pipeline("summarization", model="Falconsai/text_summarization")

# Text to summarize, stored verbatim as a literal.
# NOTE(review): presumably pasted from the transcription cell's result — confirm; it is not derived from `transcript` in code.
ARTICLE = """
Nowadays, you can certainly buy your pineapples at the grocery store, all cut up and ready to go. But I'm going to tell you, buying a fresh, whole pineapple is not only going to taste better, it's going to save you some money as well. So I'm going to show you the easiest way to cut a pineapple. First thing you're going to do is lie the pineapple down on its side, and using a nice sharp knife, you want to cut off the top and the bottom of the pineapple. Then stand the pineapple up and you're going to slice it straight down the center, creating two equal halves. Now, every pineapple has a core running right down the center, and it's too tough to eat, so it needs to be removed. You're going to see that the fibers of the core run in the opposite direction of the rest of the pineapple. So all you need to do is take the edge of your knife and line it up with the edge of the core and slice down and inward. Then you're going to slice each half of the pineapple in half and then take each quarter and slice it right down the center till you get to the skin. Then rotate that quarter and make half inch slices all the way down. Then for the last step, you're just going to take the edge of your knife and run it right between the flesh of the pineapple and the skin to loosen up all the pieces. Now, this is going to leave you with the perfect little bite sized pieces of pineapple that you can either serve as is in a little pineapple bolt or slide them right out and pop them in a bowl. For more simple and nutritious recipes, tips, and ideas, be sure to visit cleananddelicious.com. I'm Danny Speeds, and I'll see you next time.

"""

In [85]:
# Summarize ARTICLE with sampling disabled and the length bounds given below, then print the raw pipeline result.
summary_result = summarizer(
    ARTICLE,
    max_length=100,
    min_length=50,
    do_sample=False,
)
print(summary_result)

[{'summary_text': "I'm going to show you the easiest way to cut a pineapple . First thing you're going to do is lie the pineapple down on its side . Then stand the pineapple up and slice it straight down the center . This is going to leave you with the perfect little bite sized pieces of pineapple that you can either serve as is in a little pineapple bolt or slide them right out and pop them in ."}]
