<a href="https://colab.research.google.com/github/jayanthk82/smart-video-retrieval-system/blob/main/smart_video_retrevial_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install PyMuPDF spacy sentence-transformers chromadb
!pip install opencv-python

In [None]:
import os
import numpy as np
import cv2
import math
import tensorflow as tf
import chromadb
import fitz  # PyMuPDF
from sentence_transformers import SentenceTransformer
from sentence_transformers import util
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering

In [None]:
# Sentence-embedding model; the notebook uses it to encode both the video
# summaries and the search queries.
text_vectorization_model = SentenceTransformer("all-MiniLM-L6-v2")

# BLIP visual-question-answering checkpoint. The processor and the model are
# loaded from the same checkpoint name.
BLIP_VQA_CHECKPOINT = "Salesforce/blip-vqa-base"
video_captioning_processor = BlipProcessor.from_pretrained(BLIP_VQA_CHECKPOINT, use_fast=True)
video_captioning_model = BlipForQuestionAnswering.from_pretrained(BLIP_VQA_CHECKPOINT)

In [None]:
def caption(raw_image,video_captioning_processor,video_captioning_model,
            question="whats happening in the image"):
  """Describe an image by asking a visual-question-answering model about it.

  Args:
    raw_image: Image to describe. It is handed to the processor unchanged.
    video_captioning_processor: Callable that turns ``(image, question)`` into
      model inputs and also provides ``decode``.
    video_captioning_model: Model exposing ``generate(**inputs)``.
    question: Prompt sent along with the image. The default is the question
      this notebook has always used.

  Returns:
    The decoded text of the first generated sequence, with special tokens
    stripped.
  """
  inputs = video_captioning_processor(raw_image, question, return_tensors="pt")
  out = video_captioning_model.generate(**inputs)
  # Only the first generated sequence is decoded.
  return video_captioning_processor.decode(out[0], skip_special_tokens=True)


In [None]:
def summaries(video_path,video_captioning_processor,video_captioning_model):
  """Build a text summary of a video by captioning one frame per second.

  Frames are sampled at seconds 0, 1, 2, ... and each sampled frame is
  captioned with ``caption``. The captions are concatenated in order, each one
  followed by a single space.

  Args:
    video_path: Path of the video file to open with OpenCV.
    video_captioning_processor: Processor forwarded to ``caption``.
    video_captioning_model: Model forwarded to ``caption``.

  Returns:
    The concatenated captions, or an empty string if no frame could be read.

  Raises:
    ValueError: If the video cannot be opened or reports a frame rate that is
      not positive.
  """
  vidcap = cv2.VideoCapture(video_path)
  try:
    if not vidcap.isOpened():
      raise ValueError(f"Could not open video: {video_path}")
    fps = vidcap.get(cv2.CAP_PROP_FPS)
    # `not fps > 0` also rejects NaN, which `fps <= 0` would let through.
    if not fps > 0:
      raise ValueError(f"Video reports an invalid frame rate ({fps}): {video_path}")
    frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    captions = []
    second = 0
    while True:
      frame_index = math.floor(second * fps)
      # Stop at the reported end of the video rather than relying only on a
      # failed read; the bound is skipped when no usable count is reported.
      if frame_count > 0 and frame_index >= frame_count:
        break
      vidcap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
      ret, frame = vidcap.read()
      if not ret:
        break
      # OpenCV decodes frames as BGR, while PIL interprets the array as RGB.
      rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      captions.append(
        caption(Image.fromarray(rgb_frame),video_captioning_processor,video_captioning_model)
      )
      second += 1
  finally:
    # Release the capture handle even when opening or captioning fails.
    vidcap.release()
  return ''.join(text + ' ' for text in captions)


In [None]:
def folder_walkthrough(root_folder):
  """Collect the path of every file under ``root_folder``, in a stable order.

  The position of a path in the returned list is later used as that video's
  id in the vector store and to map a query hit back to its file, so the order
  must be reproducible between runs. Directories are visited top-down in
  sorted order, and the files inside each directory are sorted by name.

  Args:
    root_folder: Directory to walk recursively.

  Returns:
    A list of file paths, each built by joining the containing directory with
    the file name. The list is empty if the folder has no files or does not
    exist.
  """
  data_paths = []
  for dirpath, dirnames, filenames in os.walk(root_folder):
    # Sorting in place makes os.walk descend into subfolders in this order.
    dirnames.sort()
    data_paths.extend(os.path.join(dirpath, name) for name in sorted(filenames))
  return data_paths

In [None]:
def chromadb_setup(setup_path,collection_name,vector,samples_count,summary):
  """Open (or create) a persistent Chroma collection and store the samples.

  Record ``i`` is stored under the id ``str(i)`` with ``vector[i]`` as its
  embedding and ``summary[i]`` as its document. Records are written with
  ``upsert`` so that re-running against an existing store refreshes entries
  whose ids are already present.

  Args:
    setup_path: Directory of the persistent Chroma store.
    collection_name: Name of the collection to get or create.
    vector: Sequence of embeddings, one per sample. May be empty or ``None``.
    samples_count: Number of leading samples to store.
    summary: Sequence of document texts aligned with ``vector``.

  Returns:
    The Chroma collection. It is returned even when there is nothing to store,
    so an existing store can still be queried.

  Raises:
    ValueError: If ``samples_count`` exceeds the number of embeddings or
      summaries supplied.
  """
  client = chromadb.PersistentClient(path=setup_path)
  collection = client.get_or_create_collection(name=collection_name)

  # len() instead of truthiness so a NumPy 2-D array is accepted as well.
  if vector is None or len(vector) == 0:
    return collection

  if samples_count > len(vector) or samples_count > len(summary):
    raise ValueError(
      f"samples_count={samples_count} exceeds the data supplied "
      f"({len(vector)} embeddings, {len(summary)} summaries)"
    )

  # Write in chunks rather than issuing one request per record.
  batch_size = 500
  for start in range(0, samples_count, batch_size):
    stop = min(start + batch_size, samples_count)
    collection.upsert(
      ids = [str(i) for i in range(start, stop)],
      embeddings = list(vector[start:stop]),
      documents = list(summary[start:stop])
    )
  return collection

In [None]:
def upload_your_dataset(video_address,
                        chromadb_setup_path='/content/drive/MyDrive/video_CHROMA_DB',
                        chroma_collection_name='VIDEOs'):
  """Summarise, embed and store every video in ``video_address``.

  Each video is turned into a caption summary with ``summaries``, each summary
  is encoded with the module-level ``text_vectorization_model``, and the
  results are written to Chroma through ``chromadb_setup``.

  Args:
    video_address: List of video file paths. A video's position in this list
      becomes its id in the collection.
    chromadb_setup_path: Directory of the persistent Chroma store.
    chroma_collection_name: Name of the collection to write to.

  Returns:
    Whatever ``chromadb_setup`` returns for the populated collection.
  """
  samples_count = len(video_address)
  summary = [
    summaries(path,video_captioning_processor,video_captioning_model)
    for path in video_address
  ]
  vector = [
    text_vectorization_model.encode(text,convert_to_numpy=True)
    for text in summary
  ]
  return chromadb_setup(chromadb_setup_path,chroma_collection_name,vector,samples_count,summary)

In [None]:
def QUERY(QUERY,chromadb_collection):
  """Find the stored video whose summary is closest to a text query.

  The query text is encoded with the module-level ``text_vectorization_model``
  and the single nearest record is requested from the collection. The id of
  that record is used as an index into the module-level ``video_address`` list.

  Args:
    QUERY: Text to search for.
    chromadb_collection: Chroma collection to search.

  Returns:
    The path of the best-matching video, or ``None`` when the collection
    returns no match. The path is also printed.
  """
  query_embedding = text_vectorization_model.encode(QUERY,convert_to_numpy=True)
  result = chromadb_collection.query(query_embeddings = query_embedding,
                 n_results=1)
  # The ids come back as one list per query; it is empty when nothing matched.
  matched_ids = result['ids'][0] if result['ids'] else []
  if not matched_ids:
    print('using chromaDB: No matching video was found for your query')
    return None
  matched_path = video_address[int(matched_ids[0])]
  print('using chromaDB: Your query is related to the document is at ',matched_path)
  return matched_path

In [None]:
# Gather every file under the dataset folder, then caption, embed and store
# each one. The list order matters: QUERY later indexes into `video_address`.
# NOTE(review): the path presumes Google Drive is already mounted at
# /content/drive; no mount call is visible in this notebook - confirm.
VIDEO_DATASET_ROOT = '/content/drive/MyDrive/VIDEO_DATASET'
video_address = folder_walkthrough(VIDEO_DATASET_ROOT)
chromadb_collection = upload_your_dataset(video_address)


Enter your query: Dancing videos
using chromaDB: Your query is related to the document is at  /content/drive/MyDrive/VIDEO_DATASET/7269163-uhd_2160_3840_25fps.mp4


In [None]:
# Example search against the collection built above.
example_query = 'coffe'
QUERY(example_query, chromadb_collection)

using chromaDB: Your query is related to the document is at  /content/drive/MyDrive/VIDEO_DATASET/6686511-uhd_3840_2160_25fps.mp4
