In [5]:
import os
import torch
from torch.nn import functional as F
import string
from transformers import BertTokenizer, BertForMaskedLM, top_k_top_p_filtering, logging
logging.set_verbosity_error()

# Module-level configuration placeholders. The functions in this cell read
# no_words_to_be_predicted as a global; all three names receive their real
# values from set_model_config() in the driver code at the bottom of the cell.
# None marks "not configured yet" so an unset option cannot be mistaken for a
# usable value.
no_words_to_be_predicted = None
select_model = None
enter_input_text = None

def set_model_config(**kwargs):
  """Echo the supplied settings and return them as a 3-tuple.

  Every keyword argument is printed as "name = value".

  Recognised keyword arguments:
    no_words_to_be_predicted: number of candidate words to request (integer).
    select_model: model identifier string.
    enter_input_text: the prompt text (string).

  When all three recognised names are present they are looked up by name, so
  the order in which they are passed does not matter. Otherwise the first
  three keyword values are used in the order they were given.

  Returns:
    Tuple (no_words_to_be_predicted, select_model, enter_input_text).

  Raises:
    IndexError: in the positional fallback, when fewer than three keyword
      arguments were supplied.
  """
  for key, value in kwargs.items():
    print("{0} = {1}".format(key, value))

  expected_keys = ("no_words_to_be_predicted", "select_model", "enter_input_text")
  if all(key in kwargs for key in expected_keys):
    # Name-based lookup: immune to callers reordering the keywords.
    return tuple(kwargs[key] for key in expected_keys)

  # Fallback for callers that use other keyword names: take values by position.
  values = list(kwargs.values())
  return values[0], values[1], values[2]


def load_model(model_name):
  """Load the tokenizer and masked-language model for ``model_name``.

  Only "bert" (compared case-insensitively) is handled: it loads the
  'bert-base-uncased' tokenizer and BertForMaskedLM weights and switches the
  model to eval mode.

  Args:
    model_name: model identifier string.

  Returns:
    Tuple (tokenizer, model) for "bert". None for any other name, or when
    loading fails; in the failure case the error is printed rather than
    silently discarded.
  """
  try:
    if model_name.lower() == "bert":
      bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
      bert_model = BertForMaskedLM.from_pretrained('bert-base-uncased').eval()
      return bert_tokenizer, bert_model
    print("tf?")
  except Exception as e:
    # Best-effort contract is kept (no re-raise), but the cause is surfaced.
    print("Failed to load model {!r}: {}".format(model_name, e))
  return None


def get_all_predictions(text_sentence, model_name, top_clean=5):
  """Predict candidate words for the masked position in ``text_sentence``.

  Relies on the module-level ``bert_tokenizer``, ``bert_model`` and
  ``no_words_to_be_predicted`` being set before the call.

  Args:
    text_sentence: text containing the mask placeholder understood by
      encode_bert.
    model_name: model identifier; only "bert" (case-insensitive) is handled.
    top_clean: maximum number of cleaned candidates kept by decode_bert.

  Returns:
    {'bert': <newline-separated candidates>} for "bert"; None for any other
    model name.
  """
  if model_name.lower() != "bert":
    return None

  # ========================= BERT =================================
  token_ids, mask_position = encode_bert(bert_tokenizer, text_sentence)
  with torch.no_grad():
    model_output = bert_model(token_ids)[0]
  mask_scores = model_output[0, mask_position, :]
  candidate_ids = mask_scores.topk(no_words_to_be_predicted).indices.tolist()
  return {'bert': decode_bert(bert_tokenizer, candidate_ids, top_clean)}


# bert encode
def encode_bert(tokenizer, text_sentence, add_special_tokens=True):
  """Encode a sentence containing a '<mask>' placeholder and locate the mask.

  Args:
    tokenizer: object exposing ``mask_token``, ``mask_token_id`` and
      ``encode(text, add_special_tokens=...)``.
    text_sentence: input text; every '<mask>' is replaced by
      ``tokenizer.mask_token`` before encoding.
    add_special_tokens: forwarded unchanged to ``tokenizer.encode``.

  Returns:
    Tuple (input_ids, mask_idx): ``input_ids`` is a tensor built from a
    one-element list wrapping the encoded ids; ``mask_idx`` is the position
    of the first mask token within that single row.

  Raises:
    ValueError: if the encoded sentence contains no mask token.
  """
  text_sentence = text_sentence.replace('<mask>', tokenizer.mask_token)
  words = text_sentence.split()
  # if <mask> is the last token, append a "." so that models dont predict punctuation.
  if words and words[-1] == tokenizer.mask_token:
    text_sentence += ' .'
  # Encoding happens for every sentence, not only when the mask is the last word.
  input_ids = torch.tensor([tokenizer.encode(text_sentence, add_special_tokens=add_special_tokens)])
  mask_positions = torch.where(input_ids == tokenizer.mask_token_id)[1].tolist()
  if not mask_positions:
    raise ValueError("text_sentence must contain a '<mask>' placeholder")
  return input_ids, mask_positions[0]
  
# bert decode
def decode_bert(tokenizer, pred_idx, top_clean):
  """Turn predicted token ids into a newline-separated list of clean words.

  Args:
    tokenizer: object exposing ``decode(token_id)`` returning a string.
    pred_idx: iterable of predicted token ids, in ranking order.
    top_clean: maximum number of cleaned tokens to keep.

  Returns:
    The first ``top_clean`` surviving tokens joined by '\\n', with any '##'
    removed. Tokens that are empty, a single punctuation character, or
    '[PAD]' are skipped.
  """
  # Exact-match set: a substring test against one long string would also drop
  # legitimate tokens such as 'A', 'P', 'D' or 'PAD' (all substrings of '[PAD]').
  ignore_tokens = set(string.punctuation) | {'[PAD]'}
  tokens = []
  for w in pred_idx:
    # Remove all whitespace from the decoded piece.
    token = ''.join(tokenizer.decode(w).split())
    if token and token not in ignore_tokens:
      tokens.append(token.replace('##', ''))
  return '\n'.join(tokens[:top_clean])


def get_prediction_end_of_sentence(input_text, model_name):
  """Predict the word that follows ``input_text``.

  For "bert" (case-insensitive) a ' <mask>' placeholder is appended, the
  resulting text is printed, and get_all_predictions is called with
  ``top_clean`` taken from the module-level ``no_words_to_be_predicted``.

  Args:
    input_text: the prompt to extend.
    model_name: model identifier string.

  Returns:
    Whatever get_all_predictions returns for "bert". None for any other
    model name, or when prediction raises; in that case the error is printed
    rather than silently discarded.
  """
  try:
    if model_name.lower() == "bert":
      input_text += ' <mask>'
      print(input_text)
      return get_all_predictions(input_text, model_name, top_clean=int(no_words_to_be_predicted))
    print("Tf2?")
  except Exception as error:
    # Best-effort contract is kept (no re-raise), but the cause is surfaced.
    print("Prediction failed: {}".format(error))
  return None

# Driver: configure, load BERT, predict the next word for the prompt, and
# keep the candidates both as a list and as a single display string.
try:
  print("Next Word Prediction with Pytorch using BERT")
  no_words_to_be_predicted, select_model, enter_input_text = set_model_config(
      no_words_to_be_predicted=5,
      select_model="bert",
      enter_input_text="How are you gonna cook the")
  if select_model.lower() == "bert":
    bert_tokenizer, bert_model = load_model(select_model)
    res = get_prediction_end_of_sentence(enter_input_text, select_model)
    # res['bert'] is a newline-separated string of candidate words.
    answer_bert = res['bert'].split("\n")
    print(answer_bert)
    answer_as_string_bert = "    ".join(answer_bert)
except Exception as e:
  # Include the cause so a failure can be diagnosed from the cell output.
  print('Some problem occured: {}'.format(e))

Next Word Prediction with Pytorch using BERT
no_words_to_be_predicted = 5
select_model = bert
enter_input_text = How are you gonna cook the
How are you gonna cook the <mask>
['food', 'dinner', 'pie', 'chicken', 'meal']


In [17]:
import imageio
import torch
import numpy as np
import os
from streamlit_drawable_canvas import st_canvas
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
from PIL import Image

# Choose the compute device first: CUDA when torch reports it available, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The captioning model, its image pre-processor and its tokenizer are all
# loaded from the same pretrained checkpoint name.
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
model.to(device)

# Generation settings that predict_step() forwards to model.generate().
max_length = 16
num_beams = 4
gen_kwargs = dict(max_length=max_length, num_beams=num_beams)


def predict_step(image_paths):
  """Generate captions for the given image files.

  Uses the module-level ``feature_extractor``, ``model``, ``tokenizer``,
  ``device`` and ``gen_kwargs``.

  Args:
    image_paths: iterable of image file paths accepted by ``Image.open``.

  Returns:
    List of caption strings (whitespace-stripped), one per decoded output
    sequence. An empty list when ``image_paths`` yields nothing.
  """
  images = []
  for image_path in image_paths:
    # The context manager releases the file handle; convert() returns a
    # separate, fully loaded RGB copy that stays valid after the file closes.
    with Image.open(image_path) as i_image:
      images.append(i_image.convert(mode="RGB"))

  if not images:
    # Nothing to caption; skip the feature extractor and the model entirely.
    return []

  pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
  pixel_values = pixel_values.to(device)

  output_ids = model.generate(pixel_values, **gen_kwargs)

  preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
  return [pred.strip() for pred in preds]


# Caption the drawing saved by the drawboard and show the resulting list.
drawn_image_captions = predict_step(['drawboard/drawn_image.png'])
print(drawn_image_captions)


['a piece of wood sitting on top of a tree']


In [14]:
import cv2

# Load YOLO model and configuration
# NOTE(review): these are absolute, machine-specific paths; the cell only runs
# where this directory layout exists.
yolo_weights_path = r'C:\MITWPU\FirstLanguage Technologies\FLDemo\yolov3.weights'
yolo_config_path = r'C:\MITWPU\FirstLanguage Technologies\FLDemo\yolov3.cfg'
coco_names_path = r'C:\MITWPU\FirstLanguage Technologies\FLDemo\coco.names'

yolo_net = cv2.dnn.readNet(yolo_weights_path, yolo_config_path)

# One class label per line of the names file; surrounding whitespace is stripped.
classes = []
with open(coco_names_path, 'r') as names_file:
    for raw_line in names_file:
        classes.append(raw_line.strip())

# Load image
image_path = r'drawboard\drawn_image.png'  # Use raw string or double backslashes
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would otherwise surface later as an obscure error in blobFromImage.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Preprocess image: scale pixel values by 1/255, resize to 416x416 without
# cropping, and swap the R and B channels.
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)

# Set input to the YOLO network
yolo_net.setInput(blob)

# Get output layer names
out_layer_names = yolo_net.getUnconnectedOutLayersNames()

# Run forward pass to get detections (one output array per output layer)
detections = yolo_net.forward(out_layer_names)

# Process detections
# Each row of a YOLOv3 output holds the box (cx, cy, w, h), an objectness
# value, and then one score per class; the class scores start at index 5.
detected_objects = []
for detection in detections:
    for obj in detection:
        scores = obj[5:]
        # The predicted class is the best-scoring one. obj[1] is a box
        # coordinate, not a class index, so it must not be used here.
        class_id = int(scores.argmax())
        confidence = scores[class_id]

        if confidence > 0.5:  # Minimum confidence threshold
            # Get detected class name
            class_name = classes[class_id]

            # Add detected object to the list
            detected_objects.append((class_name, confidence))

# Report every retained detection as "<class> - <confidence>" with two decimals.
for class_name, confidence in detected_objects:
    print(f"Detected: {class_name} - Confidence: {confidence:.2f}")
