# Task B - Caption Impact Analysis
In this task, the impact of captions on object detection was studied.
The analysis compares the detector's output on the same image with and without captions.

### Qualitative Analysis
A visual comparison of the object detection results on an image before and after caption removal is shown.

In [None]:
import os
import shutil
# Read the id of the image to analyse; it is resolved to ../data/img/<id>.png.
img_id = input()
img_path = f"../data/img/{img_id}.png"
# Check if the input image exists
if not os.path.exists(img_path):
    print("Input image not found.")
    # A missing file is a FileNotFoundError, not a NameError; include the path
    # so the traceback says which file was expected.
    raise FileNotFoundError(img_path)


# Start from an empty working directory so outputs of earlier runs cannot be
# mistaken for results of this run.
file_dir = "curdir"
if os.path.exists(file_dir):
    shutil.rmtree(file_dir)
os.makedirs(file_dir)

# Define the output image path
output_image_path = os.path.join(file_dir, "caption.jpg")
# Copy the input image to the output folder with the specified name.
# NOTE: this is a byte-for-byte copy, so the file keeps its original (.png)
# encoding even though the new name ends in .jpg.
shutil.copy(img_path, output_image_path)

Here, inpainting is performed first to generate a version of the image with no captions.

In [None]:
import keras_ocr
import numpy as np
import math
import cv2
# Helper: integer midpoint of a line segment
def midpoint(x1, y1, x2, y2):
    """Return the midpoint of the segment (x1, y1)-(x2, y2) as an int tuple.

    Each coordinate is averaged and then converted with ``int()``, which
    truncates toward zero (so 1.5 becomes 1 and -1.5 becomes -1).
    """
    mid_x = (x1 + x2) / 2
    mid_y = (y1 + y2) / 2
    return int(mid_x), int(mid_y)


# Initialize keras-ocr pipeline
pipeline = keras_ocr.pipeline.Pipeline()

# Path to the image with text
image_path = 'curdir/caption.jpg'

# Read the image
image = keras_ocr.tools.read(image_path)

# Recognize text in the image (one list of predictions per input image)
predictions = pipeline.recognize([image])

# Create a single-channel mask with the image's height and width; pixels set
# to 255 mark the regions to be inpainted.
mask = np.zeros(image.shape[:2], dtype="uint8")

# Iterate through predicted text regions and create mask
for box in predictions[0]:
    # box[1] holds the four corner points of the detected text region
    # (presumably ordered around the box -- confirm against keras-ocr docs).
    x0, y0 = box[1][0]
    x1, y1 = box[1][1]
    x2, y2 = box[1][2]
    x3, y3 = box[1][3]

    # Midpoints of the two opposite sides (1-2 and 0-3); the line drawn
    # between them runs through the middle of the box.
    x_mid0, y_mid0 = midpoint(x1, y1, x2, y2)
    x_mid1, y_mid1 = midpoint(x0, y0, x3, y3)

    # Line thickness = length of side 1-2, so the stroke covers the box.
    # Clamp to at least 1: a degenerate box truncates to 0, and cv2.line
    # rejects a thickness of 0.
    thickness = max(1, int(math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)))

    # Draw line on mask
    cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255, thickness)

# Inpaint the text regions (radius 7, Navier-Stokes based method)
inpainted_image = cv2.inpaint(image, mask, 7, cv2.INPAINT_NS)


# Save the image without text. The red/blue channels are swapped before
# writing -- NOTE(review): this assumes keras_ocr.tools.read returned RGB
# while cv2.imwrite expects BGR; confirm.
cv2.imwrite('curdir/nocaption.jpg', cv2.cvtColor(inpainted_image, cv2.COLOR_BGR2RGB))

In [None]:
from ultralytics import YOLO
# Build the detector from the 'yolov8m.pt' weights; `model` is reused by the
# detection cells below for both the captioned and caption-free image.
model = YOLO('yolov8m.pt')

In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# Run detection on the captioned image and save the annotated result under
# curdir/captions. exist_ok=True makes a re-run of this cell overwrite that
# folder; without it the save directory name is incremented on every run and
# the imread below would keep showing the output of the first run.
model(source="curdir/caption.jpg", show=False, conf=0.5, save=True,
      project='curdir', name='captions', exist_ok=True)

image = mpimg.imread("curdir/captions/caption.jpg")

# Display the image
plt.imshow(image)
plt.axis('off')  # Turn off axis
plt.show()

In [None]:
# Run detection on the inpainted (caption-free) image and save the annotated
# result under curdir/no_captions. exist_ok=True makes a re-run of this cell
# overwrite that folder; without it the save directory name is incremented on
# every run and the imread below would keep showing the first run's output.
results_noc = model(source="curdir/nocaption.jpg", show=False, conf=0.5, save=True,
                    project='curdir', name='no_captions', exist_ok=True)

image = mpimg.imread("curdir/no_captions/nocaption.jpg")

# Display the image
plt.imshow(image)
plt.axis('off')  # Turn off axis
plt.show()

### Quantitative Analysis of Captions
Object detection was run on the dev split, and the number of instances where there was a discrepancy between the with-caption and without-caption results was counted.

In [None]:
def get_line_lengths(file_path):
    """Return the length of every line in the file at ``file_path``.

    Leading and trailing whitespace (including the newline) is stripped
    before measuring, so a blank line contributes 0. The result has one
    entry per line, in file order.
    """
    with open(file_path, 'r') as handle:
        return [len(row.strip()) for row in handle]


In [None]:

length_caption = get_line_lengths('objdetect_with_captions.txt')
length_nocaption = get_line_lengths('objdetect_without_captions.txt')

# Count the positions at which the two files have lines of different length.
# Only the stripped line length is compared, so two different lines of equal
# length are not counted as a discrepancy. The without-caption file is indexed
# by position, so it must have at least as many lines as the with-caption file.
c = sum(
    1
    for i, caption_len in enumerate(length_caption)
    if caption_len != length_nocaption[i]
)

print(c)

To get a more granular view of which objects are most obstructed by captions, the per-class detection counts are compared.

In [None]:

from collections import defaultdict
import matplotlib.pyplot as plt

def process_file_lines(file_path):
    """Return all lines of the text file at ``file_path`` as a list of strings.

    Lines are returned unmodified, in file order, with their trailing newline
    characters preserved. An empty file yields an empty list.
    """
    with open(file_path, 'r') as file:
        # readlines() already builds the list; no per-line copy is needed, and
        # no local variable has to shadow the builtin ``list``.
        return file.readlines()


def count_dict(lines):
    """Sum the per-object counts found in ``lines``.

    Every occurrence of "<number> <name>" in a line (the name may consist of
    several space-separated words) adds <number> to the total for <name>.

    Args:
        lines: iterable of strings such as "2 persons, 1 tie".

    Returns:
        A plain ``dict`` mapping object name to its total count.
    """
    # Imported here because this function needs ``re`` and the file's import
    # block does not provide it.
    import re

    # Regular expression to find objects and their counts
    pattern = re.compile(r'(\d+) (\w+(?: \w+)*)')

    # A dictionary to store the count of each object
    object_counts = defaultdict(int)

    for line in lines:
        for count, obj in pattern.findall(line):
            count = int(count)
            # Only a plural form carries a trailing plural "s". Stripping it
            # from a singular entry would mangle names that end in "s"
            # ("1 bus" -> "bu") and split one object across two keys.
            # NOTE(review): assumes plurals are formed by appending "s" when
            # the count is above one -- confirm against how the files were
            # produced.
            if count != 1 and obj.endswith('s'):
                obj = obj[:-1]
            object_counts[obj] += count

    # Convert defaultdict to a regular dictionary for display
    return dict(object_counts)

def display(dict):
    """Show a bar chart with one bar per key of ``dict``.

    Keys are used as x-axis labels and the matching values as bar heights.
    The parameter name shadows the builtin ``dict``; it is kept unchanged so
    existing callers are unaffected.
    """
    labels = [*dict.keys()]
    heights = [*dict.values()]

    # Wide figure so that many labels fit side by side
    plt.figure(figsize=(18, 6))
    plt.bar(labels, heights, color='skyblue')

    # Title and axis labels
    plt.title('Word Count Plot')
    plt.xlabel('Words')
    plt.ylabel('Counts')

    # Tilt the x-axis labels
    plt.xticks(rotation=45)

    plt.show()

# Load the per-image detection summaries for both variants
lines_caption = process_file_lines('caption_obj.txt')
lines_nocap = process_file_lines('no_caption_obj.txt')

# Aggregate the object counts: dict_with comes from the captioned images,
# dict_without from the caption-free images.
dict_with = count_dict(lines_caption)
dict_without = count_dict(lines_nocap)

# Take 'person' out of both dictionaries so it gets its own chart and is not
# repeated in the per-object charts below. The with-caption count must come
# from dict_with and the without-caption count from dict_without, otherwise
# the two bars are swapped relative to their labels. pop(..., 0) also avoids
# a KeyError when no person was detected.
cap_person = dict_with.pop('person', 0)
nocap_person = dict_without.pop('person', 0)

# Bar chart comparing the person counts of the two variants
plt.figure(figsize=(18, 6))
words = ["Persons With Caption", "Persons Without Caption"]
counts = [cap_person, nocap_person]
plt.bar(words, counts, color='skyblue')

# Adding titles and labels
plt.title('Word Count Plot')
plt.xlabel('Words')
plt.ylabel('Counts')

# Rotating the x-axis labels
plt.xticks(rotation=45)

# Display the plot
plt.show()

# Per-object charts for the remaining classes: first with captions, then without
display(dict_with)
display(dict_without)