# Object Detection on Instagram Posts

## Importing Libraries

In [1]:
import cv2
import numpy as np
import pandas as pd
import webcolors
import json
import os

Tutorial I used
https://learnopencv.com/simple-background-estimation-in-videos-using-opencv-c-python/

## Output Dataframe

In [2]:
df = pd.DataFrame(columns=['handle_id', 'date_posted', 'instagram_link', 'content_type', 'post_description', 'number_tags', 'likes_post', 'comments_post', 'len_description', 'number_objects', 'classes_string','background_percentage', 'background_avg_color'])
df.head()

Unnamed: 0,handle_id,date_posted,instagram_link,content_type,post_description,number_tags,likes_post,comments_post,len_description,number_objects,classes_string,background_percentage,background_avg_color


## Defined Functions

In [3]:
def json_processing(folder_path, filename):
    try:
        flag_json = 0
        executed_flag = False
        filename_json = filename[:-3]
        filename_json = filename_json + 'json'
        folder_path_json = os.path.dirname(folder_path) + "\json"
        filename_json_path = os.path.join(folder_path_json, filename_json)
        date = filename[0:10]
    
        print(filename_json)
            
        with open(filename_json_path) as f:
            data = json.load(f)
        flag_json = 1 ########################### flag working OK
        executed_flag = True
        shortcode = ((data['node']['shortcode']))
        instagram_link = 'https://www.instagram.com/keralatourism/p/' + str(shortcode) + '/'
        description = ((data['node']['edge_media_to_caption']['edges'][0]['node']['text']))
        description_len = len(description)
        number_tags = len(data['node']['edge_media_to_tagged_user']['edges'])
        likes_post = (data['node']['edge_media_preview_like']['count'])
        comments_post = (data['node']['edge_media_to_comment']['count'])
        is_video_flag = data['node']['is_video']
        content_type = ''
        if(is_video_flag == True):
            content_type = 'Video'
        else:
            content_type = 'Image'
        print('is_video_flag:', is_video_flag, type(is_video_flag))
        print(instagram_link, 'tags:', number_tags,'likes:', likes_post, 'comments:', comments_post)
        
            
            # json_columns = {'instagram_link': instagram_link, 'number_tags':number_tags, 'likes_post': likes_post, 'comments_post': comments_post}
        
        output = [date, instagram_link, content_type, description, number_tags, likes_post, comments_post, description_len] 
        # pint(instagram_link, 'tags:', number_tags,'likes:', likes_post, 'comments:', comments_post)
    

    except FileNotFoundError:
        output = []
        print("File Not Found. Please check directory.")
        return output, executed_flag
    except:
        print("Some Error Occured.")
        return output, executed_flag
    else:
        print("JSON processed successfully.")
        return output, executed_flag
    

In [4]:
def closest_colour(requested_colour):
    min_colours = {}
    for key, name in webcolors.CSS3_HEX_TO_NAMES.items():
        r_c, g_c, b_c = webcolors.hex_to_rgb(key)
        rd = (r_c - requested_colour[0]) ** 2
        gd = (g_c - requested_colour[1]) ** 2
        bd = (b_c - requested_colour[2]) ** 2
        min_colours[(rd + gd + bd)] = name
    return min_colours[min(min_colours.keys())]
        

In [5]:
## Computer Vision Function
###    Computer Vision Task 
def object_detection(file_path, model):
    flag_image = 1
    executed_flag = False
    # read the image from disk
    image = cv2.imread(file_path)
    
    image_height, image_width, _ = image.shape
    # create blob from image
    blob = cv2.dnn.blobFromImage(image=image, size=(300, 300), mean=(104, 117, 123), swapRB=True)
    # Initialize model & set the blob to the model
    model.setInput(blob)
    # load the COCO class names
    with open('object_detection_classes_coco.txt', 'r') as f:
        class_names = f.read().split('\n')
    # get a different color array for each of the classes
    COLORS = np.random.uniform(0, 255, size=(len(class_names), 3))
    
    # forward pass through the model to carry out the detection
    output = model.forward()
      
    class_names_found = []
    non_background_pixels = 0
    x1 = 0; x2 = 0; y1 = 0; y2 = 0
    # loop over each of the detection
    for detection in output[0, 0, :, :]:
        #
        # extract the confidence of the detection
        confidence = detection[2]
        # draw bounding boxes only if the detection confidence is above...
        # ... a certain threshold, else skip 
        if confidence > .4:
            # get the class id
            class_id = detection[1]
            # map the class id to the class
            class_name = class_names[int(class_id)-1]
            color = COLORS[int(class_id)]
            class_names_found.append(class_name)
            x1 = int(detection[3] * image_width)
            y1 = int(detection[4] * image_height)
            x2 = int(detection[5] * image_width)
            y2 = int(detection[6] * image_height)
            non_background_pixels += abs(x2-x1)*abs(y2-y1) 
    # Count the non-background pixels within the bounding box region
            
    total_pixels = image.shape[0] * image.shape[1]
    
    number_objects = len(class_names_found)
    classes_string = str(class_names_found)
    percentage_background = (abs(total_pixels - non_background_pixels) / total_pixels) * 100
        
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # lur the grayscale image
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # Calculate the average background color
    average_color = np.mean(blurred)
    # Convert the average color value to integers
    average_color = int(average_color)
    # Create a blank image of the average color
    color_image = np.zeros((100, 100, 3), np.uint8)
    color_image[:] = average_color
    # Convert the average color to RGB format
    average_color_rgb = tuple(map(int, color_image[0, 0]))
    # Print the average background color in RGB
    print("Average Background Color (RGB):", average_color_rgb)
    background_avg_color = closest_colour(average_color_rgb)
    executed_flag = True
    
    output = [number_objects, classes_string, percentage_background, background_avg_color]
    return output, executed_flag

## Constructing Dataset

In [6]:
folder_path =  r'C:\Users\aaryan\projkts\iim_ngp\instagram_scrape\rajasthan_tourism\images'
model = cv2.dnn.readNet(model='frozen_inference_graph.pb',
                        config='ssd_mobilenet_v2_coco_2018_03_29.pbtxt',
                        framework='TensorFlow')

# load the COCO class names
with open('object_detection_classes_coco.txt', 'r') as f:
   class_names = f.read().split('\n')
  
# get a different color array for each of the classes
COLORS = np.random.uniform(0, 255, size=(len(class_names), 3))


iter = 0
for filename in os.listdir(folder_path):
    iter += 1
    if iter%500 == 0:
        df.to_csv(f"data_{iter}.csv")
    print('-------------------------------------------------------')
    print('ITER:', iter)
    print('-------------------------------------------------------')
    
    file_path = os.path.join(folder_path, filename)
    ## row that will be appended to the dataframe
    data_row = []
    data_row.append(os.path.basename(os.path.dirname(os.path.dirname(file_path))))
    print(data_row[0])
    print(os.path.dirname(os.path.dirname(file_path)))
    json_columns, flag_json = json_processing(folder_path, filename)
    print(flag_json,len(json_columns),json_columns)
    
    object_detection_columns, flag_img = object_detection(file_path, model)
    print(flag_img ,object_detection_columns)
    
    if(flag_json and flag_img):
        data_row = data_row + json_columns + object_detection_columns
        df.loc[len(df.index)] = data_row
        print(df.iloc[-1])
        
    


-------------------------------------------------------
ITER: 1
-------------------------------------------------------
rajasthan_tourism
C:\Users\aaryan\projkts\iim_ngp\instagram_scrape\rajasthan_tourism
2019-01-01_12-18-16_UTC_1.json
File Not Found. Please check directory.
False 0 []
Average Background Color (RGB): (139, 139, 139)
True [0, '[]', 100.0, 'gray']
-------------------------------------------------------
ITER: 2
-------------------------------------------------------
rajasthan_tourism
C:\Users\aaryan\projkts\iim_ngp\instagram_scrape\rajasthan_tourism
2019-01-01_12-18-16_UTC_2.json
File Not Found. Please check directory.
False 0 []
Average Background Color (RGB): (114, 114, 114)
True [0, '[]', 100.0, 'dimgray']
-------------------------------------------------------
ITER: 3
-------------------------------------------------------
rajasthan_tourism
C:\Users\aaryan\projkts\iim_ngp\instagram_scrape\rajasthan_tourism
2019-01-01_12-18-16_UTC_3.json
File Not Found. Please check di

In [9]:
df.to_csv('rajasthan_tourism_out2.csv')

In [11]:
df.tail()

Unnamed: 0,handle_id,date_posted,instagram_link,content_type,post_description,number_tags,likes_post,comments_post,len_description,number_objects,classes_string,background_percentage,background_avg_color
3014,keralatourism,2023-06-06,https://www.instagram.com/keralatourism/p/CtIv...,Image,Ek Ehsaas Banawat Ka!\n#RajasthanEkEhsaas\n\nI...,0,1861,15,457,2,"['person', 'teddy bear']",1.143144,darkgray
3015,keralatourism,2023-06-07,https://www.instagram.com/keralatourism/p/CtLT...,Image,Delve into the captivating and triumphant past...,0,1299,7,396,1,['person'],84.016804,darkgray
3016,keralatourism,2023-06-10,https://www.instagram.com/keralatourism/p/CtT3...,Image,Rajasthan's Jharokhas are exquisite and intric...,0,816,4,469,0,[],100.0,dimgray
3017,keralatourism,2023-06-14,https://www.instagram.com/keralatourism/p/CtdV...,Image,Delve into the captivating world of Rajasthan'...,0,1174,8,533,1,['person'],88.826046,darkgray
3018,keralatourism,2023-06-15,https://www.instagram.com/keralatourism/p/Ctf6...,Image,Jhalana Leopard Safari in Jaipur is home to ov...,1,768,5,629,0,[],100.0,gray


In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3019 entries, 0 to 3018
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   handle_id              3019 non-null   object 
 1   date_posted            3019 non-null   object 
 2   instagram_link         3019 non-null   object 
 3   content_type           3019 non-null   object 
 4   post_description       3019 non-null   object 
 5   number_tags            3019 non-null   int64  
 6   likes_post             3019 non-null   int64  
 7   comments_post          3019 non-null   int64  
 8   len_description        3019 non-null   int64  
 9   number_objects         3019 non-null   int64  
 10  classes_string         3019 non-null   object 
 11  background_percentage  3019 non-null   float64
 12  background_avg_color   3019 non-null   object 
dtypes: float64(1), int64(5), object(7)
memory usage: 330.2+ KB
