In [34]:
import html
import os
import pickle
from collections import defaultdict
from io import BytesIO

import cv2
import numpy as np
import pandas as pd
import plotly.express as px
import requests
from googletrans import Translator
from imageai.Detection import ObjectDetection
from instascrape import Profile
from PIL import Image
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from tqdm.notebook import tqdm

tqdm.pandas()

In [181]:
def createDetector(model='models/yolo.h5'):
    """
    Create an imageai object detector and load pretrained weights into it.

    The network architecture is chosen from the model path (case-insensitive):
    a path containing 'yolo' selects YOLOv3, a path containing 'resnet'
    selects RetinaNet.

    Input:
    model (str):  path to a pretrained h5 model, resolved against the current
                  working directory

    Output:
    imageai.Detection.ObjectDetection object with the model loaded

    Raises:
    ValueError:   if the path names neither a 'yolo' nor a 'resnet' model

    """
    modelName = model.lower()
    if 'yolo' in modelName:
        useYolo = True
    elif 'resnet' in modelName:
        useYolo = False
    else:
        # Fail before building the detector: without a model type the failure
        # would only surface later, inside loadModel(), with a less helpful message.
        raise ValueError(
            f"Cannot infer the detector type from {model!r}: "
            "expected 'yolo' or 'resnet' in the model path"
        )

    detector = ObjectDetection()
    if useYolo:
        detector.setModelTypeAsYOLOv3()
    else:
        detector.setModelTypeAsRetinaNet()
    # os.path.join leaves `model` untouched when it is already an absolute path.
    detector.setModelPath(os.path.join(os.getcwd(), model))
    detector.loadModel()
    return detector

def getObjects(url, detector):
    """
    Download the image at `url` and run object detection on it.

    The image is saved as 'imageTemporary.jpg' in the working directory and the
    detector writes its annotated output image back to that same file.

    input:
    url (str): url with an image
    detector (imageai.Detection.ObjectDetection): loaded detector,
        e.g. the object returned by createDetector()

    output:
    The detections returned by detector.detectObjectsFromImage (callers in this
    file iterate over them and read the 'name' and 'percentage_probability'
    keys of each item), or None if the download, decoding or detection fails.

    """
    pth = "imageTemporary.jpg"
    try:
        # A timeout keeps one unresponsive host from hanging the whole notebook.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
        # JPEG cannot store alpha or palette images (typical for PNG/WebP sources);
        # without this conversion such images always ended up as None.
        if img.mode not in ('L', 'RGB', 'CMYK'):
            img = img.convert('RGB')
        img.save(pth)
        return detector.detectObjectsFromImage(input_image=pth, output_image_path=pth)
    except Exception:
        # Best-effort contract: any failure is reported as None. `Exception`
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return None
    
def getColorData(imagePath, n = 10):
    """
    Build a one-row dataframe of normalised, binned colour histograms.

    The image is written to 'imageTemporary.jpg' in the working directory and
    read back with OpenCV. For each of the b, g and r channels the histogram
    over [0, 256) is split into `n` bins and each bin is divided by the channel
    total, giving the columns b_0..b_{n-1}, g_0..g_{n-1}, r_0..r_{n-1}.

    Input:
    imagePath (str): image URL, or path to an existing local file
    n (int):         number of bins

    Output:
    pandas dataframe with a single row and 3*n columns

    Raises:
    ValueError: if OpenCV cannot read the saved image back

    """
    if os.path.isfile(imagePath):
        img = Image.open(imagePath)
    else:
        response = requests.get(imagePath, timeout=30)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))

    # JPEG cannot store alpha or palette images (typical for PNG/WebP sources).
    if img.mode not in ('L', 'RGB', 'CMYK'):
        img = img.convert('RGB')
    pth = "imageTemporary.jpg"
    img.save(pth)

    pixels = cv2.imread(pth)
    if pixels is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"Could not read image data from {imagePath!r}")

    columns = {}
    for channel, col in enumerate(('b', 'g', 'r')):
        hist = cv2.calcHist([pixels], [channel], None, [n], [0, 256])
        # Normalise once per channel so the bins of one channel sum to 1.
        share = hist / np.sum(hist)
        for j in range(n):
            # share[j] is a length-1 array, so every column contributes one row.
            columns[f'{col}_{j}'] = share[j]
    return pd.DataFrame(columns)
    

def predictImageLikes(model, 
                      imgUrl,  
                      captionText = '', 
                      numOfTaggedUsers=0, 
                      numOfHashtags = None,
                      uploadDayOfWeek = 3,
                      numOfFollowers=None,
                      detector=None,
                      minObjectProbability=0.8):
    """
    The function returns an approximate amount of likes based on the post properties
    
    Input:
    model (sklearn model):     fitted model with predict method; np.exp is applied to
                               its prediction before it is returned
    
    imgUrl (str):              image URL
    
    captionText (str):         caption text for the post with hashtags (if present), 
                               default: ''
                               
    numOfTaggedUsers (int):    number of tagged accounts in addition to the '@' 
                               mentions counted in captionText, 
                               default: 0
                               
    numOfHashtags (int):       number of hashtags, if None - calculates from the captionText, 
                               default: None
                               
    uploadDayOfWeek (int):     week day number of the planned post upload, 
                               default: 3 (wednesday)
                               
    numOfFollowers (int):      current number of followers, 
                               if None - relative score is returned, else absolute number of expected likes,
                               default: None
                               
    detector:                  loaded object detector to reuse across calls; 
                               if None a new one is built with createDetector() on every call,
                               default: None
                               
    minObjectProbability (float): minimum 'percentage_probability' a detection needs 
                               to count as present, 
                               default: 0.8
                               NOTE(review): imageai documents percentage_probability as a 
                               percentage (0-100), so 0.8 keeps practically every detection;
                               pass 80 if 80% was meant - confirm against the training pipeline.
                               
                               
    Output:
    tuple (score, summary):    score is the relative (if numOfFollowers is None) or absolute 
                               (multiplied by numOfFollowers) value of expected likes; 
                               summary is an HTML-formatted description of the features.
                               If the image cannot be downloaded or opened the tuple
                               ('Error', 'Error. Try to provide another URL') is returned.
    """
    columnsToFeed = ['imageRatio','numOfHashtags','numOfTaggedUsers','lenOfCaption','uploadDayOfWeek',
                     'airplane','apple','backpack','bear','bed','bench','bicycle','bird','boat','book',
                     'bottle','bowl','broccoli','bus','cake','car','cat','cell phone','chair','clock',
                     'couch','cow','cup','dining table','dog','donut','elephant','fire hydrant','fork',
                     'frisbee','giraffe','handbag','horse','hot dog','keyboard','kite','knife','laptop',
                     'microwave','motorcycle','mouse','orange','oven','person','pizza','potted plant',
                     'refrigerator','remote','sandwich','sheep','sink','skateboard','skis','spoon',
                     'sports ball','stop sign','suitcase','surfboard','teddy bear','tennis racket','tie',
                     'toilet','traffic light','train','truck','tv','umbrella','vase','wine glass','zebra',
                     'b_0','b_1','b_2','b_3','b_4','b_5','b_6','b_7','b_8','b_9','g_0','g_1','g_2','g_3',
                     'g_4','g_5','g_6','g_7','g_8','g_9','r_0','r_1','r_2','r_3','r_4','r_5','r_6','r_7',
                     'r_8','r_9','weightedLikes','logWeightedLikes']
    
    # The last two names are targets, not inputs, so they are never fed to the model.
    featureColumns = columnsToFeed[:-2]
    # Object-class columns: skip the 5 post-level features at the front and the
    # 30 colour bins + 2 target columns at the end.
    objectsList = columnsToFeed[5:-32]

    if numOfHashtags is None:
        numOfHashtags = captionText.count('#')
    # '@' mentions in the caption count as tags on top of the explicit argument;
    # this total is both fed to the model and shown in the summary.
    totalTaggedUsers = captionText.count('@') + numOfTaggedUsers
    lenOfCaption = len(captionText)

    try:
        response = requests.get(imgUrl, timeout=30)
        img = Image.open(BytesIO(response.content))
        width, height = img.size
    except Exception:
        print('Error')
        return 'Error', 'Error. Try to provide another URL'

    imageRatio = width/height
    colorData = getColorData(imgUrl)

    if detector is None:
        detector = createDetector()
    # getObjects reports failure as None; treat that as "nothing detected".
    detectedObjects = getObjects(imgUrl, detector) or []
    # Sorted so the summary text is reproducible between runs.
    presentObjects = sorted({obj['name'] for obj in detectedObjects
                             if obj['percentage_probability'] >= minObjectProbability})

    features = {'imageRatio': imageRatio,
                'numOfHashtags': numOfHashtags,
                'numOfTaggedUsers': totalTaggedUsers,
                'lenOfCaption': lenOfCaption,
                'uploadDayOfWeek': uploadDayOfWeek}
    for obj in objectsList:
        features[obj] = 1 if obj in presentObjects else 0

    # Both frames have the single row index 0, so they line up column-wise.
    df = pd.concat([pd.DataFrame([features]), colorData], axis=1)[featureColumns]

    summary=f"imageRatio: {round(imageRatio,2)} <br>numOfHashtags: {numOfHashtags} <br>numOfTaggedUsers: {totalTaggedUsers}<br>lenOfCaption: {lenOfCaption}<br>uploadDayOfWeek: {uploadDayOfWeek}<br>presentObjects: {presentObjects}"
    print(summary)

    score = np.exp(model.predict(df))[0]
    if numOfFollowers is not None:
        score = score*numOfFollowers
    return score, summary

In [165]:
modelpath = 'models/xgbr_1.pkl'
# Open `modelpath` (the original opened an undefined `filename`) and close the
# handle deterministically.
# NOTE: pickle.load can execute arbitrary code - only load model files you trust.
with open(modelpath, 'rb') as modelFile:
    model = pickle.load(modelFile)

In [175]:
# Single-post example: expected likes for one image and caption on an account
# with 19200 followers.
# NOTE(review): the URL looks like a signed CDN link and may no longer resolve.
imgUrl = 'https://instagram.fmxp5-1.fna.fbcdn.net/v/t51.2885-15/sh0.08/e35/s640x640/12120240_1666899370193777_654241849_n.jpg?tp=1&_nc_ht=instagram.fmxp5-1.fna.fbcdn.net&_nc_cat=105&_nc_ohc=rOaFVzQ7SDUAX9C0Fdb&edm=AP_V10EBAAAA&ccb=7-4&oh=6707874d83e61e6de3477029a0311411&oe=60C8B8A4&_nc_sid=4f375e'
captionText = "Холмогоры — родина М. В. Ломоносова. #страна #путешествия #родина #природа #россия #природароссии #природа_россии #природамать #природапрекрасна #туризм"
predictImageLikes(model, imgUrl, captionText=captionText, numOfFollowers=19200)

Unnamed: 0,numOfHashtags,numOfTaggedUsers,lenOfCaption,uploadDayOfWeek
0,10,0,152,3


In [190]:
modelpath = 'models/xgbr_1.pkl'
# Open `modelpath` (the original opened an undefined `filename`).
# NOTE: pickle.load can execute arbitrary code - only load model files you trust.
with open(modelpath, 'rb') as modelFile:
    model = pickle.load(modelFile)

imgURLs = ['https://s1.1zoom.ru/big3/11/Netherlands_Houses_478971.jpg',
           'https://w-dog.ru/wallpapers/3/12/352585658871245/milan-oboi-milan-sostav-milana-pato.jpg',
           'https://kartini-po-nomeram.com.ua/products_pictures/large_Y5754.jpg',
           'https://i.pinimg.com/originals/76/f9/90/76f990fc376b92cfe55c868d3880fb9c.webp']
captionTexts = ['asjfaoufbafuo', 'asopovspidvjisd pisjdvisdv','','qdwqwr']

# One entry per image in every list, so the lists always stay the same length.
dct = {'image':[], 'caption':[], 'properties':[], 'score':[]}
for i, url in enumerate(tqdm(imgURLs)):
    # A missing or non-text caption falls back to '' up front; the original relied
    # on a bare except that could append the caption twice for a single image.
    caption = captionTexts[i] if i < len(captionTexts) else ''
    if not isinstance(caption, str):
        caption = ''
    try:
        res = predictImageLikes(model, url, captionText=caption)
    except Exception as err:
        # Keep the batch going and make the failure visible in the report row.
        res = ('Error', f'Error: {err}')
    dct['image'].append(url)
    dct['caption'].append(caption)
    dct['score'].append(res[0])
    dct['properties'].append(res[1])
dfInstaSeer = pd.DataFrame(dct)

# convert your links to html tags
def path_to_image_html(path):
    """
    Wrap an image URL or path in an <img> tag that is 400 pixels wide.

    The value is HTML-escaped before it is placed in the src attribute, so a
    quote or '&' inside the URL cannot break out of the attribute.

    Input:
    path (str): image URL or path

    Output:
    str: HTML <img> tag
    """
    return '<img src="' + html.escape(path, quote=True) + '" width="400" >'


# Lift the column-width limit so long URLs and summaries are not cut short in
# the exported table.
pd.set_option('display.max_colwidth', None)
# escape=False is required because the 'image' and 'properties' columns carry
# intentional markup (<img>, <br>). The free-text caption is escaped explicitly
# so characters such as '<' or '&' in a caption cannot corrupt the table.
dfInstaSeer.to_html(
    'instaSeerDf.html',
    escape=False,
    formatters={
        'image': path_to_image_html,
        'caption': lambda text: html.escape(str(text)),
    },
)

HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

imageRatio: 1.5 <br>numOfHashtags: 0 <br>numOfTaggedUsers: 0<br>lenOfCaption: 13<br>uploadDayOfWeek: 3<br>presentObjects: []
imageRatio: 1.51 <br>numOfHashtags: 0 <br>numOfTaggedUsers: 0<br>lenOfCaption: 26<br>uploadDayOfWeek: 3<br>presentObjects: ['person']
Error
imageRatio: 0.66 <br>numOfHashtags: 0 <br>numOfTaggedUsers: 0<br>lenOfCaption: 6<br>uploadDayOfWeek: 3<br>presentObjects: ['bicycle', 'person', 'boat']



In [206]:
# Restyle the exported table with Bootstrap: swap the pandas table class,
# left-align the header row and prepend the stylesheet link.
# The encoding is given explicitly because captions may contain non-ASCII text
# and the platform default encoding is not UTF-8 everywhere.
with open('instaSeerDf.html', 'r', encoding='utf-8') as file:
    data = file.read()
data = data.replace('<table border="1" class="dataframe">', '<table class="table">')

data = data.replace("style=\"text-align: right;\"", "style=\"text-align: left;\"")
data = '<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-BmbxuPwQa2lc/FVzBcNJ7UAyJxM6wuqIj61tLrc4wSX0szH/Ev+nYRRuWlolflfl" crossorigin="anonymous">\n' + data
with open("InstaSeerDf2.html", "w", encoding='utf-8') as file:
    file.write(data)


'<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-BmbxuPwQa2lc/FVzBcNJ7UAyJxM6wuqIj61tLrc4wSX0szH/Ev+nYRRuWlolflfl" crossorigin="anonymous">\n<table class="table">\n  <thead>\n    <tr style="text-align: left;">\n      <th></th>\n      <th>image</th>\n      <th>caption</th>\n      <th>properties</th>\n      <th>score</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td><img src="https://s1.1zoom.ru/big3/11/Netherlands_Houses_478971.jpg" width="400" ></td>\n      <td>asjfaoufbafuo</td>\n      <td>imageRatio: 1.5 <br>numOfHashtags: 0 <br>numOfTaggedUsers: 0<br>lenOfCaption: 13<br>uploadDayOfWeek: 3<br>presentObjects: []</td>\n      <td>0.016975</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td><img src="https://w-dog.ru/wallpapers/3/12/352585658871245/milan-oboi-milan-sostav-milana-pato.jpg" width="400" ></td>\n      <td>asopovspidvjisd pisjdvisdv</td>\n      <td>imageRatio: 1.51 <br