In [1]:
import os

import pandas as pd

import cv2 as cv
import numpy as np

from PIL import Image, ImageOps

In [2]:
def capture_screenshot(out_route=False, preprocess=True):
    """
    returns screencapture and the datetime it was taken

    inputs
    > out_route
        >> optional path to save the raw screenshot at
            > default == False
    > preprocess
        >> return output of preprocess_screenshot() instead of raw screenshot
            > out_route still saves raw screenshot if enabled
            > default == True

    returns
    > (screenshot, record_datetime)
        >> screenshot is the raw pyautogui screenshot, or the output of
           preprocess_screenshot() when preprocess is truthy
        >> record_datetime is str(datetime.now()), taken right after the capture
    """
    # imported locally: the notebook's import cell does not provide these names,
    # so the function raised NameError on a fresh kernel
    from datetime import datetime
    from time import sleep

    # NOTE(review): `pyautogui` is not imported in the visible import cell either;
    # it has to be available as a module-level name before this is called.

    # capture the raw screenshot (resizing happens in preprocess_screenshot);
    # a failed grab is retried exactly once after a short pause
    try:
        base_screenshot = pyautogui.screenshot()
    except Exception as e:
        print(e)  # OSError: screen grab failed
        print('retrying in seconds...')
        sleep(3)
        base_screenshot = pyautogui.screenshot()

    record_datetime = str(datetime.now())

    # the raw (un-preprocessed) screenshot is what gets saved
    if out_route:
        base_screenshot.save(out_route)

    if preprocess:
        return preprocess_screenshot(base_screenshot), record_datetime

    return base_screenshot, record_datetime


def preprocess_screenshot(screenshot):
    """
    Resize a screenshot to 1280x720, grayscale it and pass it through OpenCV.

    input: PIL Image

    output: result of cv.cvtColor(..., cv.COLOR_RGB2BGR) applied to the
            resized, grayscaled screenshot (nd.array)
    """
    resized = screenshot.resize((1280, 720))

    # grayscale with PIL (the author wants to switch this to subtracting the mean)
    gray_pixels = np.array(ImageOps.grayscale(resized))

    # translate colors to opencv
    # NOTE(review): the input here is already grayscale, so it is unclear whether an
    # RGB->BGR conversion is needed (or accepted by every OpenCV version) -- confirm
    return cv.cvtColor(gray_pixels, cv.COLOR_RGB2BGR)

In [3]:
class LabelNumbers():
    def __init__(self):
        self.data_dir = 'media/test_record_kills_and_players_remaining/'
        self.output_dir = 'media/stable_numbers/'
        
        self.records_dct = {'labeled_numbers' : 'stable_numbers.csv', 'to_be_converted' : f'{self.data_dir}sample_records.csv'}
        
        self.update_df()
        
        self.n_correct_preds = 0
        self.n_incorrect_preds = 0
        self.n_total_errors = 0
        
    def ask_human(self, update_df=True):
        if update_df:
            self.update_df(print_out=True)
            
        self.stop_asking_human = False
            
        temp = []
        n = 0
            
        for _ in range(self.n_existing_loops + 1, self.n_existing_loops + 1 + self.n_to_label):
            
            if (n != 0) and (n % 100 == 0):
                ask = input('want to keep going? ')
                if (ask != '') and (ask.lower() != 'y') and (ask.lower() != 'yes'):
                    print("ok, let't stop.")
                    break
                else:
                    print("ok, I'll ask again after 100 more screenshots.")
                        
            if self.stop_asking_human == True:
                print("ok, let's stop.")
                break
            
            i = self.df_for_conversion.iloc[n]
            n += 1
            
            if _ < 10:
                loop = f'loop_000000{_}'
            elif 10 <= _ < 10**2:
                loop = f'loop_00000{_}'
            elif 10**2 <= _ < 10**3:
                loop = f'loop_0000{_}'
            elif 10**3 <= _ < 10**4:
                loop = f'loop_000{_}'
            elif 10**4 <= _ < 10**5:
                loop = f'loop_00{_}'
            elif 10**5 <= _ < 10**6:
                loop = f'loop_0{_}'
            else:
                loop = f'loop_{_}'
            
            new_og_screenshot_file_path = f'{self.output_dir}og_screenshots/{loop}.jpg'
            try:
                Image.open(i['og_screenshot']).save(new_og_screenshot_file_path)
                os.remove(i['og_screenshot'])
            except Exception as e:
                self.n_total_errors += 1
                print(e)
            
            for rf in i[['k_reference_file', 'pr_reference_file']].values:
                im = cv.imread(rf, cv.IMREAD_UNCHANGED)

                cv.imshow('digits_small', im)
                
                im = Image.fromarray(im)
                im_big = im.resize((im.size[0]*9, im.size[1]*9))

                cv.imshow('digits_large', np.array(im_big))       
                key = cv.waitKey(100)

                if '/n_kills/' in rf:
                    digits = i['n_kills']
                    crop_type = 'k'
                elif '/n_players_remaining/' in rf:
                    digits = i['n_players_remaining']
                    crop_type = 'pr'
                                
                q = input(f'The digits in this image are {digits}? ')
                if q == 'STOP':
                    self.stop_asking_human = True
                    break
                elif (q != '') and (q.lower() != 'y') and (q.lower() != 'yes') and (q != f'{digits}'):
                    self.n_incorrect_preds += 1
                    try:
                        digits = int(q)
                    except:
                        if (q == 'n') or (q.lower() == 'e') or (q.lower() == 'b') or (q.lower() == 'bb'):
                            digits = q
                        else:
                            digits = input('What digits are in this image? ')
                else:
                    self.n_correct_preds += 1
                
                new_crop_file_path = f'{self.output_dir}number_crops/{loop}{crop_type}.jpg'
                im.save(new_crop_file_path)
                os.remove(rf)
                
                label_row = [digits, new_crop_file_path, crop_type, i['record_timestamp'], new_og_screenshot_file_path, i['top_left']]
                
                temp.append(label_row)
                
            # update self.records_dct CSVs and self.existing_df 
            # ***LEAVE self.df_for_conversion AS IS***
            if (_ % 21 == 0) and (_ != 0):
                temp_df = self.df_for_conversion.tail(self.n_to_label - n)
                temp_df.to_csv(f'{self.records_dct["to_be_converted"]}', index=False)
                
                temp_df = pd.DataFrame(temp, columns=self.existing_df.columns)
                self.existing_df = pd.concat([self.existing_df, temp_df], axis=0)
                self.existing_df.to_csv(f'{self.records_dct["labeled_numbers"]}', index=False)
                
                temp = []
        
        # update CSVs and DataFrames
        self.df_for_conversion = self.df_for_conversion.tail(self.n_to_label - n)
        self.df_for_conversion.to_csv(f'{self.records_dct["to_be_converted"]}', index=False)

        temp_df = pd.DataFrame(temp, columns=self.existing_df.columns)
        self.existing_df = pd.concat([self.existing_df, temp_df], axis=0)
        self.existing_df.to_csv(f'{self.records_dct["labeled_numbers"]}', index=False)
        
        return self.n_correct_preds, self.n_incorrect_preds
        
    def pull_instance_data(self, reference_file, df='default'):
        # pull from df for conversion by default
        if df == 'default':
            df = self.df_for_conversion
        # look at reference file columns
        for ref_column in ['k_reference_file', 'pr_reference_file']:
            # see if reference file is here
            i_df = self.df_for_conversion.loc[self.df_for_conversion[ref_column] == reference_file]
            # it is, is it only here once?
            if len(i_df) == 1:
                return i_df
            # it is not (fine), or it's here more than once (error)
            else:
                if len(i_df) != 0:
                    self.n_total_errors += 1
                    print(f'len(i_df) == {len(i_df)}')
        return None
    
    def update_df(self, print_out=False):
        # load dataframe for conversion & note how long it is
        self.df_for_conversion = pd.read_csv(f'{self.records_dct["to_be_converted"]}')
        self.n_to_label = len(self.df_for_conversion)
        
        # load dataframe of labeled numbers & note how long it is
        self.existing_df = pd.read_csv(f'{self.records_dct["labeled_numbers"]}')
        self.n_existing_labeled = len(self.existing_df)
        
        if print_out:
            print(f'n to label: {self.n_to_label}')
            print(f'n labeled : {self.n_existing_labeled}')
        
        # do labeled crops exist?
        if self.n_existing_labeled > 0:
            # there is more than one crop per loop, figure out how many loops by looking at # of screenshots
            last_existing_labeled_crop = [f for f in os.listdir(f'{self.output_dir}og_screenshots/') if '.jpg' in f][-1]
            last_existing_loop = last_existing_labeled_crop[-11:-4]
            self.n_existing_loops = int(last_existing_loop)
        else:
            # no, so 0 loops (screenshots) have been processed
            self.n_existing_loops = 0
            

    def recrop_screenshots(self, existing_df, output_dir='media/stable_numbers/recrop_numbers/'):
        """
        recrop list of images like Numbers.record_livestream()
                
        used to correct issues with past images by applying current processing
        
        runtime: ~500 images / minute
        
        for every row of existing_df the original screenshot is re-opened,
        preprocessed, cropped to its top right corner, template matched against
        the kills skull icon and the players remaining icon (both get blacked
        out), and the digits crop for the row's 'type' ('k' or 'pr') is saved
        into output_dir. every saved row is collected and written to
        'recropped_stable_numbers.csv' (current working directory) with the
        extra columns 'og_file_path' and 'og_top_left'.
        
        per-row failures are printed and counted (self.n_total_errors for the
        outer try, self.n_save_errors for the crop/save step); they do not stop
        the run.
        
        inputs
        ------
        > existing_df
            >> pandas dataframe of existing labeled screenshots and their crops
            >> the columns read here are 'og_screenshot', 'type', 'file_path' and 'top_left'
        > output_dir
            >> where to store the new crops
            >> default == 'media/stable_numbers/recrop_numbers/'
        """
        self.icons_dir = 'media/icons/'

        # template ("needle") images searched for inside each screenshot
        dark_needle_img = 'dark_kills_counter_skull_icon.jpg'
        dark_needle_icon = cv.imread(f'{self.icons_dir}{dark_needle_img}', cv.IMREAD_UNCHANGED)
        
        dark_needle_img_2 = 'dark_kills_counter_skull_icon_2.jpg'
        dark_needle_icon_2 = cv.imread(f'{self.icons_dir}{dark_needle_img_2}', cv.IMREAD_UNCHANGED)
        
        pr_needle_img = 'players_remaining_icon.png'
        players_remaining_icon = cv.imread(f'{self.icons_dir}{pr_needle_img}', cv.IMREAD_GRAYSCALE)
        # NOTE(review): this applies an RGB->BGR conversion to an image loaded as
        # grayscale -- confirm this is accepted by the OpenCV version in use
        players_remaining_icon = cv.cvtColor(players_remaining_icon, cv.COLOR_RGB2BGR)

        # PIL crop boxes (left, upper, right, lower) applied to the numbers bar
        n_kills_crop = (136, 0, 174, 28)  # 28x28 crop: (139, 0, 167, 28)
        n_players_remaining_crop = (84, 0, 122, 28)
        
        # icon locations remembered across rows so a failed match can fall back on them
        self.top_left = None
        self.trusted_top_left = None
        self.top_left_pr_icon = None
        self.n_save_errors = 0
        
        # rows (pandas Series) that were recropped and saved successfully
        temp = []
        
        for _ in range(len(existing_df)):
            instance_details = existing_df.iloc[_]

            file_path = instance_details['og_screenshot']
                
            try:

                # capture & save greyscaled bgr screenshot (1280, 720)
                og_screenshot = Image.open(file_path)
                screenshot = preprocess_screenshot(og_screenshot)

                # crop top (25%) right corner of the screenshot
                top_right_numbers_screenshot = Image.fromarray(screenshot.copy())
                top_right_numbers_screenshot = top_right_numbers_screenshot.crop((int(1280*0.75), 0, 1280, int(720*.25)))
                top_right_numbers_screenshot = np.array(top_right_numbers_screenshot)

                # look for n_kills skull icons
                # NOTE: `top_left`, `needle_w` and `needle_h` are plain locals that are never
                # reset, so whatever a previous row assigned is still visible below
                for needle_img in [dark_needle_icon_2]:  # dark_needle_icon
                    result = cv.matchTemplate(top_right_numbers_screenshot, needle_img, cv.TM_CCOEFF_NORMED)

                    min_val, max_val, min_loc, max_loc = cv.minMaxLoc(result)

                    threshold = 0.8
                    # do we have a satisfactory match?
                    if max_val >= threshold:

                        needle_w = needle_img.shape[1]
                        if needle_w < 18:
                            print(f'needle_w=={needle_w}, adding 5')
                            needle_w += 5
                        needle_h = needle_img.shape[0]

                        # tag top left corner, add width & height to find bottom right corner of icon
                        top_left = max_loc 
                        bottom_right = (top_left[0] + (needle_w) - 5, top_left[1] + needle_h)

                        # are we within logical area?
                        # NOTE(review): when this check fails the local `top_left` still holds the
                        # raw match location and is used for the crop below -- confirm intended
                        if (top_left[0] > 120) and (90 > top_left[1] > 10):
                            self.top_left = top_left
                            self.bottom_right = bottom_right

                            # black out kill skull icon
                            cv.rectangle(top_right_numbers_screenshot, top_left, bottom_right, color=(0, 0, 0), thickness=-1)

                    # no, fell short of threshold, so go back to the last logical area we had
                    else:
                        if self.trusted_top_left is not None:
                            top_left = self.trusted_top_left
                        else:
                            top_left = self.top_left
                        # have we gotten a top left yet?
                        if top_left is not None:
                            # are we within logical area?
                            if (top_left[0] > 120) and (90 > top_left[1] > 10):
                                bottom_right = self.bottom_right
                                # black out kill skull icon
                                cv.rectangle(top_right_numbers_screenshot, top_left, bottom_right, color=(0, 0, 0), thickness=-1)

                # do we have a top left?
                if top_left is not None:

                    # correct bottom right and expand left to just grab all linearly aligned numbers at once
                    full_bottom_right = (top_left[0] + (needle_w * 2), top_left[1] + needle_h)
                    full_top_left = tuple([top_left[0]-125, top_left[1]]) 

                    # crop full top right numbers bar
                    # NOTE(review): top_left[0] - 125 is negative for x between 121 and 124, which
                    # numpy treats as an index from the end -- presumably covered by the size check below
                    top_right_numbers_screenshot_2 = top_right_numbers_screenshot[full_top_left[1]:full_bottom_right[1], 
                                                                                  full_top_left[0]:full_bottom_right[0]]
                    # make sure we still have a screenshot
                    if top_right_numbers_screenshot_2.size != 0:
                        top_right_numbers_screenshot = top_right_numbers_screenshot_2
                        self.trusted_top_left = self.top_left

                        # look for players remaining icon
                        for needle_img_2 in [players_remaining_icon]: 
                            try:
                                result_2 = cv.matchTemplate(top_right_numbers_screenshot, needle_img_2, cv.TM_CCOEFF_NORMED)
                                min_val_2, max_val_2, min_loc_2, max_loc_2 = cv.minMaxLoc(result_2)

                                threshold_2 = 0.8
                                needle_2_w = needle_img_2.shape[1]
                                needle_2_h = needle_img_2.shape[0] + 10

                                # do we have a satisfactory match?
                                if max_val_2 >= threshold_2:

                                    # tag top left corner, add width & height to find bottom right corner
                                    top_left_2 = max_loc_2  # want rectangle
                                    bottom_right_2 = (top_left_2[0] + (needle_2_w) - 5, top_left_2[1] + needle_2_h)

                                    self.top_left_pr_icon = top_left_2
                                    self.bottom_right_pr_icon = bottom_right_2

                                    # black out players remaining icon
                                    cv.rectangle(top_right_numbers_screenshot, top_left_2, bottom_right_2, color=(0, 0, 0), thickness=-1)
                                # no, so go back to the last one we had
                                else:
                                    top_left_2 = self.top_left_pr_icon
                                    # do we have this?
                                    if top_left_2 is not None:
                                        bottom_right_2 = self.bottom_right_pr_icon
                                        # black out players remaining icon
                                        cv.rectangle(top_right_numbers_screenshot, top_left_2, bottom_right_2, color=(0, 0, 0), thickness=-1)
                            except Exception as e:
                                self.n_total_errors += 1
                                print(e)
                    # bad top left value, crop not as expected
                    else:
                        if self.trusted_top_left is None:
                            # no prior success with top_left, forget it
                            # NOTE(review): execution still continues to the crop/save step below
                            # with the un-narrowed top right image -- confirm intended
                            self.top_left = None
                        else:
                            # revert to last successful top_left value
                            self.top_left = self.trusted_top_left
                            top_left = self.top_left
                            # correct bottom right and expand left to just grab all linearly aligned numbers at once
                            full_bottom_right = (top_left[0] + (needle_w * 2), top_left[1] + needle_h)
                            full_top_left = tuple([top_left[0]-125, top_left[1]]) 

                            # crop full top right numbers bar
                            top_right_numbers_screenshot = top_right_numbers_screenshot[full_top_left[1]:full_bottom_right[1], 
                                                                                        full_top_left[0]:full_bottom_right[0]]

                    try:
                        # convert opencv back to PIL
                        i = Image.fromarray(top_right_numbers_screenshot)

                        # 'some/dir/loop_0000001.jpg' -> 'loop_0000001'
                        loop = file_path.split('/')[-1]
                        loop = loop.split('.jpg')[0]

                        # crop kills or players remaining numbers
                        if instance_details['type'] == 'k':
                            out_route = f'{output_dir}{loop}k.jpg'
                            crop = i.crop(n_kills_crop)
                        elif instance_details['type'] == 'pr':
                            out_route = f'{output_dir}{loop}pr.jpg'
                            crop = i.crop(n_players_remaining_crop)
                        else:
                            # counted here and again as a save error by the except below
                            self.n_total_errors += 1
                            raise Exception(f"error: instance_details['type'] == {instance_details['type']} | only 'k' and 'pr' are currently supported.")

                        # save cropped images (numbers)
                        crop.save(out_route)

                        # keep the old values next to the new ones so the two can be compared later
                        new_details = instance_details.copy()

                        new_details['og_file_path'] = instance_details['file_path']
                        new_details['file_path'] = out_route

                        new_details['og_top_left'] = instance_details['top_left']
                        new_details['top_left'] = self.top_left

                        temp.append(new_details)   

                    except Exception as e:
                        self.n_save_errors += 1
                        print(e)
                        
            except Exception as e:
                # e.g. the original screenshot could not be opened
                self.n_total_errors += 1
                print(e)
                    

        # one row per successfully saved crop
        pd.DataFrame(temp).to_csv('recropped_stable_numbers.csv', index=False)
        
        
    def compare_recropped(self, errors_only=True):
        """
        compare images from .recrop_screenshots() with existing screenshots
        
        overwrite existing screenshot with new screenshot with manual input
        
        inputs
        ------
        > errors_only
            >> processes errors (non int/float `numbers` values) only
            >> default == True
        """
        recropped_df = pd.read_csv('recropped_stable_numbers.csv')
        changed_crops = recropped_df.loc[recropped_df.top_left != recropped_df.og_top_left]
        
        if errors_only:
            # copy changed crops df to focus on error labels
            changed_error_crops = changed_crops.copy()
            # remove all digit only labels 0 - 160
            for i in range(160):
                changed_error_crops = changed_error_crops.loc[(changed_error_crops.numbers != f'{i}') & (changed_error_crops.numbers != f'{float(i)}')]
            changed_crops = changed_error_crops
        else:
            print('--- comparing all changed crops ---')
            
        changed_crops = changed_crops.reset_index(drop=True)
        
        # go through and compare crops (that were errors and see if the new crop resolved) 
        for _ in range(len(changed_crops)):
            # focus this instance
            instance_details = changed_error_crops.iloc[_]
            
            try:
                # load & display images (exception will happen in rare case file does not exist)
                og_crop = cv.imread(instance_details['og_file_path'])
                new_crop = cv.imread(instance_details['file_path'])

                cv.imshow('og_crop', og_crop)
                cv.imshow('new_crop', new_crop)       

                key = cv.waitKey(100)

                # ask human for input
                ask = input(f'are the numbers in this still {instance_details["numbers"]}? ')

                if (ask == '') or (ask.lower() == 'y') or (ask.lower() == 'yes'):
                    pass
                else:
                    try:
                        # convert input to integer
                        ask = int(ask)
                    except:
                        # make sure we want to replace (replacing errors kinda weird)
                        tell = input('do you want to replace the og crop with the new crop? ')
                        if (tell != '') and (tell.lower() != 'y') and (tell.lower() != 'yes'):
                            print("ok, let's pass")
                            ask = False
                        else:
                            # yes, ok, record any input
                            ask = input('what are the digits in the new crop? ')
                # are we replacing numbers?
                if ask:
                    try:
                        # find this instance in main CSV record
                        og_instance = self.existing_df.loc[self.existing_df['file_path'] == instance_details['og_file_path']]

                        # replace existing numbers value with input numbers
                        if len(og_instance) == 1:
                            new_instance = og_instance.copy()
                            new_instance['numbers'] = ask
                            self.existing_df.loc[self.existing_df['file_path'] == instance_details['og_file_path']] = new_instance
                        else:
                            self.n_total_errors += 1
                            raise Exception(f'error: len(og_instance) != 1 | len(og_instance) == {len(og_instance)} | {instance_details["file_path"]}')

                        # delete old image, save new image, delete old copy of new image
                        os.remove(instance_details['og_file_path'])
                        Image.fromarray(new_crop).save(instance_details['og_file_path'])
                        os.remove(instance_details['file_path'])

                        # save new CSV
                        self.existing_df.to_csv(self.records_dct['labeled_numbers'], index=False)

                        # remove row from recropped df, then save CSV
                        recropped_df = recropped_df.loc[recropped_df['file_path'] != instance_details['file_path']]
                        recropped_df.to_csv('recropped_stable_numbers.csv', index=False)
                    except Exception as e:
                        self.n_total_errors += 1
                        print(e)
                else:
                    pass
            except Exception as e:
                self.n_total_errors += 1
                print(e)

In [4]:
# build the labeler; __init__ calls update_df(), which reads both CSV records from disk
ln = LabelNumbers()

In [5]:
# ln.df_for_conversion

In [8]:
# %%time
# ln.recrop_screenshots(ln.existing_df)

[Errno 2] No such file or directory: 'media/stable_numbers/og_screenshots/loop_0003028.jpg'
[Errno 2] No such file or directory: 'media/stable_numbers/og_screenshots/loop_0003028.jpg'
Wall time: 15min 27s


In [6]:
# interactive: shows the old and the new crop for each changed row and asks for a label via input()
ln.compare_recropped()

are the numbers in this still n?  1


error: len(og_instance) != 1 | len(og_instance) == 3 | media/stable_numbers/recrop_numbers/loop_0000099k.jpg


are the numbers in this still n?  140


error: len(og_instance) != 1 | len(og_instance) == 3 | media/stable_numbers/recrop_numbers/loop_0000099pr.jpg


are the numbers in this still i37?  142
are the numbers in this still 12b?  3
are the numbers in this still i36?  128
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still n?  
are the numbers in this still e?  
are the numbers in this still n?  
are the numbers in this still e?  
are the numbers in this still n?  
are the numbers in this still e?  
are the numbers in this still n?  
are the numbers in this still e?  
are the numbers in this still n?  
are the numbers in this still n?  
are the numbers in this still n?  
are the numbers in this still n?  
are the numbers in this still n?  
are the numbers in this still n?  
are the numbers in this still n?  
are the

ok, let's pass


are the numbers in this still n?  6
are the numbers in this still i6?  82
are the numbers in this still n?  6
are the numbers in this still i6?  82
are the numbers in this still n?  6
are the numbers in this still n?  80
are the numbers in this still n?  b
do you want to replace the og crop with the new crop?  b


ok, let's pass


are the numbers in this still n?  30
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still n?  
are the numbers in this still n?  b
do you want to replace the og crop with the new crop?  b


ok, let's pass


are the numbers in this still b1?  81
are the numbers in this still e?  bb
do you want to replace the og crop with the new crop?  bb


ok, let's pass


are the numbers in this still e?  61
are the numbers in this still e?  10
are the numbers in this still e?  b
do you want to replace the og crop with the new crop?  b


ok, let's pass


are the numbers in this still e?  b
do you want to replace the og crop with the new crop?  y
what are the digits in the new crop?  b
are the numbers in this still e?  b
do you want to replace the og crop with the new crop?  
what are the digits in the new crop?  b
are the numbers in this still e?  b
do you want to replace the og crop with the new crop?  
what are the digits in the new crop?  b
are the numbers in this still e?  bb
do you want to replace the og crop with the new crop?  
what are the digits in the new crop?  bb
are the numbers in this still n?  
are the numbers in this still n?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still b?  
are the numbers in this still n?  b
do you want to replace the og crop with the new crop?  
what are the digits in the new crop?  b
are the numbers in this still n?  
are the numbers in this still n?  
are the numbers in this still b?  
are the numbers in thi

ok, let's pass


are the numbers in this still i7?  31
are the numbers in this still i3?  7
are the numbers in this still i3?  7
are the numbers in this still i7?  31
are the numbers in this still i3?  7
are the numbers in this still i3?  7
are the numbers in this still i3?  7
are the numbers in this still i3?  8
are the numbers in this still i3?  8
are the numbers in this still i3?  8
are the numbers in this still b?  31
are the numbers in this still b?  8
are the numbers in this still n?  31
are the numbers in this still n?  8
are the numbers in this still n?  31
are the numbers in this still n?  8
are the numbers in this still b?  31
are the numbers in this still n?  8
are the numbers in this still n?  31
are the numbers in this still n?  8
are the numbers in this still b?  31
are the numbers in this still n?  8
are the numbers in this still n?  31
are the numbers in this still n?  8
are the numbers in this still b?  31
are the numbers in this still n?  8
are the numbers in this still n?  31
are the

In [None]:
# 22 surrounded by 23s should be 23, not 22
# 16 followed by a bunch of 18s should probably be 18, check og screenshot (after 18s, nearby followed by 18/25 mix)
# last 24 in 27,17 chain (right before 25,17 chain) should be 25, not 24

In [7]:
df = pd.read_csv('recropped_stable_numbers.csv')

# keep only the rows whose label is NOT a plain digit label for 0-159
# (written either as '7' or as '7.0'); one vectorised pass instead of 160 filters
digit_labels = [f'{i}' for i in range(160)] + [f'{float(i)}' for i in range(160)]
df = df.loc[~df.numbers.isin(digit_labels)]

# of those, show the rows whose icon location changed during the recrop
df.loc[df.top_left != df.og_top_left]#.top_left.value_counts()

Unnamed: 0,numbers,file_path,type,record_timestamp,og_screenshot,top_left,og_file_path,og_top_left
174,n,media/stable_numbers/recrop_numbers/loop_00000...,k,2020-09-13 03:16:47.092816,media/stable_numbers/og_screenshots/loop_00000...,"(235, 27)",media/stable_numbers/number_crops/loop_0000099...,"(229, 30)"
175,n,media/stable_numbers/recrop_numbers/loop_00000...,pr,2020-09-13 03:16:47.092816,media/stable_numbers/og_screenshots/loop_00000...,"(235, 27)",media/stable_numbers/number_crops/loop_0000099...,"(229, 30)"
1274,b,media/stable_numbers/recrop_numbers/loop_00006...,k,2020-09-13 05:28:32.420208,media/stable_numbers/og_screenshots/loop_00006...,"(235, 27)",media/stable_numbers/number_crops/loop_0000647...,"(212, 40)"
1275,b,media/stable_numbers/recrop_numbers/loop_00006...,pr,2020-09-13 05:28:32.420208,media/stable_numbers/og_screenshots/loop_00006...,"(235, 27)",media/stable_numbers/number_crops/loop_0000647...,"(212, 40)"
1276,b,media/stable_numbers/recrop_numbers/loop_00006...,k,2020-09-13 05:28:32.651099,media/stable_numbers/og_screenshots/loop_00006...,"(235, 27)",media/stable_numbers/number_crops/loop_0000648...,"(212, 40)"
...,...,...,...,...,...,...,...,...
9308,e,media/stable_numbers/recrop_numbers/loop_00052...,pr,2020-09-15 00:31:30.199664,media/stable_numbers/og_screenshots/loop_00052...,"(235, 27)",media/stable_numbers/number_crops/loop_0005209...,"(229, 80)"
9309,e,media/stable_numbers/recrop_numbers/loop_00052...,k,2020-09-15 00:31:30.420105,media/stable_numbers/og_screenshots/loop_00052...,"(235, 27)",media/stable_numbers/number_crops/loop_0005210...,"(229, 80)"
9310,e,media/stable_numbers/recrop_numbers/loop_00052...,pr,2020-09-15 00:31:30.420105,media/stable_numbers/og_screenshots/loop_00052...,"(235, 27)",media/stable_numbers/number_crops/loop_0005210...,"(229, 80)"
9311,e,media/stable_numbers/recrop_numbers/loop_00052...,k,2020-09-15 00:31:30.621323,media/stable_numbers/og_screenshots/loop_00052...,"(235, 27)",media/stable_numbers/number_crops/loop_0005211...,"(229, 80)"


In [22]:
# counts entered by hand from the review session above
n_changed_error_crops = 1391
n_not_updated = 180

n_updated = n_changed_error_crops - n_not_updated
# percentage truncated (not rounded) to five characters
change_rate = str(n_updated / n_changed_error_crops * 100)[:5]

print(f'of {n_changed_error_crops} erroneous crops where top_left was changed by .recrop_screenshots(), {n_updated} crops were updated | {change_rate}% change rate')

of 1391 erroneous crops where top_left was changed by .recrop_screenshots(), 1211 crops were updated | 87.05% change rate


In [2]:
df = pd.read_csv('recropped_stable_numbers.csv')

# collect the rows for each digit label 0-159 (written as '7' or '7.0') in label
# order, then concatenate once instead of growing a frame inside the loop
rows_per_label = [
    df.loc[(df.numbers == f'{i}') | (df.numbers == f'{float(i)}')]
    for i in range(160)
]
temp_df = pd.concat([df.head(0)] + rows_per_label)

df = temp_df.copy()
# digit-labeled rows whose icon location changed during the recrop
df.loc[df.top_left != df.og_top_left]#.top_left.value_counts()

Unnamed: 0,numbers,file_path,type,record_timestamp,og_screenshot,top_left,og_file_path,og_top_left
12528,0.0,media/stable_numbers/recrop_numbers/loop_00062...,k,2020-09-15 22:13:00.475064,media/stable_numbers/og_screenshots/loop_00062...,"(245, 26)",media/stable_numbers/number_crops/loop_0006281...,"(245, 27)"
180,1.0,media/stable_numbers/recrop_numbers/loop_00000...,k,2020-09-13 04:14:04.701539,media/stable_numbers/og_screenshots/loop_00000...,"(235, 27)",media/stable_numbers/number_crops/loop_0000099...,"(233, 21)"
182,1.0,media/stable_numbers/recrop_numbers/loop_00001...,k,2020-09-13 04:14:04.918617,media/stable_numbers/og_screenshots/loop_00001...,"(235, 27)",media/stable_numbers/number_crops/loop_0000100...,"(233, 21)"
184,1.0,media/stable_numbers/recrop_numbers/loop_00001...,k,2020-09-13 04:14:05.204852,media/stable_numbers/og_screenshots/loop_00001...,"(235, 27)",media/stable_numbers/number_crops/loop_0000101...,"(233, 21)"
186,1.0,media/stable_numbers/recrop_numbers/loop_00001...,k,2020-09-13 04:14:05.460731,media/stable_numbers/og_screenshots/loop_00001...,"(236, 27)",media/stable_numbers/number_crops/loop_0000102...,"(233, 21)"
...,...,...,...,...,...,...,...,...
265,139.0,media/stable_numbers/recrop_numbers/loop_00001...,pr,2020-09-13 04:14:14.809724,media/stable_numbers/og_screenshots/loop_00001...,"(236, 27)",media/stable_numbers/number_crops/loop_0000141...,"(233, 21)"
181,140.0,media/stable_numbers/recrop_numbers/loop_00000...,pr,2020-09-13 04:14:04.701539,media/stable_numbers/og_screenshots/loop_00000...,"(235, 27)",media/stable_numbers/number_crops/loop_0000099...,"(233, 21)"
183,140.0,media/stable_numbers/recrop_numbers/loop_00001...,pr,2020-09-13 04:14:04.918617,media/stable_numbers/og_screenshots/loop_00001...,"(235, 27)",media/stable_numbers/number_crops/loop_0000100...,"(233, 21)"
12529,141.0,media/stable_numbers/recrop_numbers/loop_00062...,pr,2020-09-15 22:13:00.475064,media/stable_numbers/og_screenshots/loop_00062...,"(245, 26)",media/stable_numbers/number_crops/loop_0006281...,"(245, 27)"


In [3]:
# Rows whose current crop position differs from the original crop position.
top_left_changed = df.top_left.ne(df.og_top_left)
df[top_left_changed]

Unnamed: 0,numbers,file_path,type,record_timestamp,og_screenshot,top_left,og_file_path,og_top_left
12528,0.0,media/stable_numbers/recrop_numbers/loop_00062...,k,2020-09-15 22:13:00.475064,media/stable_numbers/og_screenshots/loop_00062...,"(245, 26)",media/stable_numbers/number_crops/loop_0006281...,"(245, 27)"
180,1.0,media/stable_numbers/recrop_numbers/loop_00000...,k,2020-09-13 04:14:04.701539,media/stable_numbers/og_screenshots/loop_00000...,"(235, 27)",media/stable_numbers/number_crops/loop_0000099...,"(233, 21)"
182,1.0,media/stable_numbers/recrop_numbers/loop_00001...,k,2020-09-13 04:14:04.918617,media/stable_numbers/og_screenshots/loop_00001...,"(235, 27)",media/stable_numbers/number_crops/loop_0000100...,"(233, 21)"
184,1.0,media/stable_numbers/recrop_numbers/loop_00001...,k,2020-09-13 04:14:05.204852,media/stable_numbers/og_screenshots/loop_00001...,"(235, 27)",media/stable_numbers/number_crops/loop_0000101...,"(233, 21)"
186,1.0,media/stable_numbers/recrop_numbers/loop_00001...,k,2020-09-13 04:14:05.460731,media/stable_numbers/og_screenshots/loop_00001...,"(236, 27)",media/stable_numbers/number_crops/loop_0000102...,"(233, 21)"
...,...,...,...,...,...,...,...,...
265,139.0,media/stable_numbers/recrop_numbers/loop_00001...,pr,2020-09-13 04:14:14.809724,media/stable_numbers/og_screenshots/loop_00001...,"(236, 27)",media/stable_numbers/number_crops/loop_0000141...,"(233, 21)"
181,140.0,media/stable_numbers/recrop_numbers/loop_00000...,pr,2020-09-13 04:14:04.701539,media/stable_numbers/og_screenshots/loop_00000...,"(235, 27)",media/stable_numbers/number_crops/loop_0000099...,"(233, 21)"
183,140.0,media/stable_numbers/recrop_numbers/loop_00001...,pr,2020-09-13 04:14:04.918617,media/stable_numbers/og_screenshots/loop_00001...,"(235, 27)",media/stable_numbers/number_crops/loop_0000100...,"(233, 21)"
12529,141.0,media/stable_numbers/recrop_numbers/loop_00062...,pr,2020-09-15 22:13:00.475064,media/stable_numbers/og_screenshots/loop_00062...,"(245, 26)",media/stable_numbers/number_crops/loop_0006281...,"(245, 27)"


In [4]:
df = pd.read_csv('recropped_stable_numbers.csv')

# Every spelling of a count from 0 to 159: as an int ('7') and as a float ('7.0').
numeric_labels = [label for i in range(160) for label in (f'{i}', f'{float(i)}')]

# Drop all rows carrying one of those numeric labels, keeping the remaining rows in file order.
df = df.loc[~df.numbers.isin(numeric_labels)]

# Of the remaining rows, show those whose crop position differs from the original crop position.
# (append .top_left.value_counts() to tally the new positions instead)
df.loc[df.top_left != df.og_top_left]

Unnamed: 0,numbers,file_path,type,record_timestamp,og_screenshot,top_left,og_file_path,og_top_left
18,n,media/stable_numbers/recrop_numbers/loop_00000...,k,2020-09-13 03:16:31.483826,media/stable_numbers/og_screenshots/loop_00000...,"(229, 30)",media/stable_numbers/number_crops/loop_0000020...,"(168, 62)"
19,n,media/stable_numbers/recrop_numbers/loop_00000...,pr,2020-09-13 03:16:31.483826,media/stable_numbers/og_screenshots/loop_00000...,"(229, 30)",media/stable_numbers/number_crops/loop_0000020...,"(168, 62)"
176,n,media/stable_numbers/recrop_numbers/loop_00000...,k,2020-09-13 03:16:47.092816,media/stable_numbers/og_screenshots/loop_00000...,"(235, 27)",media/stable_numbers/number_crops/loop_0000099...,"(229, 30)"
177,n,media/stable_numbers/recrop_numbers/loop_00000...,pr,2020-09-13 03:16:47.092816,media/stable_numbers/og_screenshots/loop_00000...,"(235, 27)",media/stable_numbers/number_crops/loop_0000099...,"(229, 30)"
482,i37,media/stable_numbers/recrop_numbers/loop_00002...,pr,2020-09-13 04:43:08.670679,media/stable_numbers/og_screenshots/loop_00002...,"(235, 27)",media/stable_numbers/number_crops/loop_0000248...,"(179, 29)"
...,...,...,...,...,...,...,...,...
10519,n,media/stable_numbers/recrop_numbers/loop_00052...,pr,2020-09-15 00:31:51.888240,media/stable_numbers/og_screenshots/loop_00052...,"(235, 27)",media/stable_numbers/number_crops/loop_0005269...,"(177, 49)"
10520,n,media/stable_numbers/recrop_numbers/loop_00052...,k,2020-09-15 00:31:52.121579,media/stable_numbers/og_screenshots/loop_00052...,"(235, 27)",media/stable_numbers/number_crops/loop_0005270...,"(177, 49)"
10521,n,media/stable_numbers/recrop_numbers/loop_00052...,pr,2020-09-15 00:31:52.121579,media/stable_numbers/og_screenshots/loop_00052...,"(235, 27)",media/stable_numbers/number_crops/loop_0005270...,"(177, 49)"
10522,n,media/stable_numbers/recrop_numbers/loop_00052...,k,2020-09-15 00:31:52.336759,media/stable_numbers/og_screenshots/loop_00052...,"(235, 27)",media/stable_numbers/number_crops/loop_0005271...,"(177, 49)"


In [18]:
# Tally the original crop positions of the rows whose crop position was changed.
top_left_changed = df.top_left != df.og_top_left
df.loc[top_left_changed, 'og_top_left'].value_counts()

(292, 57)    102
(229, 80)     96
(236, 26)     72
(197, 68)     60
(179, 28)     58
            ... 
(157, 44)      1
(270, 44)      1
(177, 29)      1
(234, 26)      1
(130, 28)      1
Name: og_top_left, Length: 117, dtype: int64

In [19]:
df = pd.read_csv('stable_numbers.csv')

# Every spelling of a count from 0 to 159: as an int ('7') and as a float ('7.0').
numeric_labels = [label for i in range(160) for label in (f'{i}', f'{float(i)}')]

# Drop all rows carrying one of those numeric labels, keeping the remaining rows in file order.
df = df.loc[~df.numbers.isin(numeric_labels)]

# Display every remaining (non-numeric-label) row.
df

Unnamed: 0,numbers,file_path,type,record_timestamp,og_screenshot,top_left
20,b,media/stable_numbers/number_crops/loop_0000010...,k,2020-09-13 03:16:29.202511,media/stable_numbers/og_screenshots/loop_00000...,"(229, 30)"
21,b,media/stable_numbers/number_crops/loop_0000010...,pr,2020-09-13 03:16:29.202511,media/stable_numbers/og_screenshots/loop_00000...,"(229, 30)"
68,i10,media/stable_numbers/number_crops/loop_0000034...,k,2020-09-13 03:16:34.670202,media/stable_numbers/og_screenshots/loop_00000...,"(229, 30)"
70,i10,media/stable_numbers/number_crops/loop_0000035...,k,2020-09-13 03:16:34.839918,media/stable_numbers/og_screenshots/loop_00000...,"(229, 30)"
74,i10,media/stable_numbers/number_crops/loop_0000037...,k,2020-09-13 03:16:35.296481,media/stable_numbers/og_screenshots/loop_00000...,"(229, 30)"
...,...,...,...,...,...,...
15131,i37e,media/stable_numbers/number_crops/loop_0007570...,pr,2020-09-16 20:50:02.866127,media/stable_numbers/og_screenshots/loop_00075...,"(172, 27)"
15132,i14e,media/stable_numbers/number_crops/loop_0007571...,k,2020-09-16 20:50:03.182184,media/stable_numbers/og_screenshots/loop_00075...,"(172, 27)"
15133,i37e,media/stable_numbers/number_crops/loop_0007571...,pr,2020-09-16 20:50:03.182184,media/stable_numbers/og_screenshots/loop_00075...,"(172, 27)"
15134,i14e,media/stable_numbers/number_crops/loop_0007572...,k,2020-09-16 20:50:03.666463,media/stable_numbers/og_screenshots/loop_00075...,"(172, 27)"


In [20]:
# NOTE(review): unlabeled scratch arithmetic. 180 is the same count subtracted in the
# change-rate print cell; the provenance of 1283 and 1091 is not shown here — TODO confirm
# what this total represents and replace the literals with computed values.
1283 + 1091 - 180

2194

In [10]:
# ln.ask_human()og_top_left

In [11]:
# loop_0002041pr.jpg should be 23, not 20
# loop_0002175k.jpg should be 16, not 10
# loop_0002827pr.jpg should be 21, not 32
# loop_0003079pr.jpg should be 47, not 48
# loop_0003126pr.jpg should be 41, not 42
# loop_0003241pr.jpg should be 32, not 33
# loop_0003265pr.jpg should be 27, not 37
# loop_0003322pr.jpg should be 27, not 37
# loop_0003449pr.jpg should be 18, not 59
# loop_0003585pr.jpg should be 19, not 58
# loop_0004034pr.jpg should be 8, not 7
# loop_0004123k.jpg should be 23, not 25
# loop_0004395k.jpg should be 3, not 25
# loop_0004796pr.jpg should be 7, not 4
# loop_0004876k.jpg should be 32, not 5
# loop_0004978pr.jpg should be 3, not 33
# loop_0005085pr.jpg should be i2, not i3
# loop_0005145k.jpg should be 37, not 14
# loop_0005849pr.jpg should be 31, not 21
# loop_0006022k.jpg should be n, not 24
# loop_0006303pr.jpg should be 140, not 143
# ??? loop_0004836pr.jpg should be 8, not 7 ??? (taken care of)
# loop_0006454pr.jpg should be 122, not 123
# loop_0006466k.jpg should be 1, not n
# loop_0006486k.jpg should be 1, not n
# loop_0006486pr.jpg should be 120, not 12
# loop_0006506pr.jpg should be 118, not 119
# loop_0006760k.jpg should be 3, not 2
# loop_0006811k.jpg should be 4, not 3
# loop_0006863pr.jpg should be 95, not 96
# i20 that's followed by i29 should be i29  (loop_0007034pr.jpg)
# loop_0007077pr.jpg should be 83, not 8
# loop_0007207pr.jpg should be 141, not 14
# loop_0007213pr.jpg should be 141, not 14

In [12]:
# for sub_dir in os.listdir(ln.output_dir):
#     if sub_dir not in ['.ipynb_checkpoints', 'sample_records.csv']:
# #         print(len(os.listdir(ln.data_dir + sub_dir)))
#         t = []
#         c = 0
#         for f in os.listdir(ln.output_dir + sub_dir):
#             if '.jpg' in f:
#                 c += 1
#                 ff = ln.output_dir + sub_dir + '/' + f
#                 s = Image.open(ff).size
#                 if s not in t:
#                     t.append(s)
#                     t.append(ff)
#         print(c)
#         print(t)
#         print()

In [13]:
# ln.df_for_conversion