# Objective:
Use OCR to read the meso (both red and regular) and EXP values from the screenshot taken after each auto-battle session,
then clean the output so it is ready for analysis.

Choose to use EasyOCR (b/c it's a simple task) - https://www.jaided.ai/easyocr/documentation/

https://cloudinary.com/guides/web-performance/extract-text-from-images-in-python-with-pillow-and-pytesseract
This guide covers some other OCR libraries to choose from.

In [None]:
# import all libraries 
import easyocr
import cv2
import PIL # this is needed to fix -module 'PIL.Image' has no attribute 'ANTIALIAS'- error
from matplotlib import pyplot as plt
import numpy as np

# Import libraries for mass processing
import glob 
import pandas as pd
import datetime

In [None]:
# function to check python virtual environment to ensure libraries are installed
def checkEnv():
    """Print the name of the Python environment running this notebook.

    The name is the last path component of ``sys.prefix`` (the root
    directory of the active interpreter's environment). This works on any
    operating system and at any directory depth, unlike splitting
    ``sys.executable`` on backslashes and reading a fixed position.
    """
    import os
    import sys

    env_root = os.path.normpath(sys.prefix)
    # basename is empty only when the prefix is a filesystem root; fall back
    # to the full path so something meaningful is still printed
    env_name = os.path.basename(env_root) or env_root
    print("Your current python virtual environment is {}".format(env_name))
# Check for correct environment
checkEnv()

# function to visualise image using the cv2 library
def checkImage(img, result):
    """Draw a rectangle around every OCR detection and return the image.

    Each entry of ``result`` is read as ``entry[0][corner][axis]``: corner 0
    is used as the top-left point and corner 2 as the bottom-right point.
    Coordinates are cast to ``int`` before being handed to ``cv2.rectangle``.
    """
    box_colour = (0, 255, 128)
    line_width = 3
    for detection in result:
        corners = detection[0]
        first_corner = (int(corners[0][0]), int(corners[0][1]))
        opposite_corner = (int(corners[2][0]), int(corners[2][1]))
        img = cv2.rectangle(img, first_corner, opposite_corner, box_colour, line_width)
    return img

# Function to calculate the total seconds based on the column - Duration
def findTotalSeconds(timestr):
    """Convert a colon-separated duration string into total seconds.

    Accepts ``"MM:SS"`` and ``"HH:MM:SS"``. For ``"MM:SS"`` the result is
    ``minutes * 60 + seconds``, with minutes allowed to exceed 59.

    Raises:
        ValueError: if the string does not have two or three
            colon-separated fields, or a field is not an integer.
    """
    fields = timestr.split(":")
    if len(fields) not in (2, 3):
        raise ValueError(
            "expected 'MM:SS' or 'HH:MM:SS', got {!r}".format(timestr)
        )
    total = 0
    # Fold the fields left to right in base 60: each step promotes the running
    # total by one unit (hours -> minutes -> seconds)
    for field in fields:
        total = total * 60 + int(field)
    return total


In [None]:
# Path of the single screenshot processed by the cells below
image_path = 'ab_04_af30.jpg'
# Alternative sample screenshot - uncomment to use it instead
# image_path = 'ab_03_sf144.jpg'

# Work around "module 'PIL.Image' has no attribute 'ANTIALIAS'" by exposing
# LANCZOS under the old attribute name; this must run before the image is read
PIL.Image.ANTIALIAS = PIL.Image.LANCZOS 

# Build an English-only EasyOCR reader (gpu = False asks it not to use a GPU)
# and run it on the screenshot. Later cells index each entry of `result` as
# entry[0] (corner coordinates of the text box) and entry[1] (the text)
reader = easyocr.Reader(['en'], gpu = False)
result = reader.readtext(image_path)

In [None]:
# Show the first coordinate of the first corner of the last detection,
# first as returned by EasyOCR and then cast to int (as checkImage does)
display(result[-1][0][0][0])
display(int(result[-1][0][0][0]))

# Notice that non-numerical text has been extracted as well

In [None]:
# Visualise each extracted element of the image using the function - checkImage
img = cv2.imread(image_path,1)
# cv2.imread returns None instead of raising when the file cannot be read
if img is None:
    raise FileNotFoundError("Could not read image: {}".format(image_path))
checked_img = checkImage(img, result)
# OpenCV stores pixels as BGR while matplotlib expects RGB; convert only for
# display so the colours are not swapped (checked_img itself stays BGR)
plt.imshow(cv2.cvtColor(checked_img, cv2.COLOR_BGR2RGB))
plt.show()

# Remove the unnecessary list elements - clean_result 
The "clean_result" list will only contain the following stats:
1. "Auto-Battle"
2. Duration
3. Kill Count
4. Regular Meso Gained
5. Red Meso Gained
6. Total Exp Gained

# Transform "clean_result" to "Final_result_list"
removes all the unneeded information for a cleaner list

In [None]:
# Report how many detections were returned, then list them one per line
print("Length of List: {}".format(len(result)))
for detection in result:
    print(detection)

In [None]:
# Keep only the first seven detections ...
clean_result = result[:7]
# ... then drop the entry at index 5 (the sixth detection)
del clean_result[5]
print("Length of List: {}".format(len(clean_result)))
for detection in clean_result:
    print(detection)

In [None]:
# Visualise each extracted element of the image using the function - checkImage
img = cv2.imread(image_path,1)
# cv2.imread returns None instead of raising when the file cannot be read
if img is None:
    raise FileNotFoundError("Could not read image: {}".format(image_path))
checked_img = checkImage(img, clean_result)
# OpenCV stores pixels as BGR while matplotlib expects RGB; convert only for
# display so the colours are not swapped (checked_img itself stays BGR)
plt.imshow(cv2.cvtColor(checked_img, cv2.COLOR_BGR2RGB))
plt.show()

In [None]:
# List each retained detection on its own line
for detection in clean_result:
    print(detection)

In [None]:
# Labels for the retained detections, in the same order as clean_result
info_list = ['Description', 'Duration', 'Kill Count', 'Meso', 'Red Meso', 'Total EXP Gained']

# Pair each label with the text (index 1) of the detection at the same position
final_result_list = [
    [info_list[position], detection[1]]
    for position, detection in enumerate(clean_result)
]

# display final_result_list [description, duration, KC, Reg Meso, Red Meso, EXP]
display(final_result_list)

# Summary of the above:
1. Extract the important statistics from the auto-battle image
2. Visualise the detections for better clarity
3. Remove the unnecessary list elements
4. End up with a clean list of duration, kill count, meso, red meso and EXP
# Limitations
1. Only one image at a time
2. Still have to manually take screenshots
# Suggestion
1. Process multiple images in one go
2. Include map name, character class & name, datetime and timestamp
3. Append the results to a container (dictionary, DataFrame)

# Use Case for EasyOCR
Imagine you're interested in gathering information about the meso & EXP rates in different Star Force (SF) and Arcane Force (AF) maps.
You take a screenshot of each auto-battle (AB), but you're too lazy to manually enter these figures into a spreadsheet. This is where EasyOCR comes in. 



In [None]:
# Fixes the -module 'PIL.Image' has no attribute 'ANTIALIAS'- error
PIL.Image.ANTIALIAS = PIL.Image.LANCZOS

# Labels for the values kept from each screenshot, in detection order
info_list = ['Description', 'Duration', 'Kill Count', 'Meso', 'Red Meso', 'Total EXP Gained']

# grand_list collects one list of [label, value] pairs per screenshot
grand_list = []

# Build the reader once and reuse it for every image instead of constructing
# a new one on each loop iteration
reader = easyocr.Reader(['en'], gpu = False)

# get all the jpg files in the folder - Note: glob doesn't use regex, it follows the rules used by the unix shell
list_of_image = glob.glob(r'ab_*.jpg')
for image_file in list_of_image:
    # Split the image name (ab_XX_SFXXX.jpg) and get the SF/AF level in upper case
    map_name = image_file.split("_")[-1].split(".")[0].upper()

    # Two placeholders for two arguments (a third placeholder raised IndexError)
    print("File Name: {}, Map Name: {} ".format(image_file, map_name))

    result = reader.readtext(image_file)

    # # Visualise each extracted element of the image using the function - checkImage
    # img = cv2.imread(image_file,1)
    # checked_img = checkImage(img, result)
    # plt.imshow(checked_img)
    # plt.show()

    # The slicing below needs seven detections; with fewer, the labels would
    # be paired with the wrong values (or pop would fail), so skip the image
    if len(result) < 7:
        print("Skipping {}: expected at least 7 text regions, found {}".format(image_file, len(result)))
        continue

    # Keep the first seven detections ...
    clean_result = result[:7]
    # ... and drop the one at index 5 (the "EXP" label)
    clean_result.pop(5)

    # Pair each label with the text (index 1) of the matching detection
    final_result_list = [
        [label, detection[1]]
        for label, detection in zip(info_list, clean_result)
    ]
    # append the map name into final_result_list
    final_result_list.append(['Map', map_name])

    # display final_result_list [description, duration, KC, Reg Meso, Red Meso, EXP, Map]
    display(final_result_list)

    # append final_result_list into grand_list
    grand_list.append(final_result_list)

# Now we need to manipulate "grand_list" into a suitable format 
1. Remove the 1st element of each inner list - ["Description","Auto-Battle Results"]
2. extract the values (2nd element) and append it into a single list
3. The result is a list of list, with each inner list only containing values.

In [None]:
import re

print(len(grand_list))
new_grand_list = []
for inner_list in grand_list:
    # new list to only store values
    new_inner_list = []
    # Skip the leading ['Description', ...] pair and walk the remaining
    # [label, value] pairs
    for position, (_label, value) in enumerate(inner_list[1:]):
        if position == 0:
            # Duration: turn every run of non-digit characters into ':' and
            # trim stray separators at the ends. Replacing all occurrences of
            # whatever character sits third from the end corrupted the value
            # when that character was a digit or the separators were mixed.
            duration = re.sub(r'\D+', ':', value).strip(':')
            new_inner_list.append(duration)
        else:
            # Strip thousands separators so the value can be cast to int later
            new_inner_list.append(value.replace(',',''))
    #append new_inner_list into new_grand_list
    new_grand_list.append(new_inner_list)

# display new_grand_list to check if everything is in order
display(new_grand_list)

# Transform new_grand_list into a Dataframe
1. columns is a list of Dataframe headers
2. new_grand_list is the content of the Dataframe
3. The name of the Dataframe is dfABstats

In [None]:
# Header names for the DataFrame, in the same order as each row of new_grand_list
columns = ['Duration','Kill Count','Meso','Red Meso','Total Exp', 'Map']

# One row per screenshot
dfABstats = pd.DataFrame(new_grand_list, columns = columns)

# The four count columns become 64-bit integers; 'Map' is cast to str.
# 'Duration' is left as it is and converted separately below.
dtype_map = {name: 'int64' for name in columns[1:5]}
dtype_map['Map'] = 'str'
dfABstats = dfABstats.astype(dtype_map)

# Derive the session length in seconds from the Duration text
dfABstats['total_seconds'] = dfABstats['Duration'].apply(findTotalSeconds)

display(dfABstats.dtypes)
display(dfABstats)

# Now that you have your DataFrame, where can you save it so that new DataFrames can be appended to it later?
1. An Excel spreadsheet, a CSV file, or even an RDBMS