# String Matching

### Problem Description: Generate a target string, starting from a random string of the same length

In [None]:
import random
import datetime
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.image as mpimg
from matplotlib import animation
from IPython.display import HTML

import numpy as np
import pandas as pd

%matplotlib inline

### Genetic Functions

In [None]:
def generate_parent(length):
    """Build a random starting string of ``length`` characters.

    Characters come from the module-level ``geneSet``. They are drawn in
    batches with ``random.sample`` (positions are sampled without
    replacement inside one batch); batches are concatenated until the
    requested length is reached.
    """
    chromosome = []
    remaining = length
    while remaining > 0:
        # A batch can never be larger than the gene set itself.
        batch = random.sample(geneSet, min(remaining, len(geneSet)))
        chromosome += batch
        remaining -= len(batch)
    return ''.join(chromosome)

def get_fitness(guess):
    """Count the positions where ``guess`` agrees with the global ``target``.

    The comparison is positional through ``zip``, so it stops at the end of
    the shorter of the two strings.
    """
    matches = 0
    for wanted, got in zip(target, guess):
        if wanted == got:
            matches += 1
    return matches

def mutate(parent):
    """Return a copy of ``parent`` with one randomly chosen position replaced.

    Two entries are sampled from the global ``geneSet``: the first is the
    preferred replacement, the second is used when the first equals the
    character already at that position (so the string actually changes,
    provided ``geneSet`` holds no duplicate characters).
    """
    position = random.randrange(0, len(parent))
    candidate, fallback = random.sample(geneSet, 2)

    genes = list(parent)
    if candidate == genes[position]:
        genes[position] = fallback
    else:
        genes[position] = candidate
    return ''.join(genes)

def display(guess,startTime):
    """Print a guess with its fitness score and the elapsed time.

    Args:
        guess: Candidate string to report; scored with ``get_fitness``.
        startTime: ``datetime.datetime`` taken when the run started.
    """
    elapsed = datetime.datetime.now() - startTime
    # ``timedelta.microseconds`` is only the sub-second component
    # (0..999999) and wraps around once a run lasts longer than a second.
    # Floor-dividing by a one-microsecond timedelta gives the full elapsed
    # time as an exact integer number of microseconds.
    timeDiff = elapsed // datetime.timedelta(microseconds=1)
    fitness = get_fitness(guess)
    print("Guess: {}\tFitness Score: {}\tTime Taken (µs): {}".format(guess, fitness, timeDiff))

### Main Function

In [None]:
def predict_output(geneSet,target):
    """Evolve a random string into ``target`` one mutation at a time.

    Starting from ``generate_parent(len(target))``, the current best string
    is mutated repeatedly; a child replaces it only when its fitness is
    strictly higher. Every improvement is printed through ``display`` and a
    short summary is printed at the end.

    NOTE: ``generate_parent``, ``get_fitness`` and ``mutate`` read the
    module-level ``geneSet`` / ``target`` rather than these parameters, so
    the arguments are expected to be the same values as those globals.

    Args:
        geneSet: Characters a guess may contain (used here for the size of
            the search space).
        target: String to reach.

    Returns:
        list[str]: Every child generated, in order, including rejected ones
        (used for plotting).
    """
    all_childs = []  # every child produced, kept for the visualisation

    # Initial run
    startTime = datetime.datetime.now()
    bestParent = generate_parent(len(target))
    bestFitness = get_fitness(bestParent)
    display(bestParent, startTime)

    goal = len(target)
    i = 0  # total number of mutations tried

    # Loop on the fitness instead of ``while True``: when the random start
    # already equals the target (or the target is empty) no child can score
    # higher, so an unconditional loop would never reach its ``break``.
    while bestFitness < goal:
        i += 1
        child = mutate(bestParent)
        childFitness = get_fitness(child)
        all_childs.append(child)

        if childFitness <= bestFitness:
            continue  # not an improvement; keep the current parent

        display(child, startTime)
        bestFitness = childFitness
        bestParent = child

    # Size of the full search space: one of |alphabet| choices per position.
    possibilities = len(set(geneSet)) ** len(target)

    print(f'\nTotal Possibilities: {possibilities}')
    print(f'Total iterations: {i}')
    print(f'Optimization over large space state: {round(((possibilities-i)/possibilities)*100,2)}%')

    return all_childs

### Visualizing String Matching

In [None]:
def gengraph(possibilities,child,target,count):
    """Render one guess as a PNG frame saved to ``anim/{count}_{child}.png``.

    The gene set is written along the top of the figure and the guess along
    the bottom; an arrow runs from each gene to the slot of the guess that
    holds it. Arrows for characters matching ``target`` are green, the
    others blue. The fitness score, iteration number and the guess itself
    are printed on the right-hand side.

    Uses the module-level ``plt``, ``patches`` and ``np`` imports.

    Args:
        possibilities: Characters of the gene set. Every character of
            ``child`` must occur in it (otherwise ``KeyError``).
        child: Guess to draw.
        target: String the guess is compared with; must be at least as long
            as ``child`` (otherwise ``IndexError``).
        count: Iteration number, shown in the figure and used in the file
            name.
    """
    import os  # local import: only needed to create the output folder

    fig, ax = plt.subplots(1, 1, figsize=(15, 6))
    try:
        # Position-wise comparison with the target (drives the arrow colour).
        matched = [expected == actual for expected, actual in zip(target, child)]

        # Horizontal positions of the individual characters
        geneSet_spacing = np.linspace(0.1, 0.9, len(possibilities))
        child_spacing = np.linspace(0.3, 0.7, len(child))

        # Gene set along the top
        for x, gene in zip(geneSet_spacing, possibilities):
            ax.text(x - 0.005, 0.8, gene, fontsize=12)

        # Underscore placeholders for the guess
        for x in child_spacing:
            ax.text(x, 0.25, '_', fontsize=15)

        # Characters of the guess, just above the underscores
        for x, letter in zip(child_spacing, child):
            ax.text(x, 0.26, letter, fontsize=15)

        # x-coordinate of every gene, used as the start point of its arrows
        xpos = dict(zip(possibilities, geneSet_spacing))

        gc = 0  # number of green (matching) characters == fitness score
        for i, gene in enumerate(child):
            if matched[i]:
                gc += 1
                alpha, color = 0.3, 'green'
            else:
                alpha, color = 0.1, 'blue'
            ax.add_patch(
                patches.Arrow(
                    xpos[gene], 0.8, child_spacing[i] - xpos[gene], -0.5,
                    width=0.01, alpha=alpha, color=color,
                )
            )

        ax.text(0.9, 0.65, f"Fitness Score: {gc}", fontsize=13)
        ax.text(0.9, 0.50, f"Total iterations: {count}", fontsize=13)
        ax.text(0.9, 0.35, f"Current Guess: {child}", fontsize=13)

        ax.axis(False)

        # savefig does not create missing folders, so make sure it exists.
        os.makedirs('anim', exist_ok=True)
        fig.savefig(f'anim/{count}_{child}.png')
    finally:
        # Always release the figure, even when drawing or saving failed.
        plt.close(fig)
    

In [None]:
# Demo 1: binary alphabet, target made of ten ones.
# Both names must stay module-level: generate_parent, get_fitness and mutate
# read them as globals.
geneSet, target = "01", "1111111111"

test = predict_output(geneSet=geneSet, target=target)

In [None]:
# Demo 2: upper/lower-case letters plus a space. This rebinds the
# module-level geneSet / target (read as globals by the genetic helpers)
# and replaces the previous contents of ``test``.
geneSet, target = (
    "aAbBcCdDeEfFgGhHiIjJkKlLmM nNoOpPqQrRsStTuUvVwWxXyYzZ",
    "HPC Project",
)

test = predict_output(geneSet=geneSet, target=target)

### Generating Images

In [None]:
def generate_images(possibilities,child_list,target):
    """Draw one frame per guess in ``child_list`` by calling ``gengraph``.

    Frames are numbered from 0 in list order; that number is passed to
    ``gengraph`` as its ``count`` argument.
    """
    frame_no = 0
    for guess in child_list:
        gengraph(possibilities, guess, target, frame_no)
        frame_no += 1

In [None]:
# Render a frame for every child recorded by the last predict_output run.
generate_images(geneSet, test, target)

### Generating Images in Parallel

In [None]:
# import threading

# threads = 4
# jobs = []
# for i in range(0, threads):
#     thread = threading.Thread(target=generate_images(possibilities=geneSet,child_list=test,target=target))
#     jobs.append(thread)
    
# for j in jobs:
#     j.start()

# for j in jobs:
#     j.join()

### Gathering Images

In [None]:
# Load the frames back in the order they were generated; the file names
# follow the pattern written by gengraph.
imgs = [
    mpimg.imread(f'anim/{i}_{each}.png')
    for i, each in enumerate(test)
]
len(imgs)

### Creating Animation from multiple images

In [None]:
def plot_images(img_list):
    """Create a matplotlib animation that steps through ``img_list``.

    A single image artist is created from the first entry and its data is
    swapped on every frame. One frame per list entry, 50 ms apart, with
    blitting enabled.

    Returns:
        The ``animation.FuncAnimation`` object.
    """
    figure = plt.figure(figsize=(20,8))
    frame_artist = figure.gca().imshow(img_list[0])
    plt.axis('off')

    def show_frame(index):
        # Swap the pixel data instead of creating a new artist per frame.
        frame_artist.set_data(img_list[index])
        return (frame_artist,)

    def reset():
        # Initial state for blitting: the first image.
        return show_frame(0)

    return animation.FuncAnimation(
        figure,
        show_frame,
        init_func=reset,
        frames=len(img_list),
        interval=50,
        blit=True,
    )

In [None]:
# Show the animation inline as an HTML5 video
video_anim = plot_images(imgs)
HTML(video_anim.to_html5_video())

In [None]:
# Show the animation with interactive JavaScript playback controls
interactive_anim = plot_images(imgs)
HTML(interactive_anim.to_jshtml())

In [None]:
# Write the animation to disk as test.mp4
saved_anim = plot_images(imgs)
saved_anim.save('test.mp4')