# Dynamic Programming
## Longest Common Substring

A substring is a contiguous sequence of characters within a string. For example, 'abc' has the non-empty substrings: {'a', 'b', 'c', 'ab', 'bc', 'abc'}. Notice 'ac' is NOT a substring because 'a' and 'c' do not appear consecutively in the original string.

The longest common substring algorithm takes two strings as input and outputs the longest substring that belongs to both strings.

Given string1 and string2 of lengths n and m, use the following method:

1. Create an (m+1) x (n+1) matrix whose columns correspond to the characters of string1 and whose rows correspond to the characters of string2, with one extra row of padding at the top and one extra column of padding on the far left.
2. Initialize all values to zeros.
3. Starting at cell (2, 2) (using 1-based indexing) and visiting every remaining cell (i, j), compare the row character with the column character. If they are the same, set cell (i, j) to the value in (i-1, j-1) plus 1. If they are not, leave the value as zero.

The tables below demonstrate the progress of the algorithm.

In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

def style_specific_cell(df, row, col, c):
    """Build a CSS grid that colours exactly one cell of ``df``.

    Intended for ``df.style.apply(..., axis=None)``, which expects a
    DataFrame of CSS strings with the same labels as the styled frame.

    Args:
        df: Frame being styled; only its index and columns are used.
        row: Positional row of the cell to colour.
        col: Positional column of the cell to colour.
        c: CSS colour string used as the cell background.

    Returns:
        A DataFrame labelled like ``df`` holding '' everywhere except the
        selected cell, which gets black text on a ``c`` background.
    """
    css_grid = pd.DataFrame('', index=df.index, columns=df.columns)
    cell_css = 'color: black;background-color: ' + c
    css_grid.iloc[row, col] = cell_css
    return css_grid

def style_lcs_match(df, row, col):
    """Build the CSS grid for a step where the two characters match.

    Highlights the cell just written, the upper-left diagonal cell it was
    derived from, and the two label cells (column 0 of the row, row 0 of
    the column) that were compared.

    Args:
        df: Frame being styled; only its index and columns are used.
        row: Positional row of the current cell.
        col: Positional column of the current cell.

    Returns:
        A DataFrame labelled like ``df`` with CSS strings in the four
        highlighted positions and '' elsewhere.
    """
    label_css = 'color: black; background-color: yellowgreen; font-weight:bold'
    # Applied top to bottom, so a later entry wins if two positions coincide.
    highlights = (
        ((row, col), 'color: black; background-color: gold'),                # current
        ((row - 1, col - 1), 'color: black; background-color: lightgreen'),  # upper left
        ((row, 0), label_css),                                               # row char
        ((0, col), label_css),                                               # column char
    )

    css_grid = pd.DataFrame('', index=df.index, columns=df.columns)
    for (r, c), css in highlights:
        css_grid.iloc[r, c] = css
    return css_grid

def style_lcs_no_match(df, row, col):
    """Build the CSS grid for a step where the two characters differ.

    Highlights the current cell and marks the two compared label cells
    (column 0 of the row, row 0 of the column) in the mismatch colour.

    Args:
        df: Frame being styled; only its index and columns are used.
        row: Positional row of the current cell.
        col: Positional column of the current cell.

    Returns:
        A DataFrame labelled like ``df`` with CSS strings in the three
        highlighted positions and '' elsewhere.
    """
    mismatch_css = 'color: black; background-color: lightcoral; font-weight:bold'
    # Applied top to bottom, so a later entry wins if two positions coincide.
    highlights = (
        ((row, col), 'color: black; background-color: gold'),  # current
        ((row, 0), mismatch_css),                              # row char
        ((0, col), mismatch_css),                              # column char
    )

    css_grid = pd.DataFrame('', index=df.index, columns=df.columns)
    for (r, c), css in highlights:
        css_grid.iloc[r, c] = css
    return css_grid

def style_lcs_final(df, row, col):
    """Build the CSS grid that highlights the longest common substring.

    Starting from ``(row, col)``, walks up the diagonal towards the top
    left while the cells hold positive values, highlighting every visited
    cell together with its label cells (column 0 of the row and row 0 of
    the column).

    Args:
        df: Table to read values from. Rows/columns with position <= 1 are
            never read as numbers (in this notebook's layout they hold the
            character labels and the zero padding).
        row: Positional row of the cell where the substring ends.
        col: Positional column of the cell where the substring ends.

    Returns:
        A DataFrame labelled like ``df`` with CSS strings in the
        highlighted positions and '' elsewhere.
    """
    label_css = 'color: black; background-color: yellowgreen; font-weight:bold'

    df_styler = pd.DataFrame('', index=df.index, columns=df.columns)
    df_styler.iloc[row, col] = 'color: black; background-color: gold'
    df_styler.iloc[row, 0] = label_css
    df_styler.iloc[0, col] = label_css

    temp_row, temp_col = row - 1, col - 1
    # Check the bounds BEFORE reading the cell: positions <= 1 may hold
    # character labels, and comparing a string with 0 raises TypeError.
    # Short-circuiting keeps the comparison on interior cells only.
    while temp_row > 1 and temp_col > 1 and df.iloc[temp_row, temp_col] > 0:
        df_styler.iloc[temp_row, temp_col] = 'color: black; background-color: gold; font-weight:bold'
        df_styler.iloc[temp_row, 0] = label_css
        df_styler.iloc[0, temp_col] = label_css
        temp_row -= 1
        temp_col -= 1
    return df_styler

In [2]:
# Build the table that the walkthrough below fills in and displays.
# Row 0 holds the characters of string1 and column 0 holds the characters of
# string2; every other cell starts at zero.
# The leading '--' on each string supplies two extra positions: the first
# lands in the top-left label cell and the second sits above/beside the line
# of zeros that the fill loop never writes to (it starts at position 2).
##### STRINGS DEFINED HERE #####
string1 = '--abdabc' #MUST have '--' in front of string
string2 = '--abda' #MUST have '--' in front of string
################################

# Zero grid: len(string2)-1 rows, one column per character of string1.
df = pd.DataFrame(np.zeros((len(string2)-1, len(string1)))).astype(int)
# Add string1's characters as an extra row labelled -1 ...
df.loc[-1] = [x for x in string1]
# ... then shift every label up by one and sort, so that row becomes row 0.
df.index = df.index + 1
df = df.sort_index()
# Overwrite the first column with string2's characters (the row labels).
df.iloc[:,0] = [x for x in string2]

# Last expression of the cell: render the table with the column headers
# (axis=1) and the index labels (axis=0) hidden.
df.style.hide(axis=1).hide(axis=0)

0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0
b,0,0,0,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


In [3]:
# Fill the table cell by cell, displaying a highlighted snapshot after each
# comparison. max_value tracks the best cell so far as [value, row, col] and
# is read by the final-highlight cell further down.
max_value = [0, 0, 0]
# Positions 0 and 1 hold the labels and the zero padding, so start at 2.
for i, row_char in enumerate(df.iloc[2:, 0], start=2):
    for j, column_char in enumerate(df.iloc[0, 2:], start=2):
        if row_char == column_char:
            # Extend the run that ended on the upper-left diagonal neighbour.
            df.iloc[i, j] = df.iloc[i-1, j-1] + 1
            highlighter = style_lcs_match
        else:
            highlighter = style_lcs_no_match

        # Strict '<' keeps the first cell that reached the current maximum.
        if max_value[0] < df.iloc[i, j]:
            max_value[:] = [df.iloc[i, j], i, j]

        temp = df.style.apply(highlighter, row = i, col = j, axis = None).hide(axis=1).hide(axis=0)
        display(temp)


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,0,0,0
b,0,0,0,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,0,0,0
b,0,0,0,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,0,0,0
b,0,0,0,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,0,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,0,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,0,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,0,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,0,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,0,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,0,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,1,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,1,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,1,0,0,0,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,1,0,0,4,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,1,0,0,4,0,0


0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,1,0,0,4,0,0


In [4]:
# Highlight the diagonal run that ends at the best cell recorded in
# max_value ([value, row, col]); the Styler is the cell's displayed result.
final_view = df.style.apply(
    style_lcs_final,
    row = max_value[1],
    col = max_value[2],
    axis = None,
)
final_view.hide(axis=1).hide(axis=0)

0,1,2,3,4,5,6,7
-,-,a,b,d,a,b,c
-,0,0,0,0,0,0,0
a,0,1,0,0,1,0,0
b,0,0,2,0,0,2,0
d,0,0,0,3,0,0,0
a,0,1,0,0,4,0,0


The final table above shows the method of obtaining the final answer:
1. Find the maximum value in the table and select the corresponding character.
2. Select each character going towards the top left until the value on the diagonal (i-1, j-1) is zero.

The final solution for this problem is: 'abda'