In [1]:
import plotly, pandas, numpy, scipy
import plotly.plotly as py
import plotly.graph_objs as go

from plotly.graph_objs.scatter import Marker

# Load the experiment results (the sort by 'Nome' is left commented out).
DATA = pandas.read_csv("./ILS_Shrink.csv")  # .sort_values(by=['Nome'])

# Distinct evaluation budgets present in the data, in ascending order.
BUDGETS = numpy.sort(DATA["Numero_total_de_comparacoes"].unique())

# Map every budget to the rows that were recorded with that budget.
budgets = {
    size: DATA[DATA["Numero_total_de_comparacoes"] == size]
    for size in BUDGETS
}

# Maximum achievable fitness for each problem instance.
# Every value is a two-element list: OUR_FITNESS_LINE plots element 0 and
# FITNESS_LINE plots element 1.
MAX_FITNESS = {
    instance: [first, second]
    for instance, first, second in (
        ("warmup", 21, 210),
        ("anchors", 21, 210),
        ("ranges", 21, 210),
        ("backrefs", 21, 210),
        ("abba", 22, 220),
        ("aman-aplan", 19, 190),
        ("prime", 20, 300),
        ("four", 21, 210),
        ("order", 21, 210),
        ("triples", 21, 630),
        ("glob", 21, 420),
        ("balance", 32, 320),
        ("powers", 11, 110),
        ("long-count", 1, 270),
        ("alphabetical", 17, 340),
    )
}

# Fitness achieved by Bartoli, according to the generated regular expression.
# Every value is a two-element list; BARTOLI_LINE plots element 1.
BARTOLI_FITNESS = {
    instance: [first, second]
    for instance, first, second in (
        ("warmup", 21, 207),
        ("anchors", 21, 208),
        ("ranges", 21, 195),
        ("backrefs", 18, 138),
        ("abba", 20, 184),
        ("aman-aplan", 16, 136),
        ("prime", 13, 188),
        ("four", 21, 183),
        ("order", 21, 186),
        ("triples", 15, 430),
        ("glob", 19, 340),
        ("balance", 29, 130),
        ("powers", 7, 51),
        ("long-count", 1, 191),
        ("alphabetical", 12, 132),
    )
}

# Fitness achieved by Norvig, according to the generated regular expression.
# Every value is a two-element list; NORVIG_LINE plots element 1.
NORVIG_FITNESS = {
    instance: [first, second]
    for instance, first, second in (
        ("warmup", 21, 207),
        ("anchors", 21, 208),
        ("ranges", 21, 191),
        ("backrefs", 21, 175),
        ("abba", 21, 186),
        ("aman-aplan", 19, 157),
        ("prime", 19, -398),
        ("four", 21, 192),
        ("order", 21, 190),
        ("triples", 21, 589),
        # NOTE(review): "glob" is the only row whose first element is 0 -- confirm.
        ("glob", 0, 392),
        ("balance", 31, -1457),
        ("powers", 11, -1969),
        ("long-count", 1, 189),
        ("alphabetical", 17, 294),
    )
}


# Named hex colours for the plot traces. In the visible cells only three are
# referenced: COLOR_GRAY (FITNESS_LINE), COLOR_RED (BARTOLI_LINE) and
# COLOR_ORANGE (NORVIG_LINE).
COLOR_GREEN = "#00b100"
COLOR_RED   = "#cc0000"
COLOR_BLUE  = "#0000cc"
COLOR_PURPLE = "#663399"
COLOR_ORANGE = "#ff4500"
COLOR_GRAY   = "#827689"



# Three-colour palette; not referenced by any of the visible cells.
COLORS = ["#0dba86", "#0d98ba", "#0d42ba"]


def _fitness_markers(label, table, color, symbol):
    """Return a marker-only trace plotting element 1 of every entry of `table`.

    `table` maps an instance name to a two-element list; the names become the
    x values and the second element of each list becomes the y value.
    """
    return go.Scatter(
        name=label,
        x=list(table),
        y=[pair[1] for pair in table.values()],
        mode='markers',
        marker=dict(color=color, symbol=symbol),
    )


# Line through element 0 of every MAX_FITNESS entry. It shares its legend name
# with FITNESS_LINE and is not added to any figure in the visible cells.
OUR_FITNESS_LINE = go.Scatter(
    mode='lines',
    name="Pontuação Máxima",
    x=list(MAX_FITNESS),
    y=[pair[0] for pair in MAX_FITNESS.values()],
)

# Reference markers drawn on top of the "Melhor fitness" box plot.
FITNESS_LINE = _fitness_markers("Pontuação Máxima", MAX_FITNESS, COLOR_GRAY, "triangle-up")

BARTOLI_LINE = _fitness_markers("Genético (Bartoli)", BARTOLI_FITNESS, COLOR_RED, "square")

NORVIG_LINE = _fitness_markers("Exato (Norvig)", NORVIG_FITNESS, COLOR_ORANGE, "diamond")

def _constant_time_line(label, milliseconds):
    """Return a horizontal line trace at `milliseconds` across all instances.

    The trace carries one point per distinct instance name (the same x values
    MAX_LINE uses) rather than one point per CSV row, so the figure does not
    ship thousands of redundant, repeated-category points.
    """
    instance_names = DATA.Nome.unique()
    return go.Scatter(
        name=label,
        x=instance_names,
        y=[milliseconds] * len(instance_names),
        mode='lines',
    )


# Reference lines for the timing plots; the y axis of those plots is in
# milliseconds, hence the factor of 1000.
ONE_MINUTE_LINE = _constant_time_line("1 minute", 60 * 1000)

TWO_MINUTES_LINE = _constant_time_line("2 minutes", 120 * 1000)


In [2]:
# Box plots of the total run time per instance, one trace per budget.
# The loop variable is named `budget` (as in the other cells) so it does not
# shadow the module-level `budgets` dict that is being iterated.
data = [
    go.Box(name="%dk" % (size / 1000), x=budget.Nome, y=budget.Tempo_total)
    for size, budget in budgets.items()
]
data.append(ONE_MINUTE_LINE)
# data.append(TWO_MINUTES_LINE)

# NOTE(review): the x values are instance names (Nome) while the axis title
# says 'Budget'; the other figures use 'Instâncias' -- confirm the intended label.
layout = go.Layout(title="Tempo total de execução em milissegundos",
                xaxis=dict(title='Budget'),
                yaxis=dict(title='Tempo Total'),
                boxmode='group')
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [3]:
# Number of comparisons needed to find the best fitness, per instance and budget.

# Horizontal reference line at 600k comparisons. Its length is derived from
# the instance names so x and y always match, instead of assuming exactly 15
# instances.
instance_names = DATA.Nome.unique()
MAX_LINE = go.Scatter(
    name="600k line",
    x=instance_names,
    y=[600000] * len(instance_names),
    mode='lines',
    marker=Marker(color='rgb(77,77,255)'),
)


# One box trace per budget, followed by the reference line.
data = [go.Box(name="%dk" % (i / 1000), x=budget.Nome, y=budget.Numero_de_comparacoes) for i, budget in budgets.items()]
data.append(MAX_LINE)


layout = go.Layout(title="Número de comparações até encontrar Melhor Fitness",
                xaxis=dict(title='Instâncias'),
                yaxis=dict(title='Quantidade de comprações'),
                boxmode='group')
fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

In [4]:
# Box plots of the time taken to reach the best solution, one trace per
# budget, followed by the one- and two-minute reference lines.
data = [
    go.Box(
        name="%dk" % (size / 1000),
        x=subset.Nome,
        y=subset.Tempo_para_encontrar_melhor_solucao,
    )
    for size, subset in budgets.items()
]
data += [ONE_MINUTE_LINE, TWO_MINUTES_LINE]

layout = go.Layout(
    title="Tempo até encontrar a melhor solução (em milissegundos)",
    xaxis={'title': 'Instâncias'},
    yaxis={'title': 'Tempo até melhor solução'},
    boxmode='group',
)
fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

In [5]:
# Box plot comparing the best fitness per instance, one trace per budget,
# with the maximum, Bartoli and Norvig reference markers added afterwards.

data = [
    go.Box(
        name="%dk" % (size / 1000),
        x=subset.Nome,
        y=subset.Melhor_fitness,
    )
    for size, subset in budgets.items()
]
for reference_trace in (FITNESS_LINE, BARTOLI_LINE, NORVIG_LINE):
    data.append(reference_trace)


layout = go.Layout(
    title="Melhor fitness",
    xaxis={'title': 'Instâncias'},
    yaxis={'title': 'fitness'},
    boxmode='group',
)
fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

In [7]:
from scipy import stats

# Best-fitness samples, one plain list per budget, in the dict's budget order.
# In the visible cells only the commented-out Kruskal call below consumes it.
data = [list(subset.Melhor_fitness) for subset in budgets.values()]

# result = stats.kruskal(*data)
# result

def printar_pontuacao():
    """Print one ';'-separated row of best-fitness statistics per instance.

    Each row starts with the instance name, followed, for every budget in
    `budgets`, by the minimum, the maximum and the rounded mean of
    `Melhor_fitness` for that instance. The instance names are taken from
    the 300000 budget.
    """
    for instance in budgets[300000].Nome.unique():
        fields = [instance]
        for subset in budgets.values():
            scores = subset[subset.Nome == instance].Melhor_fitness
            fields += [scores.min(), scores.max(), "%d" % round(scores.mean())]
            # fields.append(scores.median())
        print(";".join(str(field) for field in fields))

# printar_pontuacao()


def printar_kruskal():
    """Print, per instance, the Kruskal-Wallis p-value across all budgets.

    For every instance name found in the 300000 budget, the `Melhor_fitness`
    samples of each budget are passed as separate groups to
    `scipy.stats.kruskal`, and the name is printed next to the p-value with
    two decimals.

    If `kruskal` raises ValueError (presumably because every observation is
    identical -- confirm against the SciPy version in use), the p-value is
    reported as 1.0. The fallback is formatted exactly like a computed value,
    so the output no longer mixes "1.0" and "1.00".
    """
    names = budgets[300000].Nome.unique()

    for name in names:
        # One list of best-fitness values per budget, in budget order.
        samples = [
            list(subset[subset.Nome == name].Melhor_fitness)
            for subset in budgets.values()
        ]

        try:
            pvalue = stats.kruskal(*samples).pvalue
        except ValueError:
            pvalue = 1.0

        print(name, "%.2f" % pvalue)

printar_kruskal()


warmup 1.0
ranges 1.0
anchors 0.03
backrefs 0.04
abba 0.71
aman-aplan 0.01
prime 1.00
four 0.32
glob 0.20
order 0.01
triples 0.23
balance 0.07
long-count 1.0
powers 1.00
alphabetical 1.00
