# Lab. 1

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Data preparation

In [None]:
# Target functions that are sampled into the TinyGP input files and later
# plotted against the evolved expressions.
# Entries 0-2 take one variable (x); entries 3-5 take two (x, y).
# The plotting helpers build figure titles from inspect.getsource() of these
# lambdas, so the source text of each entry is part of the displayed output.
# NOTE(review): the fifth function accepts y but never uses it — confirm this
# matches the assignment and is not a typo for np.cos(y).
functions = [
    lambda x: 5 * x**3 - 2 * x**2 + 3 * x - 17,
    lambda x: np.sin(x) + np.cos(x),
    lambda x: 2 * np.log(x + 1),
    lambda x, y: x + 2 * y,
    lambda x, y: np.sin(x / 2) + 2 * np.cos(x),
    lambda x, y: x**2 + 3 * x * y - 7 * y + 1,
]

In [None]:
# Sampling intervals: one inner list per entry of `functions`, in the same
# order. Every (low, high) pair is one experiment; for the two-variable
# functions the same interval is applied to both x and y.
# Two pairs of functions share identical interval sets, so those sets are
# named once and copied into place.
_cubic_intervals = [(-10, 10), (0, 100), (-1, 1), (-1000, 1000)]
_trig_intervals = [(-3.14, 3.14), (0, 7), (0, 100), (-100, 100)]
_log_intervals = [(0, 4), (0, 9), (0, 99), (0, 999)]
_linear_intervals = [(0, 1), (-10, 10), (0, 100), (-1000, 1000)]

domains = [
    list(_cubic_intervals),
    list(_trig_intervals),
    list(_log_intervals),
    list(_linear_intervals),
    list(_trig_intervals),
    list(_cubic_intervals),
]

In [None]:
import os
import subprocess

In [None]:
# Generate the TinyGP input files for the one-variable functions (indices 0-2).
# Each file consists of a header line "1 100 -5 6 <number of rows>" followed by
# one space-separated "x y" pair per line.
# NOTE(review): the header fields are presumably TinyGP's variable count,
# random-constant count, constant range and number of fitness cases — confirm
# against the TinyGP sources.
input_filenames = []

# os.path.join produces the same "data\input" location on Windows without the
# invalid "\i" / "\{" escape sequences of a backslash literal, and also works
# on other platforms.
input_dir = os.path.join("data", "input")
os.makedirs(input_dir, exist_ok=True)

for i, (function, domains_) in enumerate(zip(functions, domains)):
    if i in [3, 4, 5]:
        continue  # two-variable functions are written by a separate cell
    for domain in domains_:
        # 100 samples per unit of interval length, capped at 1000 rows.
        n_samples = min(int(100 * abs(domain[1] - domain[0])), 1000)
        X = np.linspace(*domain, n_samples).round(2)
        Y = function(X).round(2)
        filename = f"1_{i+1}__{domain[0]}_{domain[1]}.dat"
        input_path = os.path.join(input_dir, filename)
        # "w" (not "a") truncates any file left by a previous run; appending
        # used to stack a second header and data set on every re-execution.
        with open(input_path, "w") as f:
            f.write(f"1 100 -5 6 {X.shape[0]}\n")
        pd.DataFrame({"X": X, "Y": Y}).to_csv(
            input_path, index=False, header=False, sep=" ", mode="a"
        )
        input_filenames.append(filename)

In [None]:
# Generate the TinyGP input files for the two-variable functions (indices 3-5).
# Each file consists of a header line "2 100 -5 17 <number of rows>" followed
# by one space-separated "x y f(x, y)" triple per line, covering a square grid.
#
# Continue the list built by the one-variable cell instead of resetting it:
# resetting dropped those files before the TinyGP run cell, so their .out logs
# were never produced on a clean Run All. The globals() lookup keeps this cell
# runnable on its own as well.
input_filenames = list(globals().get("input_filenames", []))

# os.path.join produces the same "data\input" location on Windows without the
# invalid "\i" / "\{" escape sequences of a backslash literal, and also works
# on other platforms.
input_dir = os.path.join("data", "input")
os.makedirs(input_dir, exist_ok=True)

for i, (function, domains_) in enumerate(zip(functions, domains)):
    if i in [0, 1, 2]:
        continue  # one-variable functions are written by a separate cell
    for domain in domains_:
        # 100 samples per unit of interval length, capped at 50 per axis
        # (the grid therefore has at most 50 * 50 rows).
        n_samples = min(int(100 * abs(domain[1] - domain[0])), 50)
        X = Y = np.linspace(*domain, n_samples).round(2)
        X2D, Y2D = np.meshgrid(X, Y)
        f_XY = function(X2D.ravel(), Y2D.ravel()).round(2)
        filename = f"1_{i+1}__{domain[0]}_{domain[1]}.dat"
        input_path = os.path.join(input_dir, filename)
        # "w" (not "a") truncates any file left by a previous run; appending
        # used to stack a second header and data set on every re-execution.
        with open(input_path, "w") as f:
            f.write(f"2 100 -5 17 {X.shape[0]*Y.shape[0]}\n")
        pd.DataFrame({"X": X2D.ravel(), "Y": Y2D.ravel(), "f(X, Y)": f_XY}).to_csv(
            input_path, index=False, header=False, sep=" ", mode="a"
        )
        # Membership check keeps the list free of duplicates when re-run.
        if filename not in input_filenames:
            input_filenames.append(filename)

In [None]:
# Run TinyGP once per generated input file and capture its console output in
# data/output/initial/<input name>.out.
# The Java process is started directly (argument list, cwd, redirected stdout)
# instead of through a cmd.exe one-liner, so the cell no longer depends on
# Windows-only `copy NUL`, `&` chaining, backslash paths with invalid escape
# sequences, or shell quoting of the file names.
output_dir = os.path.join("data", "output", "initial")
os.makedirs(output_dir, exist_ok=True)

for input_file in input_filenames:
    output_file = os.path.splitext(input_file)[0] + ".out"
    output_path = os.path.join(output_dir, output_file)
    # The input path is relative to the TinyGP directory the process runs in.
    command = ["java", "TinyGP.java", os.path.join("..", "data", "input", input_file)]
    print(" ".join(command), ">", output_path)
    # Opening with "w" creates or empties the log, which is what `copy NUL` did.
    with open(output_path, "w") as output_stream:
        t = subprocess.run(command, cwd="TinyGP", stdout=output_stream)
    # Surface failed runs instead of silently leaving an unusable log behind.
    if t.returncode != 0:
        print(f"TinyGP exited with code {t.returncode} for {input_file}")

### Output parsing

In [None]:
import re
import json

In [None]:
def dictify(log):
    """Turn one generation record of a TinyGP log into a JSON object string.

    The record is expected with its labels already stripped of spaces
    (``AvgFitness=...BestFitness=...AvgSize=...`` followed by
    ``BestIndividual:...``). The labels become the JSON keys ``avg_fitness``,
    ``best_fitness``, ``avg_size`` and ``best_individual``; the individual is
    emitted as a JSON string in which ``X1``/``X2`` are renamed to ``X``/``Y``.
    Newlines become spaces and the ``PROBLEMSOLVED`` /
    ``PROBLEM*NOT*SOLVED`` markers are removed.

    Parameters
    ----------
    log : str
        Text of a single generation record.

    Returns
    -------
    str
        The rewritten text, terminated with ``"}``.
    """
    # Applied strictly in this order: the key labels are inserted before "="
    # is turned into ": ", and newlines are flattened before the end markers
    # are removed.
    substitutions = (
        ("AvgFitness", '{"avg_fitness"'),
        ("BestFitness", ', "best_fitness"'),
        ("AvgSize", ', "avg_size"'),
        ("BestIndividual:", ', "best_individual": "'),
        ("\n", " "),
        ("=", ": "),
        ("X1", "X"),
        ("X2", "Y"),
        ("PROBLEM*NOT*SOLVED", ""),
        ("PROBLEMSOLVED", ""),
    )
    text = log
    for old, new in substitutions:
        text = text.replace(old, new)
    return text + '"}'

In [None]:
def logs_to_df(logs):
    """Parse a complete TinyGP log into a DataFrame with one row per generation.

    The text is split on the word ``Generation``; anything before its first
    occurrence is ignored. In each remaining chunk the first space-separated
    token (minus its leading character) is the generation number, and the rest
    of the chunk, with all spaces removed, is converted by ``dictify`` and
    decoded with ``json.loads``.

    Parameters
    ----------
    logs : str
        Full text of a TinyGP output file.

    Returns
    -------
    pandas.DataFrame
        Indexed by generation number, with one column per key produced by
        ``dictify``.
    """
    raw_records = {}
    for chunk in re.split("Generation", logs)[1:]:
        tokens = chunk.split(" ")
        generation = int(tokens[0][1:])
        raw_records[generation] = "".join(tokens[1:])

    parsed_records = {
        generation: json.loads(dictify(text))
        for generation, text in raw_records.items()
    }
    return pd.DataFrame.from_dict(parsed_records, orient="index")

### Comparison

In [None]:
import inspect

In [None]:
def compare(function, domain, df, size=(8, 8)):
    """Plot a one-variable target function against the final GP individual.

    Parameters
    ----------
    function : callable
        Target function of one argument; it is called with a NumPy array.
    domain : tuple
        ``(low, high)`` interval to plot over.
    df : pandas.DataFrame
        Generation table; the bottom-right cell is read as the expression to
        plot (the last generation's ``best_individual`` when the frame comes
        from ``logs_to_df``).
    size : tuple, optional
        Figure size passed to ``plt.figure``.
    """
    X = np.linspace(*domain, int(100 * abs(domain[1] - domain[0])))
    Y_true = function(X)

    best_individual = df.iloc[-1, -1]
    # NOTE(review): eval() executes whatever text the log file contains; only
    # use it on logs produced by your own TinyGP runs. The expression refers
    # to the local name X.
    # NOTE(review): TinyGP presumably uses protected division, while "/" here
    # is plain NumPy division and may yield inf/nan — confirm.
    # broadcast_to: an individual without variables evaluates to a scalar,
    # which could not be plotted against X.
    Y_gp = np.broadcast_to(np.asarray(eval(best_individual)), X.shape)

    plt.figure(figsize=size)
    plt.plot(X, Y_true, label="True", linestyle="-", color="black", linewidth=4, alpha=0.5)
    plt.plot(X, Y_gp, label="GP", linestyle="-", color="red", linewidth=1)
    plt.legend()
    # The title is derived from the lambda's own source line.
    plt.title(
        inspect.getsource(function)
        .replace("lambda x:", "f(x) =")
        .replace(",", "")
        .replace("np.", "")
        + f"\nx ∈ [{domain[0]}, {domain[1]}]"
    )
    plt.grid()
    plt.show()

In [None]:
def compare_2_var(function, domain, df, size=(8, 8)):
    """Plot a two-variable target function and the final GP individual as 3-D surfaces.

    Parameters
    ----------
    function : callable
        Target function of two arguments; it is called with flattened grids.
    domain : tuple
        ``(low, high)`` interval used for both x and y (100 x 100 grid).
    df : pandas.DataFrame
        Generation table; the bottom-right cell is read as the expression to
        plot (the last generation's ``best_individual`` when the frame comes
        from ``logs_to_df``).
    size : tuple, optional
        Figure size passed to ``plt.figure``.
    """
    X, Y = np.meshgrid(np.linspace(*domain, 100), np.linspace(*domain, 100))
    Z_true = function(X.ravel(), Y.ravel())

    best_individual = df.iloc[-1, -1]
    # NOTE(review): eval() executes whatever text the log file contains; only
    # use it on logs produced by your own TinyGP runs. The expression refers
    # to the local names X and Y.
    # NOTE(review): TinyGP presumably uses protected division, while "/" here
    # is plain NumPy division and may yield inf/nan — confirm.
    # broadcast_to: an individual without variables evaluates to a scalar,
    # which has no .reshape and could not be drawn as a surface.
    Z_gp = np.broadcast_to(np.asarray(eval(best_individual)), X.shape)

    plt.figure(figsize=size)
    ax = plt.axes(projection="3d")
    ax.plot_surface(
        X,
        Y,
        Z_true.reshape(X.shape),
        label="True",
        color="gray",
        linewidth=2,
        alpha=0.5,
    )
    ax.plot_surface(
        X,
        Y,
        Z_gp.reshape(X.shape),
        label="GP",
        color="red",
        linewidth=1,
        alpha=0.5,
    )
    plt.legend()
    # The title is derived from the lambda's own source line; commas are
    # removed first, hence the "lambda x y:" pattern.
    plt.title(
        inspect.getsource(function)
        .replace(",", "")
        .replace("lambda x y:", "f(x, y) =")
        .replace("np.", "")
        + f"\nx ∈ [{domain[0]}, {domain[1]}], y ∈ [{domain[0]}, {domain[1]}]"
    )
    plt.grid()
    plt.show()

In [None]:
def compare_2_var_3_views(function, domain, df, size=(12, 5)):
    """Show target and GP surfaces side by side from three viewing angles.

    The figure has three 3-D panels with identical content: the first keeps
    the default camera, the second uses ``azim=90`` and the third ``azim=180``.

    Parameters
    ----------
    function : callable
        Target function of two arguments; it is called with flattened grids.
    domain : tuple
        ``(low, high)`` interval used for both x and y (100 x 100 grid).
    df : pandas.DataFrame
        Generation table; the bottom-right cell is read as the expression to
        plot (the last generation's ``best_individual`` when the frame comes
        from ``logs_to_df``).
    size : tuple, optional
        Figure size passed to ``plt.figure``.
    """
    X, Y = np.meshgrid(np.linspace(*domain, 100), np.linspace(*domain, 100))
    Z_true = function(X.ravel(), Y.ravel())

    best_individual = df.iloc[-1, -1]
    # NOTE(review): eval() executes whatever text the log file contains; only
    # use it on logs produced by your own TinyGP runs. The expression refers
    # to the local names X and Y.
    # NOTE(review): TinyGP presumably uses protected division, while "/" here
    # is plain NumPy division and may yield inf/nan — confirm.
    # broadcast_to: an individual without variables evaluates to a scalar,
    # which has no .reshape and could not be drawn as a surface.
    Z_gp = np.broadcast_to(np.asarray(eval(best_individual)), X.shape)

    fig = plt.figure(figsize=size)
    # The title is derived from the lambda's own source line; commas are
    # removed first, hence the "lambda x y:" pattern.
    fig.suptitle(
        inspect.getsource(function)
        .replace(",", "")
        .replace("lambda x y:", "f(x, y) =")
        .replace("np.", "")
        + f"\nx ∈ [{domain[0]}, {domain[1]}], y ∈ [{domain[0]}, {domain[1]}]"
    )

    Z_true_grid = Z_true.reshape(X.shape)
    Z_gp_grid = Z_gp.reshape(X.shape)
    surface_style = dict(rstride=1, cstride=1, linewidth=0, antialiased=False, alpha=0.5)

    # None keeps matplotlib's default camera for the first panel.
    for position, azimuth in enumerate((None, 90, 180), start=1):
        ax = fig.add_subplot(1, 3, position, projection='3d')
        ax.plot_surface(X, Y, Z_true_grid, color="gray", label="True", **surface_style)
        ax.plot_surface(X, Y, Z_gp_grid, color="red", label="GP", **surface_style)
        ax.xaxis.set_label_text("X")
        ax.yaxis.set_label_text("Y")
        ax.zaxis.set_label_text("Z")
        if azimuth is not None:
            ax.view_init(azim=azimuth)

    # As before, grid and legend act on the current (last) axes only.
    plt.grid()
    plt.legend()
    plt.show()

In [None]:
def show_fitness_process(df):
    """Plot average and best fitness per generation in two stacked panels.

    Parameters
    ----------
    df : pandas.DataFrame
        Generation table with ``avg_fitness`` and ``best_fitness`` columns;
        each column is plotted against the frame's index.
    """
    _, panels = plt.subplots(2)
    series_colours = (("avg_fitness", "red"), ("best_fitness", "blue"))

    # Upper panel: average fitness (red); lower panel: best fitness (blue).
    for panel, (column, colour) in zip(panels, series_colours):
        panel.plot(df[column], label=column, linestyle="-", color=colour, linewidth=1)
        panel.legend()
        panel.grid()

    # Only the upper panel carries the title.
    panels[0].title.set_text("fitness(generation)")
    plt.show()

In [None]:
# For every function and interval: load the TinyGP log, plot the final
# individual against the target function, then plot the fitness history.
for i, (function, domains_) in enumerate(zip(functions, domains)):
    # Human-readable form of the lambda's own source line.
    function_source = inspect.getsource(function)
    function_label = function_source.replace("lambda x:", "f(x) =").replace(
        "lambda x, y:", "f(x, y) ="
    )
    print(f"Function {i+1}: {function_label}")

    for domain in domains_:
        log_path = f"data/output/initial/1_{i+1}__{domain[0]}_{domain[1]}.out"
        with open(log_path, "r") as f:
            logs = f.read()

        df = logs_to_df(logs)

        # Indices 0-2 are the one-variable functions; 3-5 need the 3-D views.
        if i not in [3, 4, 5]:
            compare(function, domain, df, size=(6, 6))
        else:
            compare_2_var_3_views(function, domain, df)

        show_fitness_process(df)