In [None]:
import os
# Move the working directory one level up; presumably so that the relative
# "data/" and "figures/" paths and the `project` package imports used below
# resolve from the parent directory — TODO confirm the intended layout.
# NOTE(review): this is not idempotent — re-running the cell climbs one more
# directory level each time.
os.chdir("..")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import seaborn as sb
import math

In [None]:
from project.kerneldensityestimation import KernelDensityEstimation as KDE
from project.mixturedensity import MixtureDensity as MixDens
from project.model import Model
from project.dataholder import DataHolder
from project.lossfunction import MaximizeLikelihood
from project.optimizer import SPSA, SA
from project.topographicmap import TopographicMap

In [None]:
# Relative directories: `datafolder` is prefixed to the input file name when
# loading data, `imagefolder` is prefixed to the file name of the saved figure.
datafolder = "data/"
imagefolder = "figures/puregaussian/"

In [None]:
# Load the tab-separated input table. The cells below read the columns
# "rproj", "vproj" and "# r3d" from it.
df = pd.read_csv(datafolder+"data.tsv", delimiter="\t")

In [None]:
# Grow an n_bins x n_bins grid over (rproj, vproj) one step at a time until
# the emptiest cell holds fewer than 50 points, then step back by one and
# recompute the counts for the grid that is actually used.
n_bins = 2
continue_flag = True
while continue_flag:
	counts = np.histogram2d(df["rproj"], df["vproj"], bins=n_bins, range=[[0,10],[-3,3]])[0]
	# "\r" keeps the progress message on a single, overwritten line.
	print(f"Using {n_bins} bins, with min {counts.min()}", end="\r", flush=True)
	continue_flag = bool(counts.min() >= 50)
	if continue_flag:
		n_bins += 1
	else:
		n_bins -= 1
		counts = np.histogram2d(df["rproj"], df["vproj"], bins=n_bins, range=[[0,10],[-3,3]])[0]
print(f"Using {n_bins} bins, with min {counts.min()}", flush=True)

In [None]:
# Bin edges for the two projected quantities (n_bins bins each) and for r3d
# (fixed width), plus a 101-point grid spanning the r3d edges.
binwidth = 5
rbins = np.linspace(0, 10, num=n_bins+1)
# Descending edges: vbins[j] is the upper and vbins[j+1] the lower edge of bin j.
vbins = np.linspace(3, -3, num=n_bins+1)
r3d_upper = df["# r3d"].max()+binwidth
r3dbins = np.arange(0, r3d_upper, binwidth)
xs = np.linspace(r3dbins.min(), r3dbins.max(), num=101)

In [None]:
# Topographic map with n_kernels along each of its two axes.
# NOTE(review): the arguments (0, 10, -3, 3) match the (rproj, vproj) ranges
# used for the histogram binning above; presumably they are the map's domain
# limits — confirm against TopographicMap's signature.
n_kernels = 8
tm = TopographicMap(0, 10, -3, 3, n_kernels, n_kernels)

In [None]:
# Evaluate the topographic map at the centre of every (rproj, vproj) bin.
# The r index varies slowest and the v index fastest, matching the order in
# which the per-bin fits are collected further down.
rcentres = 0.5 * (rbins[1:] + rbins[:-1])
vcentres = 0.5 * (vbins[1:] + vbins[:-1])
phis_per_bin = [tm(rc, vc) for rc in rcentres for vc in vcentres]
# Round to 5 decimals and transpose so that each bin becomes one column.
initial_phis = np.round(phis_per_bin, 5).T

In [None]:
model = Model(tm)

def pymixture(xs, a1, m1, s1):
	"""Evaluate the model's mixture for the parameters (a1, m1, s1) at `xs`.

	Thin adapter around `model.evalOnThetas` with the `f(x, *params)`
	signature; it is passed to `curve_fit` as the model function below.
	"""
	thetas = np.array([a1, m1, s1])
	return model.evalOnThetas(thetas)(xs)

# Quick visual check of the mixture for one hand-picked parameter set.
plt.plot(xs, pymixture(xs, 1, 60, 3))

In [None]:
# Fit the three-parameter mixture to the KDE of r3d in every (rproj, vproj)
# bin and draw histogram, KDE and fitted mixture in a grid of panels.
# squeeze=False keeps `axs` two-dimensional even when the bin search ended
# with n_bins == 1; without it plt.subplots returns a bare Axes and
# `axs[j,i]` fails.
fig, axs = plt.subplots(n_bins, n_bins, figsize=(n_bins*4,)*2, squeeze=False)

# Evaluation grid shared by all panels; it does not depend on the bin, so it
# is built once instead of once per panel.
xs = np.linspace(r3dbins.min(), r3dbins.max(), 1001)

initial_params = []       # one (a1, m1, s1) triple per bin, r index slowest
n_evals_transformed = []  # curve_fit function evaluations of the successful fits
for i in range(n_bins):
	rmask = np.logical_and(df["rproj"]>=rbins[i], df["rproj"]<=rbins[i+1])
	for j in range(n_bins):
		# vbins is descending, so vbins[j+1] is the lower edge of bin j.
		vmask = np.logical_and(df["vproj"]>=vbins[j+1], df["vproj"]<=vbins[j])
		subset = df.loc[np.logical_and(rmask, vmask)]
		ax = axs[j,i]
		ax.hist(subset["# r3d"], bins=r3dbins, density=True, alpha=0.5, label="data")
		values = subset["# r3d"].to_numpy()
		kde = KDE(values, 5)
		# Evaluate the KDE once and reuse it for both the plot and the fit.
		# NOTE(review): assumes kde(xs) is deterministic and side-effect free.
		kde_on_grid = kde(xs)
		ax.plot(xs, kde_on_grid, linewidth=5, label="kde")
		try:
			params,_,output_dict,_,_ = curve_fit(pymixture, xs, kde_on_grid, p0=[1,60,3], bounds=([0,0,-10,],[1,150,10]), full_output=True, maxfev=10000)
			n_evals_transformed.append(output_dict["nfev"])
			mixture = model.evalOnThetas(params)
			ax.plot(xs, mixture(xs), linestyle="--", linewidth=5, label="mixture")
		except Exception as error:
			# Best effort: report which bin failed and carry NaNs so that the
			# list stays aligned with the bin order. n_evals_transformed gets
			# no entry for a failed fit.
			print(f"fit failed for bin (i={i}, j={j}): {error}")
			params = [np.nan]*3
		initial_params.append(params)
		# Only the bottom row and the left column keep their ticks and labels.
		if j == n_bins - 1:
			ax.set_xlabel(r"$r_\mathrm{3D}$")
		else:
			ax.set_xticks([])
		if i == 0:
			ax.set_ylabel(r"$P(r_\mathrm{3D})$")
		else:
			ax.set_yticks([])
		ax.set_ylim([0,0.05])
		ax.legend(title=f"r={rbins[i:i+2].mean():.1f}, v={vbins[j:j+2].mean():.1f}")

fig.tight_layout()

# One column per bin; the fit allows s1 in [-10, 10], so store its magnitude.
initial_params_transformed = np.array(initial_params).T
initial_params_transformed[2,:] = np.abs(initial_params_transformed[2,:])

In [None]:
# Display the per-bin fit results: one column per (rproj, vproj) bin, rows are
# the three fitted parameters with the third row stored as an absolute value.
initial_params_transformed

In [None]:
# Display the topographic-map outputs at the bin centres (rounded to 5
# decimals and transposed when they were built).
initial_phis

In [None]:
# Initial weights W such that W @ initial_phis approximates the per-bin fit
# parameters, obtained with the Moore-Penrose pseudo-inverse of initial_phis.
# NOTE(review): bins whose fit failed carry NaN parameters, which would
# propagate into these weights — check the fit output before relying on them.
Winit_transformed = initial_params_transformed @ np.linalg.pinv(initial_phis)
Winit_transformed

In [None]:
# Hand the columns (rproj, vproj, r3d) to the DataHolder as a NumPy array,
# truncated to the first 100000 rows.
dataholder = DataHolder(df[["rproj","vproj","# r3d"]].to_numpy()[:100000])

In [None]:
# Loss object built from the data holder and the model; it is the objective
# passed to the optimizer below.
lossfunction = MaximizeLikelihood(dataholder, model)

In [None]:
# SPSA optimizer with its default settings (SA is imported but not used here).
# NOTE(review): if SPSA draws random perturbations, no seed is set anywhere in
# this notebook — confirm whether results are reproducible across runs.
optimizer = SPSA()

In [None]:
# Display the optimizer's maxIterations attribute.
optimizer.maxIterations

In [None]:
# Run the optimizer on the loss, starting from the pseudo-inverse
# initialization, and display the resulting weights.
W_transformed = optimizer.optimize(lossfunction, Winit_transformed)
W_transformed

In [None]:
# Compare, on a fixed 25 x 25 grid over (rproj, vproj), the r3d histogram and
# its KDE with the model density for the optimized and for the initial weights.
# NOTE: this overwrites n_bins, rbins, vbins, r3dbins and xs from the earlier
# cells, so those cells must be re-run before the fitting cell is repeated.
n_bins = 25

fig, axs = plt.subplots(n_bins, n_bins, figsize=(n_bins*4,)*2)

vbins = np.linspace(3, -3, n_bins+1)
rbins = np.linspace(0, 10, n_bins+1)
binwidth = 5
r3dbins = np.arange(0, df["# r3d"].max()+binwidth, binwidth)
# Evaluation grid shared by all panels (loop-invariant, so built once).
xs = np.linspace(r3dbins.min(), r3dbins.max(), 101)

for i in range(n_bins):
	rmask = np.logical_and(df["rproj"]>=rbins[i], df["rproj"]<=rbins[i+1])
	rcentre = 0.5*(rbins[i]+rbins[i+1])
	for j in range(n_bins):
		# vbins is descending, so vbins[j+1] is the lower edge of bin j.
		vmask = np.logical_and(df["vproj"]>=vbins[j+1], df["vproj"]<=vbins[j])
		# The velocity centre must follow the velocity index j; the original
		# indexed vbins with i, so the model density shown in a panel did not
		# belong to that panel's velocity bin.
		vcentre = 0.5*(vbins[j]+vbins[j+1])
		subset = df.loc[np.logical_and(rmask, vmask)]
		ax = axs[j,i]
		ax.hist(subset["# r3d"], bins=r3dbins, density=True, alpha=0.5, label="data")
		values = subset["# r3d"].to_numpy()
		# NOTE(review): on this fine grid a bin may contain no points; confirm
		# that KDE handles an empty `values` array.
		kde = KDE(values, 3)
		ax.plot(xs, kde(xs), linewidth=3, label="KDE")
		# The set -> get -> plot order is kept for each weight set, in case the
		# returned density reads the model's current parameters when called.
		model.setParameters(W_transformed)
		targetdensity = model.getDensity(rcentre, vcentre)
		ax.plot(xs, targetdensity(xs), linestyle="--", linewidth=3, label="after optimization")
		model.setParameters(Winit_transformed)
		initdensity = model.getDensity(rcentre, vcentre)
		ax.plot(xs, initdensity(xs), linestyle=":", linewidth=5, label="with initialization")
		# Only the bottom row and the left column keep their ticks and labels.
		if j == n_bins - 1:
			ax.set_xlabel(r"$r_\mathrm{3D}$")
		else:
			ax.set_xticks([])
		if i == 0:
			ax.set_ylabel(r"$P(r_\mathrm{3D})$")
		else:
			ax.set_yticks([])
		ax.set_ylim([0,0.05])
		# Label the panel with the bin centres (the original one-element slices
		# printed the bin edges instead).
		ax.legend(title=f"r={rcentre:.1f}, v={vcentre:.1f}")

# NOTE: the last setParameters call above leaves `model` holding
# Winit_transformed, not the optimized weights.
fig.tight_layout()
# Create the output directory if needed so savefig does not fail after the
# whole grid has been drawn.
os.makedirs(imagefolder, exist_ok=True)
fig.savefig(imagefolder+"optimized-with-init-with-transformed.png")