In [None]:
import os
# Relative change of the working directory: each execution of this cell moves
# one level further up, so it must be run exactly once per kernel session.
os.chdir("..")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import seaborn as sb
import math

In [None]:
from project.kerneldensityestimation import KernelDensityEstimation as KDE
from project.mixturedensity import MixtureDensity as MixDens
from project.model import Model
from project.dataholder import DataHolder
from project.lossfunction import MaximizeLikelihood
from project.optimizer import SPSA, SA
from project.topographicmap import TopographicMap

In [None]:
# Relative directories, resolved against the current working directory:
# the input table is read from `datafolder`, figures are saved to `imagefolder`.
datafolder = "data/"
imagefolder = "figures/"

In [None]:
# Load a weight matrix from a text file in the working directory.
# NOTE(review): `Winit` is reassigned from the fitted parameters further down
# before it is read anywhere, so this loaded value looks unused — confirm.
Winit = np.loadtxt("initW.txt")

In [None]:
# Tab-separated input table; `sep` is the canonical spelling of `delimiter`.
df = pd.read_csv(datafolder + "data.tsv", sep="\t")

In [None]:
# Inspect the loaded table.
df

In [None]:
# Increase the number of bins per axis until the emptiest cell of the
# (rproj, vproj) histogram holds fewer than 50 rows, then step back by one
# and recompute the counts for the bin count that is actually kept.
n_bins = 2
continue_flag = True
while continue_flag:
	counts = np.histogram2d(df["rproj"], df["vproj"], bins=n_bins, range=[[0,10],[-3,3]])[0]
	# Progress is written on a single line that is overwritten at every step.
	print(f"Using {n_bins} bins, with min {counts.min()}", end="\r", flush=True)
	continue_flag = counts.min() >= 50
	if continue_flag:
		n_bins += 1
	else:
		n_bins -= 1
		counts = np.histogram2d(df["rproj"], df["vproj"], bins=n_bins, range=[[0,10],[-3,3]])[0]
print(f"Using {n_bins} bins, with min {counts.min()}", flush=True)

In [None]:
# 2-D histogram of the projected quantities with the bin count found above.
# NOTE(review): no bin range is passed here, so the edges presumably follow the
# data extent rather than the fixed [0, 10] x [-3, 3] range used in the search — confirm.
sb.histplot(data=df, x="rproj", y="vproj", bins=n_bins, cbar=True)

In [None]:
# vproj edges run from +3 down to -3 (descending), so index 0 is the highest bin.
vbins = np.linspace(3, -3, n_bins+1)
# rproj edges run from 0 up to 10.
rbins = np.linspace(0, 10, n_bins+1)
# r3d histogram edges in steps of `binwidth`; the stop value is padded by one
# bin width so the edges reach the largest r3d value in the table.
binwidth = 5
r3dbins = np.arange(0, df["# r3d"].max()+binwidth, binwidth)

In [None]:
import time

In [None]:
# The same kernel count is passed for both of the last two arguments.
n_kernels = 4
# The first four arguments repeat the rproj range (0, 10) and vproj range (-3, 3)
# used for the binning. NOTE(review): presumably an n_kernels x n_kernels grid
# over that rectangle — confirm against TopographicMap.
tm = TopographicMap(0, 10, -3, 3, n_kernels, n_kernels)

In [None]:
# Per-bin initial fit: for every (rproj, vproj) bin the KDE of r3d is used as
# the target curve and the nine parameters consumed by `model.evalOnThetas`
# are fitted to it with curve_fit.
# squeeze=False keeps `axs` two-dimensional even when n_bins == 1, so the
# axs[j, i] indexing below is always valid.
fig, axs = plt.subplots(n_bins, n_bins, figsize=(n_bins*4,)*2, squeeze=False)

model = Model(tm)
def pymixture(xs, a1, a2, a3, m1, s1, m2, s2, m3, s3):
	"""Curve-fit adapter around ``model.evalOnThetas``.

	The nine scalars are packed, in signature order, into one array and passed
	to ``model.evalOnThetas``; the callable it returns is evaluated on ``xs``
	and that result is returned. The current parameter values are echoed on a
	single, overwritten output line so the fit can be followed.

	NOTE(review): a later cell defines another ``pymixture`` that shadows this one.
	"""
	print(f"{a1:.2f}, {a2:.2f}, {a3:.2f}, {m1:.2f}, {s1:.2f}, {m2:.2f}, {s2:.2f}, {m3:.2f}, {s3:.2f}", end="\r", flush=True)
	mixture = model.evalOnThetas(np.array([a1, a2, a3, m1, s1, m2, s2, m3, s3]))
	return mixture(xs)

initial_params = []
n_evals = []
# The evaluation grid depends only on r3dbins, so it is built once for all panels.
xs = np.linspace(r3dbins.min(), r3dbins.max(), 101)
for i in range(n_bins):
	# Both edges are inclusive: a row lying exactly on an edge is selected for both neighbouring bins.
	rmask = np.logical_and(df["rproj"]>=rbins[i], df["rproj"]<=rbins[i+1])
	for j in range(n_bins):
		ax = axs[j,i]
		# vbins is descending, hence the lower edge is vbins[j+1].
		vmask = np.logical_and(df["vproj"]>=vbins[j+1], df["vproj"]<=vbins[j])
		subset = df.loc[np.logical_and(rmask, vmask)]
		ax.hist(subset["# r3d"], bins=r3dbins, density=True, alpha=0.5, label="data")
		values = subset["# r3d"].to_numpy()
		kde = KDE(values, 3)
		# Evaluate the KDE once and reuse it for the plot and as the fit target.
		kde_values = kde(xs)
		ax.plot(xs, kde_values, linewidth=5, label="kde")
		# NOTE(review): the bounds are grouped as (3 entries, 3 entries, 3 entries),
		# while the signature of `pymixture` interleaves m and s. Confirm that the
		# bound slots line up with the layout `model.evalOnThetas` expects.
		params,_,output_dict,_,_ = curve_fit(pymixture, xs, kde_values, p0=[1,1,1,5,5,5,1,1,1], bounds=([-np.inf,-np.inf,-np.inf,-np.inf,-np.inf,-np.inf,-10,-10,-10],[10,10,10,np.inf,np.inf,np.inf,10,10,10]), full_output=True)
		initial_params.append(params)
		mixture = model.evalOnThetas(params)
		ax.plot(xs, mixture(xs), linestyle="--", linewidth=5, label="mixture")
		# Only the bottom row and the left column keep their ticks and labels.
		if j == n_bins - 1:
			ax.set_xlabel(r"$r_\mathrm{3D}$")
		else:
			ax.set_xticks([])
		if i == 0:
			ax.set_ylabel(r"$P(r_\mathrm{3D})$")
		else:
			ax.set_yticks([])
		ax.set_ylim([0,0.05])
		# The legend title shows the centre of the bin (mean of its two edges).
		ax.legend(title=f"r={rbins[i:i+2].mean():.1f}, v={vbins[j:j+2].mean():.1f}")
		# Number of function evaluations curve_fit needed for this bin.
		n_evals.append(output_dict["nfev"])

fig.tight_layout()
# NOTE(review): a later cell saves to the same two file names, so these files
# are overwritten when the notebook is run from top to bottom.
fig.savefig(imagefolder+"initialization.png")
fig.savefig(imagefolder+"initialization.pdf")

# One column per bin (bins ordered with i outer, j inner), one row per parameter.
initial_params = np.array(initial_params).T

In [None]:
# Function evaluations per bin (in fitting order) with their mean as a horizontal line.
plt.plot(n_evals)
plt.axhline(np.mean(n_evals))

In [None]:
# Per-bin initial fit of a MixDens mixture directly to the KDE of r3d.
# squeeze=False keeps `axs` two-dimensional even when n_bins == 1, so the
# axs[j, i] indexing below is always valid.
fig, axs = plt.subplots(n_bins, n_bins, figsize=(n_bins*4,)*2, squeeze=False)

def pymixture(xs, a1, a2, a3, m1, s1, m2, s2, m3, s3):
	"""Curve-fit adapter around ``MixDens``.

	A fresh ``MixDens`` is created on every call, the nine scalars are packed
	in signature order into one array and passed to ``setParams``, and the
	mixture evaluated on ``xs`` is returned. The current parameter values are
	echoed on a single, overwritten output line so the fit can be followed.

	NOTE(review): this redefines the ``pymixture`` from the previous fitting cell.
	"""
	print(f"{a1:.2f}, {a2:.2f}, {a3:.2f}, {m1:.2f}, {s1:.2f}, {m2:.2f}, {s2:.2f}, {m3:.2f}, {s3:.2f}", end="\r", flush=True)
	mixture = MixDens()
	params = np.array([a1, a2, a3, m1, s1, m2, s2, m3, s3])
	mixture.setParams(params)
	return mixture(xs)

initial_params = []
n_evals = []
# The evaluation grid depends only on r3dbins, so it is built once for all panels.
xs = np.linspace(r3dbins.min(), r3dbins.max(), 101)
for i in range(n_bins):
	# Both edges are inclusive: a row lying exactly on an edge is selected for both neighbouring bins.
	rmask = np.logical_and(df["rproj"]>=rbins[i], df["rproj"]<=rbins[i+1])
	for j in range(n_bins):
		ax = axs[j,i]
		# vbins is descending, hence the lower edge is vbins[j+1].
		vmask = np.logical_and(df["vproj"]>=vbins[j+1], df["vproj"]<=vbins[j])
		subset = df.loc[np.logical_and(rmask, vmask)]
		ax.hist(subset["# r3d"], bins=r3dbins, density=True, alpha=0.5, label="data")
		values = subset["# r3d"].to_numpy()
		kde = KDE(values, 3)
		# Evaluate the KDE once and reuse it for the plot and as the fit target.
		kde_values = kde(xs)
		ax.plot(xs, kde_values, linewidth=5, label="kde")
		# NOTE(review): the bounds are grouped as three entries in [0, inf), three
		# unbounded entries and three entries in [0, 100], whereas the signature of
		# `pymixture` interleaves means and widths (m1, s1, m2, s2, m3, s3). If
		# MixDens.setParams reads the array in signature order, the width bounds
		# land on the wrong slots — confirm the expected layout.
		params,_,output_dict,_,_ = curve_fit(pymixture, xs, kde_values, p0=[1,1,1,5,5,5,1,1,1], bounds=([0,0,0,-np.inf,-np.inf,-np.inf,0,0,0],[np.inf,np.inf,np.inf,np.inf,np.inf,np.inf,100,100,100]), full_output=True)
		initial_params.append(params)
		# `mixture` stays a notebook-level name on purpose: later plotting cells
		# reuse the object left over from the last iteration.
		mixture = MixDens()
		mixture.setParams(params)
		ax.plot(xs, mixture(xs), linestyle="--", linewidth=5, label="mixture")
		# Only the bottom row and the left column keep their ticks and labels.
		if j == n_bins - 1:
			ax.set_xlabel(r"$r_\mathrm{3D}$")
		else:
			ax.set_xticks([])
		if i == 0:
			ax.set_ylabel(r"$P(r_\mathrm{3D})$")
		else:
			ax.set_yticks([])
		ax.set_ylim([0,0.05])
		# The legend title shows the centre of the bin (mean of its two edges).
		ax.legend(title=f"r={rbins[i:i+2].mean():.1f}, v={vbins[j:j+2].mean():.1f}")
		# Number of function evaluations curve_fit needed for this bin.
		n_evals.append(output_dict["nfev"])

fig.tight_layout()
# NOTE(review): the previous fitting cell saves to the same two file names;
# this cell overwrites those files.
fig.savefig(imagefolder+"initialization.png")
fig.savefig(imagefolder+"initialization.pdf")

# One column per bin (bins ordered with i outer, j inner), one row per parameter.
initial_params = np.array(initial_params).T

In [None]:
# Function evaluations per bin for the MixDens fit, with their mean as a horizontal line.
plt.plot(n_evals)
plt.axhline(np.mean(n_evals))

In [None]:
# After the transpose in the fitting cell: one row per fitted parameter, one column per bin.
initial_params.shape

In [None]:
# Centre of every bin: midpoint of each pair of neighbouring edges.
rcentres = 0.5 * (rbins[:-1] + rbins[1:])
vcentres = 0.5 * (vbins[:-1] + vbins[1:])
# Topographic-map output at every bin centre, visited with the r index outer
# and the v index inner (the same order in which the per-bin fits were stored).
# The transpose puts one bin per column.
initial_phis = np.array([
	tm(r_mid, v_mid)
	for r_mid in rcentres
	for v_mid in vcentres
]).T

In [None]:
# One column per bin centre; the row count is the length of a single tm(...) output.
initial_phis.shape

In [None]:
# Shape check of the pseudo-inverse that is multiplied with `initial_params` in the next cell.
np.linalg.pinv(initial_phis).shape

In [None]:
# Least-squares weight matrix W such that W @ initial_phis approximates the
# per-bin fitted parameters, obtained through the Moore-Penrose pseudo-inverse.
Winit = initial_params @ np.linalg.pinv(initial_phis)
Winit

In [None]:
# Rows follow `initial_params` (parameters), columns follow the rows of `initial_phis`.
Winit.shape

In [None]:
# `model` is the Model(tm) instance created in the first fitting cell.
model.setParameters(Winit)

In [None]:
# What the model returns for the first bin centre with the weights just set.
model.evalOnInput(rcentres[0], vcentres[0])

In [None]:
# The plain linear map for the same bin centre: weights times topographic-map output.
Winit @ tm(rcentres[0], vcentres[0])

In [None]:
# Parameters fitted directly for the first bin, for comparison with the reconstruction below.
initial_params[:,0]

In [None]:
# Parameters for the first bin reconstructed through the weight matrix.
(Winit @ initial_phis)[:,0]

In [None]:
# Compare, per bin, the directly fitted mixture with the mixture rebuilt from
# Winit @ initial_phis.
# squeeze=False keeps `axs` two-dimensional even when n_bins == 1, so the
# axs[j, i] indexing below is always valid.
fig, axs = plt.subplots(n_bins, n_bins, figsize=(n_bins*4,)*2, squeeze=False)

remade_parameters = Winit @ initial_phis
# Every non-positive entry is replaced by 0.001.
# NOTE(review): this applies to all nine rows alike, so a non-positive mean
# would be clamped as well — confirm that this is intended.
remade_parameters = np.where(remade_parameters>0, remade_parameters, 0.001)

# The evaluation grid depends only on r3dbins, so it is built once for all panels.
xs = np.linspace(r3dbins.min(), r3dbins.max(), 101)
for i in range(n_bins):
	# Both edges are inclusive: a row lying exactly on an edge is selected for both neighbouring bins.
	rmask = np.logical_and(df["rproj"]>=rbins[i], df["rproj"]<=rbins[i+1])
	for j in range(n_bins):
		ax = axs[j,i]
		# Column of this bin in the parameter matrices (i outer, j inner).
		column = i*n_bins+j
		# vbins is descending, hence the lower edge is vbins[j+1].
		vmask = np.logical_and(df["vproj"]>=vbins[j+1], df["vproj"]<=vbins[j])
		subset = df.loc[np.logical_and(rmask, vmask)]
		ax.hist(subset["# r3d"], bins=r3dbins, density=True, alpha=0.5, label="data")
		values = subset["# r3d"].to_numpy()
		kde = KDE(values, 3)
		ax.plot(xs, kde(xs), linewidth=5, label="kde")
		# `mixture` is the MixDens object left over from the fitting cell; it is
		# re-parameterised in place for each curve.
		mixture.setParams(initial_params[:,column])
		ax.plot(xs, mixture(xs), linestyle="--", linewidth=5, label="mixture")
		mixture.setParams(remade_parameters[:,column])
		ax.plot(xs, mixture(xs), linestyle="--", linewidth=5, label="after inversion")
		# Only the bottom row and the left column keep their ticks and labels.
		if j == n_bins - 1:
			ax.set_xlabel(r"$r_\mathrm{3D}$")
		else:
			ax.set_xticks([])
		if i == 0:
			ax.set_ylabel(r"$P(r_\mathrm{3D})$")
		else:
			ax.set_yticks([])
		ax.set_ylim([0,0.05])
		# The legend title shows the centre of the bin (mean of its two edges).
		ax.legend(title=f"r={rbins[i:i+2].mean():.1f}, v={vbins[j:j+2].mean():.1f}")

fig.tight_layout()
# NOTE(review): a later cell saves to the same file name and overwrites this figure.
fig.savefig(imagefolder+"true_initialization.png")

In [None]:
# Inspect the matrix of topographic-map outputs (one column per bin centre).
initial_phis

In [None]:
# Singular value decomposition of initial_phis; S holds the singular values.
U,S,Vt = np.linalg.svd(initial_phis)

In [None]:
# Inspect the singular values of initial_phis.
S

In [None]:
# Explicit right pseudo-inverse, phi^T (phi phi^T)^-1. The plain matrix inverse
# requires phi phi^T to be invertible.
gram = initial_phis @ initial_phis.T
pseudoinv_initphis = initial_phis.T @ np.linalg.inv(gram)

In [None]:
# Overwrites the pinv-based Winit with the one built from the explicit
# pseudo-inverse; this is the value later handed to the optimizer.
# NOTE(review): `model` was given the earlier Winit and is not updated here.
Winit = initial_params @ pseudoinv_initphis

In [None]:
# Singular values of initial_phis @ initial_phis.T, the matrix inverted for the
# explicit pseudo-inverse above.
_,what,_ = np.linalg.svd(initial_phis@initial_phis.T)
what

In [None]:
# Same comparison as the earlier reconstruction plot, now with the Winit built
# from the explicit pseudo-inverse.
# squeeze=False keeps `axs` two-dimensional even when n_bins == 1, so the
# axs[j, i] indexing below is always valid.
fig, axs = plt.subplots(n_bins, n_bins, figsize=(n_bins*4,)*2, squeeze=False)

remade_parameters = Winit @ initial_phis
# Every non-positive entry is replaced by 0.001.
# NOTE(review): this applies to all nine rows alike, so a non-positive mean
# would be clamped as well — confirm that this is intended.
remade_parameters = np.where(remade_parameters>0, remade_parameters, 0.001)

# The evaluation grid depends only on r3dbins, so it is built once for all panels.
xs = np.linspace(r3dbins.min(), r3dbins.max(), 101)
for i in range(n_bins):
	# Both edges are inclusive: a row lying exactly on an edge is selected for both neighbouring bins.
	rmask = np.logical_and(df["rproj"]>=rbins[i], df["rproj"]<=rbins[i+1])
	for j in range(n_bins):
		ax = axs[j,i]
		# Column of this bin in the parameter matrices (i outer, j inner).
		column = i*n_bins+j
		# vbins is descending, hence the lower edge is vbins[j+1].
		vmask = np.logical_and(df["vproj"]>=vbins[j+1], df["vproj"]<=vbins[j])
		subset = df.loc[np.logical_and(rmask, vmask)]
		ax.hist(subset["# r3d"], bins=r3dbins, density=True, alpha=0.5, label="data")
		values = subset["# r3d"].to_numpy()
		kde = KDE(values, 3)
		ax.plot(xs, kde(xs), linewidth=5, label="kde")
		# `mixture` is the MixDens object left over from the fitting cell; it is
		# re-parameterised in place for each curve.
		mixture.setParams(initial_params[:,column])
		ax.plot(xs, mixture(xs), linestyle="--", linewidth=5, label="mixture")
		mixture.setParams(remade_parameters[:,column])
		ax.plot(xs, mixture(xs), linestyle="--", linewidth=5, label="after inversion")
		# Only the bottom row and the left column keep their ticks and labels.
		if j == n_bins - 1:
			ax.set_xlabel(r"$r_\mathrm{3D}$")
		else:
			ax.set_xticks([])
		if i == 0:
			ax.set_ylabel(r"$P(r_\mathrm{3D})$")
		else:
			ax.set_yticks([])
		ax.set_ylim([0,0.05])
		# The legend title shows the centre of the bin (mean of its two edges).
		ax.legend(title=f"r={rbins[i:i+2].mean():.1f}, v={vbins[j:j+2].mean():.1f}")

fig.tight_layout()
# NOTE(review): the earlier reconstruction plot uses the same file name; this
# cell overwrites that figure.
fig.savefig(imagefolder+"true_initialization.png")

In [None]:
# Only the first 100000 rows of the three columns are handed to the DataHolder.
dataholder = DataHolder(df[["rproj","vproj","# r3d"]].to_numpy()[:100000])

In [None]:
# Loss built from the data holder and `model` (the Model(tm) instance created in the first fitting cell).
lossfunction = MaximizeLikelihood(dataholder, model)

In [None]:
# SPSA optimizer constructed with its default arguments.
optimizer = SPSA()

In [None]:
# Inspect the iteration limit the optimizer was constructed with.
optimizer.maxIterations

In [None]:
# Run the optimizer on the loss, starting from the most recent Winit (the one
# built from the explicit pseudo-inverse), and display the resulting weights.
W = optimizer.optimize(lossfunction, Winit)
W

In [None]:
# Final comparison on a fixed 25 x 25 grid: data histogram and KDE per bin,
# together with the model density for the optimised weights W and for the
# initial weights Winit.
# NOTE: this cell overwrites the notebook-level n_bins, vbins, rbins and
# r3dbins, so earlier cells must not be re-run after it without re-running
# the bin-search and bin-edge cells first.
n_bins = 25

fig, axs = plt.subplots(n_bins, n_bins, figsize=(n_bins*4,)*2)

vbins = np.linspace(3, -3, n_bins+1)
rbins = np.linspace(0, 10, n_bins+1)
binwidth = 5
r3dbins = np.arange(0, df["# r3d"].max()+binwidth, binwidth)

# `final_model` was used below without ever being created in the notebook;
# build it here from the same topographic map as `model`, so that `model`
# itself is not re-parameterised by this plot.
final_model = Model(tm)
# The evaluation grid depends only on r3dbins, so it is built once for all panels.
xs = np.linspace(r3dbins.min(), r3dbins.max(), 101)

for i in range(n_bins):
	# Both edges are inclusive: a row lying exactly on an edge is selected for both neighbouring bins.
	rmask = np.logical_and(df["rproj"]>=rbins[i], df["rproj"]<=rbins[i+1])
	rcentre = 0.5*(rbins[i]+rbins[i+1])
	for j in range(n_bins):
		ax = axs[j,i]
		# The velocity centre must follow the row index j (it was computed from i
		# before, which gave every row of a column the same density).
		vcentre = 0.5*(vbins[j]+vbins[j+1])
		# vbins is descending, hence the lower edge is vbins[j+1].
		vmask = np.logical_and(df["vproj"]>=vbins[j+1], df["vproj"]<=vbins[j])
		subset = df.loc[np.logical_and(rmask, vmask)]
		values = subset["# r3d"].to_numpy()
		# On this fine grid a bin can be empty; there is then nothing to
		# histogram or to estimate a KDE from, so only the model curves are drawn.
		if values.size > 0:
			ax.hist(subset["# r3d"], bins=r3dbins, density=True, alpha=0.5, label="data")
			kde = KDE(values, 3)
			ax.plot(xs, kde(xs), linewidth=3, label="KDE")
		# Each density is evaluated right after its weights are set, before the
		# model is switched to the other weight matrix.
		final_model.setParameters(W)
		targetdensity = final_model.evalOnInput(rcentre, vcentre)
		ax.plot(xs, targetdensity(xs), linestyle="--", linewidth=3, label="after optimization")
		final_model.setParameters(Winit)
		initdensity = final_model.evalOnInput(rcentre, vcentre)
		ax.plot(xs, initdensity(xs), linestyle=":", linewidth=5, label="with initialization")
		# Only the bottom row and the left column keep their ticks and labels.
		if j == n_bins - 1:
			ax.set_xlabel(r"$r_\mathrm{3D}$")
		else:
			ax.set_xticks([])
		if i == 0:
			ax.set_ylabel(r"$P(r_\mathrm{3D})$")
		else:
			ax.set_yticks([])
		ax.set_ylim([0,0.05])
		# Slices of two edges give the bin centre, as in the earlier plotting
		# cells (a one-element slice only returned the first edge).
		ax.legend(title=f"r={rbins[i:i+2].mean():.1f}, v={vbins[j:j+2].mean():.1f}")

fig.tight_layout()
fig.savefig(imagefolder+"withSPSA.pdf")
fig.savefig(imagefolder+"withSPSA.png")