In [None]:
import os
# Step up to the parent directory; presumably this makes the relative
# "data/" and "figures/" paths used below resolve — confirm the layout.
# NOTE: not idempotent — every re-run of this cell climbs one more level.
os.chdir(os.pardir)

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sb
import numpy as np

In [None]:
# Folder prefixes used when reading the input table and saving figures.
# The trailing slash matters: paths are built by plain string concatenation.
imagefolder = "figures/"
datafolder = "data/"

In [None]:
# Read the tab-separated table allhosts3.tsv from the data folder.
df = pd.read_csv(f"{datafolder}allhosts3.tsv", sep="\t")

In [None]:
# Display the loaded table as a quick visual sanity check.
df

### Check spins are all equally represented

In [None]:
# Unit-wide bins with edges at 0.5, 1.5, ..., 50.5, i.e. one bin centred on
# each integer from 1 to 50.
spin_edges = np.arange(0.5, 51)

fig, ax = plt.subplots()
df["spin"].hist(bins=spin_edges, color="black", edgecolor="white", ax=ax)
ax.set(
	title="Point of View",
	xlabel="Direction",
	ylabel="Counts",
	xlim=[0, 51],
)
fig.tight_layout()
fig.savefig(f"{imagefolder}spins.png")

### Check mass distribution

In [None]:
# Seven equally spaced edges (six bins) running from the smallest to the
# largest value of the "mvirCl" column.
mass_lo, mass_hi = df["mvirCl"].min(), df["mvirCl"].max()
mass_bins = np.linspace(mass_lo, mass_hi, 7)
# One human-readable label per bin, built from consecutive pairs of edges.
labels = [
	f"From M={lo:.3e} to M={hi:.3e}"
	for lo, hi in zip(mass_bins[:-1], mass_bins[1:])
]

In [None]:
# Peek at two of the distinct "mvirCl" values (positions 10 and 15 of the
# sorted unique values). They are computed in place because the name `values`
# is only assigned by a later cell, so referring to it here raises NameError
# on a fresh kernel (Restart & Run All).
np.unique(df["mvirCl"])[[10, 15]]

In [None]:
# Distinct "mvirCl" values and the number of rows carrying each of them.
values, counts = np.unique(df["mvirCl"], return_counts=True)
# Positions (within the sorted unique values) singled out for a closer look.
to_inspect = [14, 22]
print(f"There are {len(values)} clusters")

# Explicit figure/axes instead of the pyplot state machine.
fig, ax = plt.subplots()
ax.scatter(values, counts)
ax.scatter(values[to_inspect], counts[to_inspect], color="red", s=100, marker="x", label="To inspect")
ax.set_xlabel("Cluster mass")
ax.set_ylabel("Counts")
# Dashed vertical lines mark the mass-bin edges.
for b in mass_bins:
	ax.axvline(b, color="red", linestyle="--")
# The highlighted points carry a label; without a legend it is never displayed.
ax.legend()

# Rows that belong to the highlighted values, indexed by "mvirCl".
subdf = df.set_index("mvirCl").loc[values[to_inspect]]

### Check distributions of input data

In [None]:
# Pairwise 2D histograms of rproj, vproj and r3d: one row of panels per mass bin.
fig, axs = plt.subplots(len(labels), 3, figsize=(12, 1+3*len(labels)))
last_bin = len(labels) - 1
for i, label in enumerate(labels):
	mask = df["mvirCl"] >= mass_bins[i]
	if i == last_bin:
		# The top edge equals the maximum mass, so the last bin has to be
		# closed on the right; otherwise the most massive cluster falls
		# into no bin at all.
		mask = mask & (df["mvirCl"] <= mass_bins[i+1])
	else:
		mask = mask & (df["mvirCl"] < mass_bins[i+1])
	data = df.loc[mask]
	if data.empty:
		# Nothing to draw for this bin; its row of panels stays blank.
		continue
	sb.histplot(data=data, x="rproj", y="vproj", bins=100, ax=axs[i, 0])
	sb.histplot(data=data, x="# r3d", y="vproj", bins=100, ax=axs[i, 1])
	sb.histplot(data=data, x="rproj", y="# r3d", bins=100, ax=axs[i, 2])
	# The bin label goes on the middle panel so it reads as a row title.
	axs[i, 1].set_title(label)
fig.tight_layout()

In [None]:
fig, axs = plt.subplots(1, 3, figsize=(12, 4))

# (x, y) column pairs, one per panel from left to right, drawn for the
# rows selected into `subdf`.
column_pairs = [("rproj", "vproj"), ("# r3d", "vproj"), ("rproj", "# r3d")]
for panel, (xcol, ycol) in zip(axs, column_pairs):
	sb.histplot(data=subdf, x=xcol, y=ycol, bins=100, ax=panel)

fig.tight_layout()

In [None]:
# Density-normalised 1D histograms of rproj, vproj and r3d: one row per mass bin.
fig, axs = plt.subplots(len(labels), 3, figsize=(12,1+3*len(labels)))
last_bin = len(labels) - 1
for i, label in enumerate(labels):
	mask = df["mvirCl"] >= mass_bins[i]
	if i == last_bin:
		# The top edge equals the maximum mass, so the last bin has to be
		# closed on the right; otherwise the most massive cluster falls
		# into no bin at all.
		mask = mask & (df["mvirCl"] <= mass_bins[i+1])
	else:
		mask = mask & (df["mvirCl"] < mass_bins[i+1])
	data = df.loc[mask]
	if data.empty:
		# Nothing to draw for this bin; its row of panels stays blank.
		continue
	data["rproj"].hist(bins=50, density=True, alpha=0.7, ax=axs[i,0])
	data["vproj"].hist(bins=50, density=True, alpha=0.7, ax=axs[i,1])
	data["# r3d"].hist(bins=50, density=True, alpha=0.7, ax=axs[i,2])
	axs[i, 1].set_title(label)
	# Dashed straight reference line over the rproj histogram.
	axs[i, 0].plot([0,10],[0.02,0.18], linestyle="--", color="k")

fig.suptitle("Marginal distributions conditioning variables")
for i in range(len(labels)):
	axs[i, 0].set_xlabel(r"$r_\mathrm{proj}$")
	axs[i, 1].set_xlabel(r"$v_\mathrm{proj}$")
	axs[i, 2].set_xlabel(r"$R_\mathrm{3D}$")
	axs[i, 0].set_ylabel(r"$P(r_\mathrm{proj})$")
	axs[i, 1].set_ylabel(r"$P(v_\mathrm{proj})$")
	axs[i, 2].set_ylabel(r"$P(R_\mathrm{3D})$")
	axs[i, 0].set_xlim([0,10])
	axs[i, 1].set_xlim([-3,3])
	axs[i, 2].set_xlim(left=0)
for ax in axs.flatten():
	ax.set_ylim(bottom=0)
	# pandas' hist switches the grid on; turn it back off.
	ax.grid(False)
fig.tight_layout()

In [None]:
fig, axs = plt.subplots(1, 3, figsize=(12,4))

# Per-panel settings, left to right: column, axis symbol, x-limit arguments.
panel_specs = [
	("rproj", r"r_\mathrm{proj}", {"left": 0, "right": 10}),
	("vproj", r"v_\mathrm{proj}", {"left": -3, "right": 3}),
	("# r3d", r"R_\mathrm{3D}", {"left": 0}),
]

# Density-normalised histograms of the rows selected into `subdf`.
for panel, (column, _, _) in zip(axs, panel_specs):
	subdf[column].hist(bins=50, density=True, alpha=0.7, ax=panel)
# Dashed straight reference line over the rproj histogram.
axs[0].plot([0,10],[0.02,0.18], linestyle="--", color="k")

fig.suptitle("Marginal distributions conditioning variables")
for panel, (_, symbol, xlimits) in zip(axs, panel_specs):
	panel.set_xlabel(rf"${symbol}$")
	panel.set_ylabel(rf"$P({symbol})$")
	panel.set_xlim(**xlimits)
for panel in axs.flatten():
	panel.set_ylim(bottom=0)
	# pandas' hist switches the grid on; turn it back off.
	panel.grid(False)
fig.tight_layout()

### Checking Hubble Flow deviation

In [None]:
fig, axs = plt.subplots()
upperlim = 10
# Only rows with r3d up to `upperlim` enter the 2D histogram.
within_limit = df.loc[df["# r3d"] <= upperlim]
# NOTE(review): no `hue` variable is passed, so `hue_norm` may have no effect
# on the count colour scale — confirm whether `norm=` was intended.
sb.histplot(data=within_limit, x="# r3d", y="Vradnorm", bins=[100,50], hue_norm=mpl.colors.LogNorm(), ax=axs)
axs.set_xlim([0, upperlim])

beginning = 1
xs = np.linspace(beginning, upperlim, 100)
Delta = 200
sigma = 1.5
# Reference curve (dashed) and the same curve shifted by -sigma and +sigma (dotted).
reference = np.sqrt(2/Delta)*xs - xs**(-0.42)
axs.plot(xs, reference, linestyle="--", color="red")
for offset in (-sigma, sigma):
	axs.plot(xs, reference + offset, linestyle=":", color="red")
fig.tight_layout()

In [None]:
# 2D histogram of Vradnorm against r3d for each mass bin, with two reference
# curves overlaid on every panel.
fig, axs = plt.subplots(len(labels), 1, figsize=(6, 1+len(labels)*5))
# Settings that do not depend on the bin are defined once, outside the loop.
upperlim = 20
beginning = 1
Delta = 200
xs = np.linspace(beginning, upperlim, 100)
last_bin = len(labels) - 1
for i, label in enumerate(labels):
	mask = df["mvirCl"] >= mass_bins[i]
	if i == last_bin:
		# The top edge equals the maximum mass, so the last bin has to be
		# closed on the right; otherwise the most massive cluster falls
		# into no bin at all.
		mask = mask & (df["mvirCl"] <= mass_bins[i+1])
	else:
		mask = mask & (df["mvirCl"] < mass_bins[i+1])
	data = df.loc[mask]
	if data.empty:
		# Nothing to draw for this bin; its panel stays blank.
		continue
	# NOTE(review): no `hue` variable is passed, so `hue_norm` may have no
	# effect on the count colour scale — confirm whether `norm=` was intended.
	sb.histplot(data=data.loc[data["# r3d"]<=upperlim], x="# r3d", y="Vradnorm", bins=[50,25], hue_norm=mpl.colors.LogNorm(), ax=axs[i])
	axs[i].set_title(label)
	axs[i].set_xlim([0, upperlim])
	# Red: linear term minus the power-law term; black: the linear term alone.
	axs[i].plot(xs, np.sqrt(2/Delta)*xs-xs**(-0.42), linestyle="--", color="red")
	axs[i].plot(xs, np.sqrt(2/Delta)*xs, linestyle="--", color="k")
fig.tight_layout()

### Checking output distribution

In [None]:
# Find the finest n_bins x n_bins grid over (rproj, vproj) in which every cell
# still holds at least `min_per_cell` rows.
min_per_cell = 200
hist_range = [[0,10],[-3,3]]
n_bins = 2
while True:
    counts,_,_ = np.histogram2d(df["rproj"], df["vproj"], bins=n_bins, range=hist_range)
    print(f"Using {n_bins} bins, with min {counts.min()}", end="\r", flush=True)
    if counts.min() < min_per_cell:
        # This grid is too fine: step back to the previous resolution.
        n_bins -= 1
        break
    n_bins += 1
# Recompute the counts for the grid that was actually selected, so the summary
# reports its own minimum and not the minimum of the rejected, finer grid.
counts,_,_ = np.histogram2d(df["rproj"], df["vproj"], bins=n_bins, range=hist_range)
print(f"Using {n_bins} bins, with min {counts.min()}")

In [None]:
# Grid of r3d histograms, one panel per (rproj, vproj) cell: columns follow
# rproj bins, rows follow vproj bins.
# squeeze=False keeps `axs` two-dimensional even when n_bins == 1.
fig, axs = plt.subplots(n_bins, n_bins, figsize=(n_bins*4,)*2, squeeze=False)

# vbins runs from 3 down to -3, so row 0 holds the highest-velocity bin.
vbins = np.linspace(3, -3, n_bins+1)
rbins = np.linspace(0, 10, n_bins+1)
binwidth = 1
r3dbins = np.arange(0, df["# r3d"].max()+binwidth, binwidth)

for i in range(n_bins):
	rmask = np.logical_and(df["rproj"]>=rbins[i], df["rproj"]<=rbins[i+1])
	for j in range(n_bins):
		vmask = np.logical_and(df["vproj"]>=vbins[j+1], df["vproj"]<=vbins[j])
		subset = df.loc[np.logical_and(rmask, vmask)]
		panel = axs[j,i]
		panel.hist(subset["# r3d"], bins=r3dbins, density=True)
		# Only the bottom row and the left column keep their ticks and labels.
		if j == n_bins - 1:
			panel.set_xlabel(r"$r_\mathrm{3D}$")
		else:
			panel.set_xticks([])
		if i == 0:
			panel.set_ylabel(r"$P(r_\mathrm{3D})$")
		else:
			panel.set_yticks([])
		panel.set_ylim([0,0.2])
		panel.set_xlim(left=0)
		# Label the panel with its bin centres: the mean of BOTH edges of the
		# bin (a one-element slice would just return the first edge).
		panel.legend(title=f"r={rbins[i:i+2].mean():.1f}, v={vbins[j:j+2].mean():.1f}")
		print(f"{int(100*(i*n_bins+j)/n_bins**2)}%", flush=True, end="\r")

fig.tight_layout()
fig.savefig(imagefolder+"histograms.png")
# Close this specific figure so it is not also rendered inline.
plt.close(fig)

In [None]:
# Display the r3d bin edges currently held in `r3dbins`.
r3dbins

In [None]:
# Same grid of r3d histograms, restricted to rows with r3d <= r3drange and
# drawn with 50 equal-width bins over [0, r3drange].
# squeeze=False keeps `axs` two-dimensional even when n_bins == 1.
fig, axs = plt.subplots(n_bins, n_bins, figsize=(n_bins*4,)*2, squeeze=False)

# vbins runs from 3 down to -3, so row 0 holds the highest-velocity bin.
vbins = np.linspace(3, -3, n_bins+1)
rbins = np.linspace(0, 10, n_bins+1)
r3drange = 5
r3dbins = np.linspace(0, r3drange, 51)

df_slice = df.loc[df["# r3d"]<=r3drange]

for i in range(n_bins):
	rmask = np.logical_and(df_slice["rproj"]>=rbins[i], df_slice["rproj"]<=rbins[i+1])
	for j in range(n_bins):
		vmask = np.logical_and(df_slice["vproj"]>=vbins[j+1], df_slice["vproj"]<=vbins[j])
		subset = df_slice.loc[np.logical_and(rmask, vmask)]
		panel = axs[j,i]
		panel.hist(subset["# r3d"], bins=r3dbins, density=True)
		# Only the bottom row and the left column keep their ticks and labels.
		if j == n_bins - 1:
			panel.set_xlabel(r"$r_\mathrm{3D}$")
		else:
			panel.set_xticks([])
		if i == 0:
			panel.set_ylabel(r"$P(r_\mathrm{3D})$")
		else:
			panel.set_yticks([])
		panel.set_xlim([0,r3drange])
		# Label the panel with its bin centres: the mean of BOTH edges of the
		# bin (a one-element slice would just return the first edge).
		panel.legend(title=f"r={rbins[i:i+2].mean():.1f}, v={vbins[j:j+2].mean():.1f}")
		print(f"{int(100*(i*n_bins+j)/n_bins**2)}%", flush=True, end="\r")

fig.tight_layout()
fig.savefig(imagefolder+"histograms_cluster.png")
# Close this specific figure so it is not also rendered inline.
plt.close(fig)

In [None]:
# Same grid of r3d histograms, drawn with logarithmically spaced bins and a
# logarithmic x axis.
# squeeze=False keeps `axs` two-dimensional even when n_bins == 1.
fig, axs = plt.subplots(n_bins, n_bins, figsize=(n_bins*4,)*2, squeeze=False)

# vbins runs from 3 down to -3, so row 0 holds the highest-velocity bin.
vbins = np.linspace(3, -3, n_bins+1)
rbins = np.linspace(0, 10, n_bins+1)
# 100 geometrically spaced edges from 0.1 up to the largest r3d in the table.
r3dbins = np.geomspace(0.1, df["# r3d"].max(), 100)

for i in range(n_bins):
	rmask = np.logical_and(df["rproj"]>=rbins[i], df["rproj"]<=rbins[i+1])
	for j in range(n_bins):
		vmask = np.logical_and(df["vproj"]>=vbins[j+1], df["vproj"]<=vbins[j])
		subset = df.loc[np.logical_and(rmask, vmask)]
		panel = axs[j,i]
		panel.hist(subset["# r3d"], bins=r3dbins, density=True)
		panel.set_xscale("log")
		# Only the bottom row and the left column keep their ticks and labels.
		if j == n_bins - 1:
			panel.set_xlabel(r"$r_\mathrm{3D}$")
		else:
			panel.set_xticks([])
		if i == 0:
			panel.set_ylabel(r"$P(r_\mathrm{3D})$")
		else:
			panel.set_yticks([])
		panel.set_ylim([0,0.2])
		# Label the panel with its bin centres: the mean of BOTH edges of the
		# bin (a one-element slice would just return the first edge).
		panel.legend(title=f"r={rbins[i:i+2].mean():.1f}, v={vbins[j:j+2].mean():.1f}")
		print(f"{int(100*(i*n_bins+j)/n_bins**2)}%", flush=True, end="\r")

fig.tight_layout()
fig.savefig(imagefolder+"loghistograms.png")
# Close this specific figure so it is not also rendered inline.
plt.close(fig)

### Velocity w.r.t. R3D

In [None]:
# Keep the rows with r3d <= 50, then draw a 2D histogram of |vproj| against r3d.
df_slice = df[df["# r3d"].le(50)]
sb.histplot(x=df_slice["# r3d"], y=df_slice["vproj"].abs(), bins=101)