In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from data_loader import IsingDataset
from matplotlib import animation


# Ising Model
The Ising model is a model from statistical physics that describes the behavior of magnetic materials. It is a simple model that can be used to study phase transitions and critical phenomena. The model consists of a lattice of spins, which can be in one of two states: up or down. The energy of the system is given by the sum of the interactions between neighboring spins. The Hamiltonian of the system is given by:
$$H = -\frac{1}{2} \sum_{i,j} J_{ij}s_i s_j - h \sum_i s_i$$
where $s_i$ is the spin at site $i$, $J_{ij}$ is the coupling constant, and $h$ is the external magnetic field. The first term in the Hamiltonian represents the interaction between spins, and the second term represents the interaction of the spins with the external magnetic field. 

## Thermodynamic properties
The thermodynamic properties of the system can be calculated from the partition function:
$$Z = \sum_{\{s_i\}} e^{-\beta H}$$
where $\beta = 1/(k_B T)$ is the inverse temperature, and the sum is over all possible configurations of the spins. The free energy of the system is given by:
$$F = -k_B T \log Z$$
The magnetization of the system is given by:
$$M = \frac{1}{N} \sum_i s_i$$
where $N$ is the total number of spins. The average magnetization is given by:
$$\langle M \rangle = \frac{1}{Z} \sum_{\{s_i\}} M e^{-\beta H}$$


## Simulated Data

In [None]:
# Open the simulated data through the project-local IsingDataset wrapper.
# NOTE(review): the ".h5" extension suggests an HDF5 file — confirm against data_loader.
ising_dset = IsingDataset("training_data.h5")
# Bare last expression: the notebook displays the number of available samples.
len(ising_dset)


In [None]:
# Pick one fixed sample (index 15) for a visual sanity check. Each dataset item
# unpacks into three values; presumably (spin lattice, magnetization, coupling BJ),
# judging by the names — verify against data_loader.
config, mag, BJ = ising_dset[15]

In [None]:
# Render the selected sample as a 28x28 image together with its colour scale.
fig, ax = plt.subplots()
lattice_img = ax.imshow(config.reshape(28, 28))
fig.colorbar(lattice_img, ax=ax)
plt.show()

# PCA
PCA is a statistical technique for reducing the dimensionality of a dataset. It works by transforming the original data into a new coordinate system whose axes are aligned with the directions of maximum variance in the data. This transformation allows the data to be represented in a lower-dimensional space while preserving as much of the original variance as possible. The transformation is obtained by decomposing the covariance matrix of the (mean-centered) data, $\Sigma = E[XX^T]$, into its eigenvalues and eigenvectors. The eigenvectors are the principal components of the data, and the eigenvalues represent the amount of variance that is explained by each principal component.
$$ \Lambda = \Gamma^T \Sigma \Gamma $$
where $\Lambda$ is a diagonal matrix of eigenvalues and $\Gamma$ is a matrix whose columns are the eigenvectors of $\Sigma$.


In [None]:
# Number of configurations used for the PCA. Capped at the dataset size because
# sampling without replacement raises a ValueError when asked for more items
# than the dataset contains.
N = min(10000, len(ising_dset))
# Fixed seed so that the same subset is drawn on every run.
rng = np.random.default_rng(seed=42)
# N distinct dataset indices (replace=False -> no sample is picked twice).
rand = rng.choice(len(ising_dset), size=N, replace=False)

In [None]:
# Pre-allocate one row per sampled configuration (flattened 28x28 lattice) and
# one temperature value per row.
configs = np.empty((N, 28 * 28), dtype=float)
temps = np.empty(N, dtype=float)

# Manual loop: the HDF5-backed dataset is read one item at a time.
for i, idx in enumerate(rand):
    lattice, mag, BJ = ising_dset[idx]

    # lattice is a torch.tensor, so convert it to NumPy before flattening.
    configs[i] = np.asarray(lattice).ravel()
    # The temperature is stored as the reciprocal of BJ.
    temps[i] = 1 / BJ.float()


In [None]:
# Fit PCA on the flattened configurations (one sample per row of `configs`).
# No n_components is passed, so the number of retained components is left at
# scikit-learn's default.
pca = PCA()
# XPCA holds the same samples expressed in the principal-component basis;
# columns 0 and 1 are plotted as P1 and P2 in the following cell.
XPCA = pca.fit_transform(configs)

In [None]:
# Scatter of all snapshots in the plane of the first two principal components,
# coloured by temperature.
fig, ax = plt.subplots(figsize=(10, 5))

points = ax.scatter(
    XPCA[:, 0],
    XPCA[:, 1],
    c=temps,
    cmap="coolwarm",
    alpha=0.5,
    label="Snapshots",
)

# Colour bar tied to the scatter so the temperature scale is readable.
cbar = fig.colorbar(points, ax=ax)
cbar.ax.set_ylabel("Temperature")
ax.set_xlabel("P1")
ax.set_ylabel("P2")

ax.legend()

fig.savefig("PCA_temperatures.png")

plt.show()


In [None]:
# Explained-variance ratio of every principal component on a logarithmic axis.
fig, ax = plt.subplots()
ax.set_yscale("log")

component_index = np.arange(len(pca.explained_variance_))
variance_ratio = pca.explained_variance_ratio_
ax.scatter(component_index, variance_ratio, c="b", marker="^")

ax.set(
    xlabel="$n$",
    ylabel=r"$\tilde{\lambda}$",
    title="Explained Variance by components",
)
fig.savefig("Explained_variance.png")
plt.show()

# Comparison
From thermodynamics we know that the system can be described by the magnetization, which is a single scalar value. The microscopic state, however, is a lattice of spins, which is a high-dimensional object. We can use PCA to reduce the dimensionality of the system and find the principal components that capture the most variance in the data. By comparing the principal components with the magnetization, we can gain insight into the behavior of the system and into how the magnetization is related to the underlying structure of the data.

In [None]:
# Side-by-side comparison, per temperature, of the mean absolute magnetization
# and the mean absolute projection onto the first principal component.
T_crit = 2.269  # drawn as a dashed vertical line and labelled T_c on the x axis
temp = np.unique(temps)
# One boolean row selector per distinct temperature, shared by both panels.
masks = [temps == t for t in temp]

fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True)

# Left panel: |m| of every configuration, averaged per temperature.
magnetization = np.abs(configs.mean(axis=1))
mag_mean = [np.mean(magnetization[mask]) for mask in masks]

ax1.scatter(temp, mag_mean)
ax1.axvline(T_crit, linestyle="--", c="k")
ax1.set(xlabel="T", ylabel=r"|m|", title="Magnetization")

# Append the critical temperature to the automatic ticks and label it T_c.
ticks = np.append(ax1.get_xticks(), T_crit)
ticksl = [*ticks[:-1].tolist(), r"$T_c$"]
ax1.set_xticks(ticks)
ax1.set_xticklabels(ticksl)

# Right panel: projection of each (uncentered) configuration onto the first
# principal component, divided by 28 (the "/ L" in the axis label).
aux1 = pca.components_[0] / 28
aux = [np.mean(np.abs(np.sum(configs[mask] * aux1, axis=1))) for mask in masks]

ax2.scatter(temp, aux)
ax2.axvline(T_crit, linestyle="--", c="k")
ax2.set(xlabel="T", ylabel=r'$<|p_1|>$ / L', title="First Principal Component")

fig.tight_layout()
fig.savefig("Magnetization_vs_pc.png")
plt.show()

In [None]:
# Absolute difference, per temperature, between the mean magnetization and the
# rescaled first-principal-component projection.
deviation = np.abs(np.asarray(mag_mean) - np.asarray(aux))

fig, ax = plt.subplots()
ax.scatter(temp, deviation, marker="o", c="b")
ax.set(xlabel="T", ylabel=r"$| \frac{|p_1|}{L} - |m| |$")
fig.savefig("diff_p_1_m.png")
plt.show()

In [None]:
# 3x3 gallery of the first nine principal components, each drawn as a 28x28 image.
fig, axs = plt.subplots(3, 3)

for k, panel in enumerate(axs.ravel()):
    panel.imshow(pca.components_[k].reshape(28, 28))
    # Hide the tick marks; only the pattern of each component matters here.
    panel.set(xticks=[], yticks=[], title=f"{k}. component")

fig.tight_layout()

In [None]:
# Animate the first 50 principal components as 28x28 images and export a GIF.
fig = plt.figure(facecolor=(1,1,1,0))
data = pca.components_[0].reshape(28, 28)
im = plt.imshow(data)

plt.title("Principal Components")


def init():
    """Reset the image to the first principal component.

    Returns:
        A one-element tuple holding the image artist; FuncAnimation expects
        an iterable of artists from its init function.
    """
    im.set_data(pca.components_[0].reshape(28, 28))
    im.autoscale()
    return (im,)


def animate(data):
    """Draw a single principal component.

    Args:
        data: One row of ``pca.components_`` (reshaped to 28x28 here).

    Returns:
        A one-element tuple holding the updated image artist.
    """
    im.set_data(data.reshape(28, 28))
    # set_data() keeps the colour limits chosen for the very first image, so
    # without rescaling every later component is clipped to the value range of
    # component 0. Rescale per frame, like the individually scaled static grid.
    im.autoscale()
    return (im,)


anim = animation.FuncAnimation(fig, animate, init_func=init, frames=pca.components_[0:50],
                               interval=500)
anim.save("PCA_Components.gif", writer='imagemagick', savefig_kwargs={"transparent": True})