# Implementation of Finite Mixture Model

## Import Needed Filepaths and Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from texas_gerrymandering_hb4.config import FINAL_CSV

## Load Dataset Into Pandas DataFrame
Our processed dataset is read into a Pandas DataFrame.

In [None]:
# Read the processed dataset from the path given by FINAL_CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=FINAL_CSV)

## Define Response Variable `y`
`y` is the array of observed outcomes: the `dem_share` column of the DataFrame, converted to floats.

In [None]:
# Extract the "dem_share" column as a NumPy array of 64-bit floats.
y = df["dem_share"].values.astype(np.float64)

## Set Up Reproducible Random Number Generator

In [None]:
# Seed NumPy's global random state so that every np.random.* draw made
# afterwards is repeatable from run to run.
np.random.seed(seed=123)

## Setting Up Mixture Model

## Initial Parameters
* These parameters represent starting guesses for the Gibbs sampler.
* `lambda` ($\lambda$) is the mixing proportion. It is set to 0.5 to serve as a neutral initial guess.
* `mu_1` and `mu_2` represent the initial means for each component.
* `sigma_squared_1` and `sigma_squared_2` are the initial variances for each component.

In [None]:
# Starting values for the sampler.
# NOTE: `lambda` is a reserved keyword in Python and cannot be used as a
# variable name, so the mixing proportion is stored as `lambda_` (the PEP 8
# trailing-underscore convention for keyword clashes).
lambda_ = 0.5

# Both components start from the same location and spread: the sample mean
# and the (population, ddof=0) sample variance of y.
mu_1 = np.mean(y)
mu_2 = np.mean(y)
sigma_squared_1 = np.var(y)
sigma_squared_2 = np.var(y)

### Setting Priors
* `alpha_1` and `alpha_2` are the hyperparameters of the Beta prior on $\lambda$.
$$\lambda \sim \text{Beta}(\alpha_1, \alpha_2)$$
Because the prior for lambda is Beta(2,2), `alpha_1` and `alpha_2` are both set to 2.
* A benefit of this choice is that the Beta(2, 2) density is zero at $\lambda = 0$ and $\lambda = 1$, the values at which the mixture degenerates to a single component. The prior density shrinks toward zero as $\lambda$ approaches either endpoint, so the sampler is pushed away from these degenerate values and is less likely to accidentally converge to one of them.

In [None]:
# Shape parameters of the Beta prior on the mixing proportion: Beta(2, 2).
alpha_1, alpha_2 = 2, 2

# Both mu0 values are set to the sample mean of y.
# NOTE(review): presumably these are the prior means for the two component
# means -- confirm against the sampler code that consumes them.
mu0_1, mu0_2 = np.mean(y), np.mean(y)


### Gibbs Sampler Parameters

In [None]:
# Total number of sampler iterations, and the warm-up count.
# NOTE(review): warmup is presumably the number of initial draws to discard
# before summarising the chain -- confirm where it is used.
iterations, warmup = 1000, 500

## Storage for Samples

In [None]:
# Pre-allocated storage: one slot per iteration for each sampled parameter.
# The mixing-proportion array is named `lambda_samples`; an identifier cannot
# contain a space, and bare `lambda` is a reserved keyword.
lambda_samples = np.zeros(iterations)
mu1_samples = np.zeros(iterations)
mu2_samples = np.zeros(iterations)
sigma_squared_1_samples = np.zeros(iterations)
sigma_squared_2_samples = np.zeros(iterations)

## Inverse-$\chi^2$ Sampler

In [None]:
def scaled_inverse_chi_squared(degrees_of_freedom, scale):
    """Draw one sample from a scaled inverse chi-squared distribution.

    The draw is computed as ``degrees_of_freedom * scale / X`` where ``X`` is
    a chi-squared variate with ``degrees_of_freedom`` degrees of freedom,
    taken from NumPy's global random state.

    Parameters
    ----------
    degrees_of_freedom
        Degrees of freedom of the distribution; passed to
        ``np.random.chisquare``.
    scale
        Scale parameter multiplying the degrees of freedom in the numerator.

    Returns
    -------
    The sampled value.
    """
    numerator = degrees_of_freedom * scale
    chi_squared_draw = np.random.chisquare(degrees_of_freedom)
    return numerator / chi_squared_draw

## Gibbs Sampler Implementation