### Models

#### Model Options
- Linear Regression (or Kernelized Regression)
  - KernelDensity(kernel='linear')
- K-Nearest Neighbors
  - KernelDensity()
- Gaussian Processes
  - KernelDensity(kernel='gaussian')
- Neural Networks
- Random Forest Regressor
- Adaptive or Gradient Boosting Regressor
- KernelDensity kernel options noted above: "linear", "gaussian"

In [None]:
import numpy as np
from sklearn.neighbors import KernelDensity
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import scipy.stats as stats

In [None]:
# Location of the training set, relative to the notebook's working directory.
data = "data/train.csv"

# Parse the CSV into a DataFrame using pandas' default parsing options.
df = pd.read_csv(filepath_or_buffer=data)

In [None]:
# Split the dataframe by column position: every column except the last is
# taken as a feature, and the last column is taken as the target.
# Both selections use slices, so X and y are DataFrames (y has one column).
X, y = df.iloc[:, :-1], df.iloc[:, -1:]
# Alternative noted by the author: y = df['target'], which selects the column
# by name instead of by position (presumes a column named 'target' -- confirm).


In [None]:
# Compute basic descriptive statistics for the target variable.
#
# `y` is a one-column DataFrame. np.mean / np.std / np.min / np.max hand a
# DataFrame off to its own reduction methods, which (depending on the pandas
# version) return a per-column Series rather than a number. Flattening to a
# 1-D array first guarantees that every statistic below is a plain scalar.
target_values = np.asarray(y).ravel()

# NaN-aware reductions ignore missing values, matching the default behaviour
# of the pandas reductions that were used implicitly before, and make the
# median consistent with the other statistics.
mean = np.nanmean(target_values)
median = np.nanmedian(target_values)
std_dev = np.nanstd(target_values)  # population standard deviation (ddof=0)
min_val = np.nanmin(target_values)
max_val = np.nanmax(target_values)
y_range = max_val - min_val

print("Mean:", mean)
print("Median:", median)
print("Standard Deviation:", std_dev)
print("Minimum Value:", min_val)
print("Maximum Value:", max_val)
print("Range:", y_range)


In [None]:
# Fit a Gaussian kernel density estimator (bandwidth 0.5) to the target.
# The target values are reshaped into a single column before fitting.
target_column = y.to_numpy().reshape(-1, 1)
kde = KernelDensity(kernel='gaussian', bandwidth=0.5)
kde.fit(target_column)

# Evaluation grid: 1000 evenly spaced points from min_val to max_val,
# likewise shaped as a single column.
x_vals = np.linspace(min_val, max_val, 1000).reshape(-1, 1)

# score_samples yields log-densities; exponentiate to obtain densities.
log_density = kde.score_samples(x_vals)
y_vals = np.exp(log_density)

# Draw the estimated density curve over the grid.
plt.plot(x_vals, y_vals)
plt.xlabel('Target Variable')
plt.ylabel('Density')
plt.show()

# Alternative methods
# sns.displot(y, kind="kde")
# sns.kdeplot(y)

#### Data Exploration: Target 

In [None]:
# Preview the first rows of the target. The result is printed explicitly:
# a notebook cell only auto-displays its final expression, and this call is
# followed by another statement, so a bare `y.head()` would show nothing.
print(y.head())

# Summary statistics of the target.
# describe() rows for numeric data: count, mean, std, min, 25%, 50%, 75%, max
print(y.describe())

#### Data Exploration: Features

In [None]:
# Preview and summarize the features. Both results are printed explicitly:
# the cell ends with an assignment, so bare `X.head()` / `X.describe()`
# expressions would be evaluated and then discarded without any output.
print(X.head())
print(X.describe())

# Names of the feature columns as a plain Python list.
feature_list = X.columns.values.tolist()

In [None]:
# TODO: plot the feature distributions, e.g.
# for each in feature_list:
#     sns.displot(X[each])
# sns.displot(X, kind="kde")