## Notes

This notebook currently produces unsatisfactory clustering results, but the method is valid, so I am keeping it as a reference.

### Imports

In [None]:
import logging
import sys
import os
import pandas as pd

sys.path.append(os.path.abspath("../.."))

from scripts import get_session
from scripts import load_config, setup_logging
from models import ACS2017CountyData, MortalityRate2014


### Config and Logging Setup

In [None]:
# Read the project configuration and initialise logging from the log path it
# provides. Any failure is logged and then re-raised so the notebook stops.
try:
    config = load_config()
    setup_logging(config["paths"]["log_path"])
    logging.info("Starting the data analysis project.")
except Exception as exc:
    logging.error(f"Failed to load config or setup logging: {exc}")
    raise

### Database Session

In [None]:
# Open the database session used by the query cell below; a failure is logged
# and re-raised rather than swallowed.
try:
    session = get_session()
    logging.info("Database session created successfully.")
except Exception as exc:
    logging.error(f"Failed to create database session: {exc}")
    raise

### Query Poverty and Mortality Data

In [None]:
# Pair each ACS 2017 poverty value with the 2014 maximum mortality rate by
# matching the two tables on FIPS code, then load the rows into a DataFrame.
try:
    data = (
        session.query(
            ACS2017CountyData.poverty,
            MortalityRate2014.mortality_rate_2014_max,
        )
        .filter(ACS2017CountyData.fips_code == MortalityRate2014.fips_code)
        .all()
    )

    # Name the columns explicitly (same order as the query) so the later cells
    # that index by 'poverty' / 'mortality_rate_2014_max' do not depend on how
    # pandas infers column names from the result rows, and so an empty result
    # still yields a frame with the expected columns.
    df = pd.DataFrame(data, columns=['poverty', 'mortality_rate_2014_max'])
    print(df.head())
except Exception as e:
    # The message previously said "show data", which did not describe this query.
    logging.error(f"Failed to query poverty and mortality data: {e}")
    raise


### Visualize Data

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter plot of poverty against the 2014 maximum mortality rate.
plt.figure(figsize=(10, 6), dpi=300)
sns.scatterplot(data=df, x='poverty', y='mortality_rate_2014_max')
plt.gca().set(
    title='Poverty vs Mortality Rate',
    xlabel='Poverty Rate',
    ylabel='Mortality Rate 2014 Max',
)
plt.show()

### Preprocess Data

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# The two columns that are clustered. Selecting them explicitly keeps this cell
# safe to re-run: a later cell adds a 'Cluster' column to df, and scaling the
# whole frame would then feed the previous labels back in as a third feature.
FEATURE_COLUMNS = ['poverty', 'mortality_rate_2014_max']

# Drop rows missing either feature; StandardScaler output is passed to DBSCAN,
# which cannot handle NaN values.
df = df.dropna(subset=FEATURE_COLUMNS)

# Standardise both features so they contribute on a comparable scale.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df[FEATURE_COLUMNS])



### Apply DBSCAN

In [None]:
from sklearn.cluster import DBSCAN

# Fit DBSCAN on the standardised features and keep one label per row.
# Per the scikit-learn documentation, noisy samples receive the label -1.
dbscan = DBSCAN(min_samples=5, eps=0.1)
clusters = dbscan.fit(df_scaled).labels_

### Analyze Cluster Labels

In [None]:
# Attach the DBSCAN label to each row, then report how many rows received
# each label.
df["Cluster"] = clusters
print(df["Cluster"].value_counts())

### Visualize DBSCAN Clusters

In [None]:
# Same scatter plot as before, now coloured by the DBSCAN cluster label.
plt.figure(figsize=(10, 6), dpi=300)
sns.scatterplot(
    data=df,
    x='poverty',
    y='mortality_rate_2014_max',
    hue='Cluster',
    palette='colorblind',
    alpha=0.7,
)
plt.gca().set(
    title="DBSCAN Clustering of Poverty vs Mortality Rate",
    xlabel="Poverty Rate",
    ylabel="Mortality Rate 2014 Max",
)
plt.legend(title="Cluster")
plt.show()