# Customer Clustering
Clustering is an unsupervised ML technique that we use to group our customers. Unlike classification, our examples are unlabeled, so we group them based on their similarities.

In [6]:
# packages
import pandas as pd
import os
import warnings
from dotenv import load_dotenv
from google.cloud import bigquery
from datetime import datetime
import matplotlib.pyplot as plt

# preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline

# clustering
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

In [7]:
# retrieve data from bigquery

# Pull settings from the .env file into the process environment, then read
# the credentials setting (it is not passed to the client explicitly below).
load_dotenv()
GOOGLE_APPLICATION_CREDENTIALS = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')

# BQ query: every column of the cleaned marketing campaign table
sql_query = """
    SELECT 
        * 
    FROM `crudek-data.practice_data.marketing_campaign_clean`             
"""

# Create the BigQuery client with default arguments and submit the query
client = bigquery.Client()
query_job = client.query(sql_query)

# Fetch the query result as a pandas DataFrame
df = query_job.to_dataframe()

### Data Preparation
The majority of the data preparation work was completed in the *1_data_preparation* notebook within this repo. If needed, go back and familiarize yourself with the following steps that were taken:
1. EDA
2. Handle missing data
3. Handle outliers
4. Create new features
5. Remove certain fields

In [9]:
# Preview the first rows of the loaded frame as a quick sanity check
df.head()

Unnamed: 0,Recency,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,...,Complain,Response,CustomerAge,DaysEnrolled,Income,Relationship,Education,TotalAcceptedCampaigns,TotalAmountSpent,Dependents
0,37.0,39.0,1.0,9.0,2.0,0.0,8.0,0.0,1.0,0.0,...,0.0,0.0,49.0,2402.0,157733.0,current,medium,0.0,59.0,1.0
1,76.0,107.0,2.0,12.0,2.0,2.0,12.0,0.0,0.0,0.0,...,0.0,0.0,63.0,2580.0,6835.0,current,high,0.0,137.0,1.0
2,31.0,85.0,1.0,16.0,2.0,1.0,2.0,0.0,0.0,0.0,...,0.0,0.0,44.0,2403.0,162397.0,current,high,0.0,107.0,2.0
3,2.0,67.0,11.0,26.0,4.0,3.0,262.0,0.0,1.0,0.0,...,0.0,0.0,38.0,2211.0,6560.0,single,medium,0.0,373.0,0.0
4,92.0,81.0,4.0,33.0,5.0,2.0,291.0,0.0,23.0,1.0,...,0.0,0.0,54.0,2216.0,7144.0,single,high,0.0,416.0,2.0
