## Setting up data

In [None]:
import numpy as np
import random
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import statistics as stat
import math
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn import metrics

## Reading Wine dataset & exploring data

In [None]:
# Load the wine data set from the CSV file sitting next to this notebook.
wine = pd.read_csv('./wine-clustering.csv')

# Display options. The fully qualified 'display.precision' key is used on
# purpose: the bare 'precision' pattern is ambiguous in recent pandas
# releases (it also matches 'styler.format.precision') and raises OptionError.
pd.set_option('display.precision', 2)  # decimal places shown when rendering
pd.set_option('display.max_rows', 50)  # maximum number of rows to render

print(f"The original data shape is {wine.shape}")
wine  # last expression of the cell, so the notebook renders the frame

### Visualize the dataset

In [None]:
%matplotlib inline
import seaborn as sns

sns.set(font_scale=1.1)
sns.set_style('whitegrid')

grid = sns.pairplot(data=wine, vars=wine.columns[0:4], hue='Alcohol')

### Create a TSNE Estimator to reduce the data down to two dimensions

In [None]:
# Embed the wine data into two dimensions with t-SNE; the fixed random_state
# keeps the embedding reproducible between runs.
tsne = TSNE(n_components=2, random_state=13)
reduced_wine = tsne.fit_transform(wine)

# The label now names the variable actually being reported (it previously
# said 'reduced_digits', a leftover from another notebook).
print(f'Reduced data shape: reduced_wine.shape = {reduced_wine.shape}')

#### Visualize the reduced data

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

dots = plt.scatter(reduced_wine[:, 0], reduced_wine[:, 1])

### Principal Component Analysis (PCA)

In [None]:
# Fit a two-component PCA on the wine data, then project the same data
# onto those components.
pca = PCA(n_components=2, random_state=13)
reduced_pca = pca.fit(wine).transform(wine)

# Wrap the projection in a DataFrame and attach the 'Alcohol' column
# taken from `wine`.
component_names = ['Component 1', 'Component 2']
reduced_df = pd.DataFrame(reduced_pca, columns=component_names)
reduced_df['Alcohol'] = wine['Alcohol']

In [None]:
# Scatter the two PCA components against each other, coloured by 'Alcohol'.
axes = sns.scatterplot(
    data=reduced_df,
    x='Component 1',
    y='Component 2',
    hue='Alcohol',
)