In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler, PowerTransformer, FunctionTransformer
import warnings
warnings.filterwarnings('ignore')

In [4]:
df = pd.read_csv('Wine_Data.csv')

In [5]:
df.head()

Unnamed: 0.1,Unnamed: 0,Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline
0,0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [6]:
data = df[['Alcohol', 'Magnesium']]

In [7]:
data.head()

Unnamed: 0,Alcohol,Magnesium
0,14.23,127
1,13.2,100
2,13.16,101
3,14.37,113
4,13.24,118


# Normalization / min max scalar 

In [8]:
scalar = MinMaxScaler()

In [13]:
data['mm_alcohol'] = scalar.fit_transform(df[['Alcohol']]) 

In [14]:
data.head()

Unnamed: 0,Alcohol,Magnesium,mm_alcohol
0,14.23,127,0.842105
1,13.2,100,0.571053
2,13.16,101,0.560526
3,14.37,113,0.878947
4,13.24,118,0.581579


# Standardization

In [15]:
standard = StandardScaler()

In [18]:
data['std_Alcohol'] = standard.fit_transform(data[['Alcohol']]) 

In [20]:
data['Alcohol'] = df.Alcohol

In [21]:
data.head()

Unnamed: 0,Alcohol,Magnesium,mm_alcohol,std_Alcohol
0,14.23,127,0.842105,1.518613
1,13.2,100,0.571053,0.24629
2,13.16,101,0.560526,0.196879
3,14.37,113,0.878947,1.69155
4,13.24,118,0.581579,0.2957


# Log transformation

In [35]:
log = FunctionTransformer(np.log1p, validate=True)  # np.log1p is used for log(1 + x) to handle zeros in the data
data['Log_Alcohol'] = log.fit_transform(data[['Alcohol']])

In [36]:
data['Log_Alcohol'] = np.log(data['Alcohol'])

In [37]:
data.head()

Unnamed: 0,Alcohol,Magnesium,mm_alcohol,std_Alcohol,Log_Alcohol
0,14.23,127,0.842105,1.518613,2.655352
1,13.2,100,0.571053,0.24629,2.580217
2,13.16,101,0.560526,0.196879,2.577182
3,14.37,113,0.878947,1.69155,2.665143
4,13.24,118,0.581579,0.2957,2.583243


# Power tranformations

In [38]:
power = PowerTransformer()

In [39]:
data['pow_Alcohol'] = power.fit_transform(df[['Alcohol']])

In [40]:
data.head()

Unnamed: 0,Alcohol,Magnesium,mm_alcohol,std_Alcohol,Log_Alcohol,pow_Alcohol
0,14.23,127,0.842105,1.518613,2.655352,1.530388
1,13.2,100,0.571053,0.24629,2.580217,0.238381
2,13.16,101,0.560526,0.196879,2.577182,0.188751
3,14.37,113,0.878947,1.69155,2.665143,1.708049
4,13.24,118,0.581579,0.2957,2.583243,0.288052


# Alternative method power tranformation/ Box-Cox

In [46]:
power_transformer = PowerTransformer(method='box-cox')

In [47]:
data['box_cox_Alcohol'] = power_transformer.fit_transform(df[['Alcohol']])

In [48]:
data.head()

Unnamed: 0,Alcohol,Magnesium,mm_alcohol,std_Alcohol,Log_Alcohol,pow_Alcohol,robust_alcohol,box_cox_Alcohol
0,14.23,127,0.842105,1.518613,2.655352,1.530388,0.897338,1.529934
1,13.2,100,0.571053,0.24629,2.580217,0.238381,0.114068,0.238692
2,13.16,101,0.560526,0.196879,2.577182,0.188751,0.08365,0.189069
3,14.37,113,0.878947,1.69155,2.665143,1.708049,1.003802,1.7074
4,13.24,118,0.581579,0.2957,2.583243,0.288052,0.144487,0.288354


# Robust Scalar 

In [41]:
robust = RobustScaler()

In [42]:
data['robust_alcohol'] = robust.fit_transform(df[['Alcohol']])

In [43]:
data.head()

Unnamed: 0,Alcohol,Magnesium,mm_alcohol,std_Alcohol,Log_Alcohol,pow_Alcohol,robust_alcohol
0,14.23,127,0.842105,1.518613,2.655352,1.530388,0.897338
1,13.2,100,0.571053,0.24629,2.580217,0.238381,0.114068
2,13.16,101,0.560526,0.196879,2.577182,0.188751,0.08365
3,14.37,113,0.878947,1.69155,2.665143,1.708049,1.003802
4,13.24,118,0.581579,0.2957,2.583243,0.288052,0.144487


# Box-Cox technique

In [45]:
data.head()

Unnamed: 0,Alcohol,Magnesium,mm_alcohol,std_Alcohol,Log_Alcohol,pow_Alcohol,robust_alcohol,box_cox_Alcohol
0,14.23,127,0.842105,1.518613,2.655352,1.530388,0.897338,1.529934
1,13.2,100,0.571053,0.24629,2.580217,0.238381,0.114068,0.238692
2,13.16,101,0.560526,0.196879,2.577182,0.188751,0.08365,0.189069
3,14.37,113,0.878947,1.69155,2.665143,1.708049,1.003802,1.7074
4,13.24,118,0.581579,0.2957,2.583243,0.288052,0.144487,0.288354
