In [1]:
import pandas as pd
import os
pd.__version__

'2.2.2'

In [2]:
# The project root is taken to be the parent of the current working directory
# (presumably the notebook lives in a sub-folder of the project - confirm).
root_dir = os.path.split(os.getcwd())[0]
data_dir = os.path.join(root_dir, 'data')
data_set_path = os.path.join(root_dir, 'data', 'sales_subset.csv')

In [3]:
"""Read File"""
# Load the CSV and discard the 'Unnamed: 0' column in one expression.
# NOTE(review): 'Unnamed: 0' is presumably a saved row index from an earlier
# export - confirm before relying on the default RangeIndex.
sales = pd.read_csv(data_set_path).drop(columns=['Unnamed: 0'])

In [4]:
def min_max_normalize(df):
    """Normalize using Min-Max scaling so each column spans 0 to 1.

    Args:
        df: A numeric pandas DataFrame (scaled column by column) or Series.

    Returns:
        An object shaped like ``df`` holding ``(x - min) / (max - min)``.
        A constant column has a zero range; it is returned as all zeros
        rather than the NaN that dividing 0 by 0 would produce.
    """
    lowest = df.min()
    span = df.max() - lowest
    # A DataFrame reduces to a per-column Series and a Series to a scalar;
    # in both cases swap a zero range for 1 so the division stays defined.
    if isinstance(span, pd.Series):
        span = span.where(span != 0, 1)
    elif pd.notna(span) and span == 0:
        span = 1
    return (df - lowest) / span

def z_score_normalize(df):
    """Normalize using Z-score standardization: ``(x - mean) / std``.

    Args:
        df: A numeric pandas DataFrame (scaled column by column) or Series.

    Returns:
        An object shaped like ``df``. The standard deviation is whatever
        ``df.std()`` returns (pandas defaults to the sample estimate,
        ddof=1). A column whose standard deviation is exactly zero is
        returned as all zeros rather than the NaN that 0/0 would produce.
    """
    center = df.mean()
    spread = df.std()
    # A DataFrame reduces to a per-column Series and a Series to a scalar;
    # in both cases swap a zero spread for 1 so the division stays defined.
    if isinstance(spread, pd.Series):
        spread = spread.where(spread != 0, 1)
    elif pd.notna(spread) and spread == 0:
        spread = 1
    return (df - center) / spread

def max_normalize(df):
    """Divide every value by the maximum of its column.

    Args:
        df: A numeric pandas DataFrame (scaled column by column) or Series.

    Returns:
        An object shaped like ``df`` in which a non-zero maximum maps to 1.
    """
    peak = df.max()
    scaled = df / peak
    return scaled

def robust_scale(df):
    """Normalize using robust scaling: ``(x - median) / IQR``.

    Args:
        df: A numeric pandas DataFrame (scaled column by column) or Series.

    Returns:
        An object shaped like ``df``, centred on the median and divided by
        the interquartile range (75th minus 25th percentile). A column whose
        IQR is zero is only centred, not scaled, so the result stays finite
        instead of becoming NaN or infinite.
    """
    center = df.median()
    iqr = df.quantile(0.75) - df.quantile(0.25)
    # A DataFrame reduces to a per-column Series and a Series to a scalar;
    # in both cases swap a zero IQR for 1 so the division stays defined.
    if isinstance(iqr, pd.Series):
        iqr = iqr.where(iqr != 0, 1)
    elif pd.notna(iqr) and iqr == 0:
        iqr = 1
    return (df - center) / iqr
    

# Work on a one-column DataFrame so every scaler receives the same input.
temperature = sales[['temperature_c']]

print("Original DataFrame:")
print(temperature)

# Call each scaler exactly once, directly on the frame. Routing the column
# through `.agg(scaler)` first would only re-scale already-scaled data.
scalers = [
    ("Min-Max Normalized", min_max_normalize),
    ("Z-Score Normalized", z_score_normalize),
    ("Max Normalized", max_normalize),
    ("Robust Scaled", robust_scale),
]
for label, scaler in scalers:
    print(f"\n{label} DataFrame:")
    print(scaler(temperature))

Original DataFrame:
       temperature_c
0           5.727778
1           8.055556
2          16.816667
3          22.527778
4          27.050000
...              ...
10769       9.644444
10770      15.938889
10771      27.288889
10772      25.644444
10773      22.250000

[10774 rows x 1 columns]

Min-Max Normalized DataFrame:
       temperature_c
0           0.334036
1           0.389203
2           0.596840
3           0.732192
4           0.839368
...              ...
10769       0.426860
10770       0.576037
10771       0.845030
10772       0.806057
10773       0.725609

[10774 rows x 1 columns]

Z-Score Normalized DataFrame:
       temperature_c
0          -1.008239
1          -0.773642
2           0.109317
3           0.684892
4           1.140648
...              ...
10769      -0.613511
10770       0.020853
10771       1.164724
10772       0.998994
10773       0.656897

[10774 rows x 1 columns]

Max Normalized DataFrame:
       temperature_c
0           0.169322
1           0.2