In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [7]:
# Read the raw sales CSV into a DataFrame.
# The source location is named so it sits alongside `output_file` defined later.
input_file = "C:/Users/Gurinder/Desktop/Brainy Tasks/100_Sales.csv"
sales_data = pd.read_csv(input_file)

In [8]:
# Keep only the float64/int64 columns: these are the inputs to the
# Z-Score and mean-centering steps below.
numerical_data = sales_data.select_dtypes(include=['float64', 'int64'])
numerical_columns = numerical_data.columns

In [9]:
# Missing-value handling, in two steps:
#   1. discard columns that contain nothing but NaN;
#   2. fill every remaining NaN with the median of its own column.
non_empty_columns = numerical_data.dropna(axis="columns", how="all")
column_medians = non_empty_columns.median()
numerical_data = non_empty_columns.fillna(column_medians)

In [10]:
# Refresh numerical_columns so it matches numerical_data after the missing-value
# step, which may have dropped all-NaN columns.
numerical_columns = numerical_data.columns  # column labels currently present in numerical_data

In [11]:
# 1. Apply Z-Score Standardization using sklearn
# fit_transform is wrapped back into a DataFrame here. The row index of
# numerical_data is passed explicitly: without it the new frame would get a
# fresh 0..n-1 index, and the later column-wise pd.concat (which aligns on
# index labels) would misalign rows whenever the source data does not use
# the default index.
# Column labels come straight from numerical_data so the cell does not rely on
# a separately maintained column list being in sync.
scaler = StandardScaler()
z_score_scaled = pd.DataFrame(
    scaler.fit_transform(numerical_data),
    index=numerical_data.index,
    columns=[f"{col}_zscore" for col in numerical_data.columns],
)

In [12]:
# 2. Apply Mean-Centering
# Subtract each column's mean from that column, then tag the resulting
# columns with a "_meancentered" suffix.
column_means = numerical_data.mean()
mean_centered = numerical_data.sub(column_means, axis="columns").add_suffix("_meancentered")

In [13]:
# Place the original columns, the z-scored columns and the mean-centered
# columns side by side in a single frame.
frames_to_join = [sales_data, z_score_scaled, mean_centered]
result = pd.concat(frames_to_join, axis="columns")

In [14]:
# Write the combined frame to CSV; the row index is not written to the file.
output_file = "C:/Users/Gurinder/Desktop/Brainy Tasks/ZScore_MeanCentered_Sales.csv"
result.to_csv(path_or_buf=output_file, index=False)

In [15]:
# Print a heading followed by the first 5 rows of the combined result.
preview_rows = result.head()
print("Preview of the Z-Score and Mean-Centered Data:", preview_rows, sep="\n")

Preview of the Z-Score and Mean-Centered Data:
                              Region                Country        Item_Type  \
0              Australia and Oceania                 Tuvalu        Baby Food   
1  Central America and the Caribbean                Grenada           Cereal   
2                             Europe                 Russia  Office Supplies   
3                 Sub_Saharan Africa  Sao Tome and Principe           Fruits   
4                 Sub_Saharan Africa                 Rwanda  Office Supplies   

  Sales_Channel Order_Priority   Ship_Date  Unit_Cost  Total_Revenue  \
0       Offline              H  27/06/2010     159.42     2533654.00   
1        Online              C  15/09/2012     117.11      576782.80   
2       Offline              L  05/08/2014     524.96     1158502.59   
3        Online              C  07/05/2014       6.92       75591.66   
4       Offline              L  02/06/2013     524.96     3296425.02   

   Total_Profit  Unnamed: 9  Unnamed: 1

In [16]:
# Report completion and where the output CSV was written.
completion_message = f"Z-Score Standardization and Mean-Centering completed! File saved to {output_file}."
print(completion_message)

Z-Score Standardization and Mean-Centering completed! File saved to C:/Users/Gurinder/Desktop/Brainy Tasks/ZScore_MeanCentered_Sales.csv.
