In [6]:
import pandas as pd

# Load the dataset
df = pd.read_csv("Retail_sales_dataset.csv")

# Handle missing values (if any)
df.fillna(df.mean(numeric_only=True), inplace=True)

# Convert 'Customer_Segment' and other relevant columns to categorical if necessary
df['Customer_Segment'] = df['Customer_Segment'].astype('category')
df['Product_Category'] = df['Product_Category'].astype('category')
df['Preferred_Shopping_Channel'] = df['Preferred_Shopping_Channel'].astype('category')
df['Marital_Status'] = df['Marital_Status'].astype('category')

# Create income bins and age bins for analysis
df['Income_Bin'] = pd.cut(df['Annual_Income'], bins=5, labels=['Low', 'Lower-Mid', 'Mid', 'Upper-Mid', 'High'])
df['Age_Bin'] = pd.cut(df['Age'], bins=5, labels=['Youth', 'Young Adult', 'Middle Age', 'Senior', 'Elder'])

# Convert numerical values to categorical
df['Gender'] = df['Gender'].replace({0: 'Female', 1: 'Male'})
df['Marital_Status'] = df['Marital_Status'].replace({0: 'Single', 1: 'Married'})
df['Loyalty_Card'] = df['Loyalty_Card'].replace({0: 'No', 1: 'Yes'})
df['Discount_Avail'] = df['Discount_Avail'].replace({0: 'No', 1: 'Yes'})
df['Customer_Segment'] = df['Customer_Segment'].replace({0: 'Low Value', 1: 'Medium Value', 2: 'High Value'})

# Check data before export
print(df.head())



   Customer_ID     Customer_Name  Age  Gender  Annual_Income  Spending_Score  \
0            1       John Wilson   40    Male          87527               1   
1            2       Laura Stein   57  Female          75867              64   
2            3  Zachary Phillips   53    Male          76722              34   
3            4    Jose Dominguez   26  Female         142248              30   
4            5      Brent Thomas   50  Female         132721              31   

  Marital_Status Product_Category  Years_as_Customer  Number_of_Transactions  \
0         Single        Groceries                 15                      94   
1         Single         Clothing                  1                      72   
2        Married        Groceries                 13                      13   
3        Married           Sports                  3                      57   
4        Married             Home                 13                      56   

   Average_Transaction_Amount Loyalty_

In [8]:
# Write the prepared frame to disk so Tableau can read it.
# index=False leaves the DataFrame index out of the file.
TABLEAU_EXPORT_PATH = 'Retail_sales_prepared_for_Tableau.csv'
df.to_csv(TABLEAU_EXPORT_PATH, index=False)