# Exploratory Data Analysis (EDA)
This notebook explores the **Customer Combined Dataset** to understand its structure, quality, 
and underlying patterns. We will generate descriptive statistics, visualize trends, and summarize findings.

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Use seaborn's white-grid look for every figure drawn in this notebook
sns.set(style="whitegrid")

In [5]:
filename = "customer_combined_clean.csv"

# Fail fast when the CSV is missing: if we only printed a message here,
# `df` would never be bound and the following cells would crash with an
# unrelated NameError instead of pointing at the real cause.
if not os.path.exists(filename):
    raise FileNotFoundError(f"❌ File '{filename}' not found in {os.getcwd()}")

df = pd.read_csv(filename)
print(f"✅ File '{filename}' loaded successfully!")
display(df.head())  # preview first 5 rows

✅ File 'customer_combined_clean.csv' loaded successfully!


Unnamed: 0,CustomerID,Age,Gender,Location,IncomeLevel,SignupDate,TransactionID,TransactionDate,Amount,ProductCategory,PaymentMethod,InteractionID,InteractionDate,Platform,InteractionType,Sentiment
0,0009fdd2-ae63-45ca-8d5b-d0ea98381f7b,21,Female,Lake George,Low,2020-11-09,86cd577d-4ffd-498d-94ce-e68e6cca8865,2023-10-26,389.69,Home & Garden,Bank Transfer,26af70c2-acba-461c-95b8-8200de6b154a,2024-02-16,Instagram,Share,Positive
1,0009fdd2-ae63-45ca-8d5b-d0ea98381f7b,21,Female,Lake George,Low,2020-11-09,86cd577d-4ffd-498d-94ce-e68e6cca8865,2023-10-26,389.69,Home & Garden,Bank Transfer,4d9427f2-30fe-4298-ab66-8490e29202b5,2024-04-22,Facebook,Share,Neutral
2,000c6bbd-533a-432d-922c-ab64197e71c5,25,Male,North Oliviaton,High,2019-11-06,0dc30dbb-8109-4fa8-a7e0-f7ae108075e7,2023-05-17,500.82,Electronics,Debit Card,fff3e62c-3c78-4883-9a34-1d8aad5c1582,2023-07-11,Instagram,Like,Negative
3,000c6bbd-533a-432d-922c-ab64197e71c5,25,Male,North Oliviaton,High,2019-11-06,0dc30dbb-8109-4fa8-a7e0-f7ae108075e7,2023-05-17,500.82,Electronics,Debit Card,1b2e64c0-80f5-40ed-8d19-072eeb7f2b23,2024-03-15,Instagram,Comment,Positive
4,000c6bbd-533a-432d-922c-ab64197e71c5,25,Male,North Oliviaton,High,2019-11-06,0dc30dbb-8109-4fa8-a7e0-f7ae108075e7,2023-05-17,500.82,Electronics,Debit Card,72c0bb82-9307-4ba4-8925-a5100d3d0e56,2024-03-12,Twitter,Like,Neutral


In [6]:
# Dataset dimensions: number of records and number of fields
print("Rows:", len(df))
print("Columns:", len(df.columns))

# Field names, in file order
print("\nColumns:", list(df.columns))

# Per-column dtype and non-null count (reveals missing values)
df.info()

Rows: 5729
Columns: 16

Columns: ['CustomerID', 'Age', 'Gender', 'Location', 'IncomeLevel', 'SignupDate', 'TransactionID', 'TransactionDate', 'Amount', 'ProductCategory', 'PaymentMethod', 'InteractionID', 'InteractionDate', 'Platform', 'InteractionType', 'Sentiment']
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5729 entries, 0 to 5728
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CustomerID       5729 non-null   object 
 1   Age              5729 non-null   int64  
 2   Gender           5729 non-null   object 
 3   Location         5729 non-null   object 
 4   IncomeLevel      5729 non-null   object 
 5   SignupDate       5517 non-null   object 
 6   TransactionID    5729 non-null   object 
 7   TransactionDate  5593 non-null   object 
 8   Amount           5729 non-null   float64
 9   ProductCategory  5729 non-null   object 
 10  PaymentMethod    5729 non-null   object 
 11  InteractionID    5729 

In [7]:
# Summary statistics for every column (numeric and categorical),
# transposed so each dataset column becomes one row of the table
df.describe(include="all").T

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
CustomerID,5729.0,3000.0,7eb7f7df-9b0a-416a-829e-bcf4d8bd57dc,18.0,,,,,,,
Age,5729.0,,,,44.892651,16.191574,0.0,33.0,45.0,57.0,100.0
Gender,5729.0,2.0,Female,2909.0,,,,,,,
Location,5729.0,2696.0,West David,20.0,,,,,,,
IncomeLevel,5729.0,3.0,High,2333.0,,,,,,,
SignupDate,5517.0,1447.0,2024-06-08,32.0,,,,,,,
TransactionID,5729.0,3000.0,0dc30dbb-8109-4fa8-a7e0-f7ae108075e7,1576.0,,,,,,,
TransactionDate,5593.0,716.0,2023-05-17,1587.0,,,,,,,
Amount,5729.0,,,,496.106528,240.377345,0.0,377.08,500.82,612.51,999.86
ProductCategory,5729.0,5.0,Electronics,2738.0,,,,,,,


In [20]:
# Count of null entries in each column, printed under a heading line
print("Missing values per column:", df.isna().sum(), sep="\n")

Missing values per column:
CustomerID           0
Age                  0
Gender               0
Location             0
IncomeLevel          0
SignupDate         212
TransactionID        0
TransactionDate    136
Amount               0
ProductCategory      0
PaymentMethod        0
InteractionID        0
InteractionDate    143
Platform             0
InteractionType      0
Sentiment            0
dtype: int64
