In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the retail sales CSV into `data`; a later cell calls `data.describe()` on it.
data = pd.read_csv("retail_store_sales.csv")

In [6]:
def get_variable_type(col_data, ordinal_threshold=0.2):
    """Heuristically label a column as Discrete, Continuous, Ordinal or Nominal.

    Numeric columns are "Discrete" when every non-missing value is a whole
    number and "Continuous" otherwise. Every other column is labelled from the
    share of distinct values among its non-missing entries: "Ordinal" when that
    share is below ``ordinal_threshold``, "Nominal" otherwise.

    NOTE(review): the Ordinal/Nominal split only measures cardinality; it does
    not check that the categories actually have a natural order.

    Parameters
    ----------
    col_data : pandas.Series
        The column to classify. Missing values are ignored.
    ordinal_threshold : float, default 0.2
        Upper bound (exclusive) on the distinct-value ratio for "Ordinal".

    Returns
    -------
    str
        One of "Discrete", "Continuous", "Ordinal" or "Nominal".
    """
    # Ignore missing values: NaN % 1 is NaN, which would otherwise make a
    # whole-number column look continuous.
    values = col_data.dropna()
    dtype = values.dtype

    # pandas' own dtype check also understands extension dtypes (category,
    # nullable Int64, string, ...), which np.issubdtype rejects with TypeError.
    # Booleans are treated as categorical labels, not as numbers.
    is_numeric = pd.api.types.is_numeric_dtype(dtype) and not pd.api.types.is_bool_dtype(dtype)

    if is_numeric:
        is_whole = bool((values % 1 == 0).all())  # vectorised; True for an empty column
        return "Discrete" if is_whole else "Continuous"

    if len(values) == 0:
        # No observations means no evidence of repeated categories, and the
        # ratio below would divide by zero.
        return "Nominal"

    unique_ratio = values.nunique() / len(values)
    return "Ordinal" if unique_ratio < ordinal_threshold else "Nominal"

def explore_dataset(file_path):
    """Build a one-row-per-column profile of a tabular dataset.

    Parameters
    ----------
    file_path : str, path-like, file-like or pandas.DataFrame
        Anything ``pd.read_csv`` can read. An already-loaded DataFrame is also
        accepted and profiled as-is, so a frame that is in memory does not
        have to be read from disk a second time.

    Returns
    -------
    pandas.DataFrame
        One row per input column with the keys "Column", "Data Type",
        "Unique Values", "Variable Type" and "No. of Digits/No. of Chars",
        plus "Min" and "Max" for Discrete/Continuous columns or "Avg Length"
        for Nominal/Ordinal columns. Keys that do not apply to a column come
        out as NaN in the resulting frame.

    Notes
    -----
    Missing values are dropped from each column before anything is computed.
    """
    # Load the data (or reuse the frame that was passed in).
    df = file_path if isinstance(file_path, pd.DataFrame) else pd.read_csv(file_path)
    summary = []

    for col in df.columns:
        col_data = df[col].dropna()  # drop missing values
        variable_type = get_variable_type(col_data)

        if variable_type == "Discrete" and pd.api.types.is_float_dtype(col_data.dtype):
            # Whole numbers stored as floats (e.g. an integer column that
            # contains NaN) stringify as "5.0"; format them without the
            # fractional part so the artificial ".0" is not counted.
            lengths = col_data.map(lambda value: len(format(value, ".0f")))
        else:
            lengths = col_data.astype(str).map(len)

        col_info = {
            "Column": col,
            "Data Type": col_data.dtype,
            "Unique Values": col_data.nunique(),
            "Variable Type": variable_type,
            # Distinct text lengths, in order of first appearance.
            "No. of Digits/No. of Chars": lengths.unique().tolist(),
        }

        if variable_type in ("Discrete", "Continuous"):  # numeric column
            col_info["Min"] = col_data.min()
            col_info["Max"] = col_data.max()
        elif variable_type in ("Nominal", "Ordinal"):  # categorical column
            col_info["Avg Length"] = lengths.mean()

        summary.append(col_info)

    return pd.DataFrame(summary)

# Run the profiler.
# NOTE(review): another cell loads "retail_store_sales.csv" into `data`;
# confirm that "2.csv" is the file that should be profiled here.
file_path = "2.csv"  # change to the path of the real file
summary_df = explore_dataset(file_path)
# Trailing expression instead of print(): the notebook renders the frame as a
# table rather than as wrapped, truncated plain text.
summary_df

              Column Data Type  Unique Values Variable Type  \
0     Transaction ID    object          12575       Nominal   
1        Customer ID    object             25       Ordinal   
2           Category    object              8       Ordinal   
3               Item    object            200       Ordinal   
4     Price Per Unit   float64             25    Continuous   
5           Quantity   float64             10      Discrete   
6        Total Spent   float64            227    Continuous   
7     Payment Method    object              3       Ordinal   
8           Location    object              2       Ordinal   
9   Transaction Date    object           1114       Ordinal   
10  Discount Applied    object              2       Ordinal   

   No. of Digits/No. of Chars  Avg Length  Min    Max  
0                        [11]   11.000000  NaN    NaN  
1                         [7]    7.000000  NaN    NaN  
2   [10, 13, 8, 9, 4, 29, 34]   14.497097  NaN    NaN  
3                [1

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of `data`.
data.describe()

Unnamed: 0,Price Per Unit,Quantity,Total Spent
count,11966.0,11971.0,11971.0
mean,23.365912,5.53638,129.652577
std,10.743519,2.857883,94.750697
min,5.0,1.0,5.0
25%,14.0,3.0,51.0
50%,23.0,6.0,108.5
75%,33.5,8.0,192.0
max,41.0,10.0,410.0
