In [1]:
import pandas as pd

def build_monthly_metrics(data):
    """Aggregate transaction rows into one row per (YEAR_MONTH, STORE_NBR).

    Args:
        data: DataFrame containing the columns ``YEAR_MONTH``, ``STORE_NBR``,
            ``TOT_SALES``, ``LYLTY_CARD_NBR`` and ``TXN_ID``.

    Returns:
        A new DataFrame with the columns ``YEAR_MONTH``, ``STORE_NBR``,
        ``total_sales`` (sum of ``TOT_SALES``), ``total_customers`` (distinct
        ``LYLTY_CARD_NBR``), ``total_transactions`` (distinct ``TXN_ID``) and
        ``avg_transactions_per_customer`` (transactions / customers).
        ``data`` itself is not modified.
    """
    monthly = (
        data.groupby(['YEAR_MONTH', 'STORE_NBR'])
        .agg(
            total_sales=('TOT_SALES', 'sum'),
            total_customers=('LYLTY_CARD_NBR', 'nunique'),
            # Count distinct ids, not rows: a TXN_ID that appears on several
            # rows (presumably one row per product line -- confirm against
            # the data) must only be counted as one transaction.
            total_transactions=('TXN_ID', 'nunique'),
        )
        .reset_index()
    )

    # Calculate average number of transactions per customer
    monthly['avg_transactions_per_customer'] = (
        monthly['total_transactions'] / monthly['total_customers']
    )
    return monthly


if __name__ == "__main__":
    # Load the dataset
    file_path = 'QVI_data.csv'
    data = pd.read_csv(file_path)

    # Convert DATE column to datetime format
    data['DATE'] = pd.to_datetime(data['DATE'])

    # Extract year and month from DATE for aggregation
    data['YEAR_MONTH'] = data['DATE'].dt.to_period('M')

    # Aggregate data at a monthly level
    monthly_data = build_monthly_metrics(data)

    # Display the first few rows of the aggregated data
    print(monthly_data.head())


  YEAR_MONTH  STORE_NBR  total_sales  total_customers  total_transactions  \
0    2018-07          1        206.9               49                  52   
1    2018-07          2        150.8               39                  41   
2    2018-07          3       1205.7              112                 138   
3    2018-07          4       1399.9              128                 160   
4    2018-07          5        812.0               93                 120   

   avg_transactions_per_customer  
0                       1.061224  
1                       1.051282  
2                       1.232143  
3                       1.250000  
4                       1.290323  
