In [None]:
import pandas as pd, numpy as np
from pathlib import Path

# Folder, relative to the notebook's working directory, that holds the
# bottom-10 CSV inputs and receives the engineered feature file.
IN_DIR = Path("./outputs_low_sellers")

# Every path is built from IN_DIR. Never join IN_DIR with an absolute
# "E:\..." string: on Windows pathlib drops the left operand when the right
# one is absolute, so IN_DIR would be silently ignored and the notebook would
# only run on the machine that owns that drive layout.
monthly_path = IN_DIR / "bottom10_products_monthly.csv"
bottom_path  = IN_DIR / "bottom10_products.csv"
out_path     = IN_DIR / "bottom10_features.csv"

# Fail early, with the resolved location in the message, when an input is not
# where IN_DIR says it should be.
for required_csv in (monthly_path, bottom_path):
    if not required_csv.is_file():
        raise FileNotFoundError(
            f"Input file not found: {required_csv.resolve()} - "
            "point IN_DIR at the folder that contains the bottom-10 CSV exports."
        )

# YearMonth is parsed to datetime at load time; the later steps rely on its
# .dt accessor and on date-range arithmetic.
monthly = pd.read_csv(monthly_path, parse_dates=['YearMonth'])
bottom  = pd.read_csv(bottom_path)

# Per-product summary of the monthly series. Each entry maps an output column
# name to the (source column, aggregation) pair that produces it.
#   - Months_Active counts the non-null Month_Quantity rows of a product.
#   - Std_Monthly_Quantity is NaN for a product with a single monthly row.
monthly_agg_spec = {
    'Avg_Monthly_Quantity': ('Month_Quantity', 'mean'),
    'Avg_Monthly_Value': ('Month_Value', 'mean'),
    'Months_Active': ('Month_Quantity', 'count'),
    'Total_Quantity': ('Month_Quantity', 'sum'),
    'Total_Value': ('Month_Value', 'sum'),
    'Std_Monthly_Quantity': ('Month_Quantity', 'std'),
}

# One row per Description, with Description restored as a regular column.
feat = monthly.groupby('Description').agg(**monthly_agg_spec).reset_index()

# Month-start calendar spanning the first to the last month seen in `monthly`.
month_col = monthly['YearMonth']
all_months = pd.date_range(start=month_col.min(), end=month_col.max(), freq='MS')

# Products listed in the bottom-10 file.
products = bottom['Description'].tolist()

# Dense (month x product) grid: every product gets one row per calendar month.
full_index = pd.MultiIndex.from_product(
    [all_months, products],
    names=['YearMonth', 'Description'],
)

# Align the observed rows onto the grid. Month/product pairs with no row in
# `monthly` are created with 0 in every value column.
full = (
    monthly
    .set_index(list(full_index.names))
    .reindex(full_index, fill_value=0)
    .reset_index()
)

# Share of calendar months, per product, in which Month_Quantity is exactly 0.
# The boolean flag is averaged within each product, giving a ratio in [0, 1].
is_zero_month = full['Month_Quantity'].eq(0)
zero_ratio = (
    is_zero_month
    .groupby(full['Description'])
    .mean()
    .reset_index(name='Zero_Sales_Ratio')
)

# Latest month, per product, with a strictly positive quantity. Products that
# never have a positive month do not appear in `last_sale` at all.
months_with_sales = full.loc[full['Month_Quantity'] > 0]
last_sale = (
    months_with_sales
    .groupby('Description')['YearMonth']
    .agg(Last_Sale_Date='max')
    .reset_index()
)

# Recency is measured in whole months back from the last month of the grid.
max_month_ts = full['YearMonth'].max()
max_period = max_month_ts.to_period('M')
last_sale_period = last_sale['Last_Sale_Date'].dt.to_period('M')

# Subtracting monthly periods yields month offsets; `.n` is the month count.
months_since_sale = (max_period - last_sale_period).map(lambda offset: offset.n)
last_sale['Last_Sale_Months_Ago'] = months_since_sale.astype(int)

# Attach the sparsity and recency features to the per-product summary.
# Left joins keep every product already in `feat` and leave NaN where a helper
# table has no row for it.
feat = feat.merge(zero_ratio, on='Description', how='left')
feat = feat.merge(
    last_sale[['Description', 'Last_Sale_Months_Ago']],
    on='Description',
    how='left',
)

# Products with no positive-quantity month are missing from `last_sale`; they
# fall back to their Months_Active count. The NaN introduced by the left join
# upcasts the column to float, so cast back to int after filling. Otherwise the
# column dtype (and the "3" vs "3.0" formatting in the CSV) would depend on
# whether any product happened to be missing.
feat['Last_Sale_Months_Ago'] = (
    feat['Last_Sale_Months_Ago']
    .fillna(feat['Months_Active'])
    .astype(int)
)

# Coefficient of variation of monthly quantity. A zero mean is mapped to NaN
# first so the ratio comes out as NaN instead of +/-inf.
feat['CV_Monthly_Quantity'] = (
    feat['Std_Monthly_Quantity'] / feat['Avg_Monthly_Quantity'].replace(0, np.nan)
)

# Create the destination folder (and any missing parents) before writing;
# an already existing folder is not an error.
output_dir = out_path.parent
output_dir.mkdir(parents=True, exist_ok=True)

# Persist the feature table without the positional index, then confirm the
# location and show the first rows.
feat.to_csv(path_or_buf=out_path, index=False)
print(f"✅ Saved feature file at: {out_path}")
feature_preview = feat.head()
print(feature_preview)


✅ Saved feature file at: outputs_low_sellers\bottom10_features.csv
                          Description  Avg_Monthly_Quantity  \
0               *Boombox Ipod Classic                   1.0   
1    BISCUIT TIN, MINT,IVORY, VINTAGE                   1.0   
2  BLUE WHITE PLASTIC RINGS LAMPSHADE                   1.0   
3         CAT W SUNGLASSES BLANK CARD                   4.0   
4           CHAMPAGNE TRAY BLANK CARD                   1.0   

   Avg_Monthly_Value  Months_Active  Total_Quantity  Total_Value  \
0              16.98              1               1        16.98   
1               6.75              1               1         6.75   
2               0.85              1               1         0.85   
3               0.76              1               4         0.76   
4               0.19              1               1         0.19   

   Std_Monthly_Quantity  Zero_Sales_Ratio  Last_Sale_Months_Ago  \
0                   NaN          0.923077                     0   
1          