In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy.stats import beta
import plotly.express as px
from datetime import datetime

import warnings
# Filter out all warnings
warnings.filterwarnings('ignore', category=Warning)

In [2]:
gmv = pd.read_csv('data/Shopify_Monthly_Order - gmv.csv')
#Remove all commas from all of the columns except 'BRAND' and then convert the columns to numeric
gmv = gmv.replace(',','', regex=True)
gmv = gmv.apply(pd.to_numeric, errors='ignore')
#Drop the column "2024/4" 
gmv = gmv.drop(columns=['2024/04'])
gmv.sample(5)

Unnamed: 0,BRAND,2022/01,2022/02,2022/03,2022/04,2022/05,2022/06,2022/07,2022/08,2022/09,...,2023/06,2023/07,2023/08,2023/09,2023/10,2023/11,2023/12,2024/01,2024/02,2024/03
7,Afloral,1036044.15,1069473.4,1425123.82,1133530.76,1101153.5,1049304.11,1185829.47,1353447.02,1599209.94,...,955300.53,1053526.07,1405067.92,1796844.22,2018891.28,3677305.69,640692.72,978652.23,916196.68,988270.78
339,Las Jaras Wines,22829.33,566096.06,110129.12,178231.52,407059.45,65859.97,58848.91,433144.56,195695.51,...,75834.86,43925.33,61696.25,24911.22,48033.98,67650.45,66380.37,17215.5,24502.53,33085.09
39,Bad Birdie,463813.46,551418.12,753694.62,941929.09,1160310.63,1493435.42,1356098.38,1531392.45,811100.08,...,1634358.26,1308805.85,1098328.57,735437.09,716223.49,2310278.26,1562606.47,619713.64,813374.99,1136714.14
181,East Olivia,30243.72,50845.36,25837.08,58260.03,58404.2,30126.71,26930.3,44339.92,38710.33,...,36737.0,43472.35,31346.96,27436.5,26864.15,23641.09,16755.06,45.54,,
485,Pheroe,6681.69,6461.35,5641.09,6549.17,7084.25,5109.23,7043.17,9446.08,8529.04,...,2834.9,2573.24,819.47,1254.28,4799.81,7145.62,4043.64,1814.3,1330.49,4126.88


In [3]:
orders = pd.read_csv('data/Shopify_Monthly_Order - orders.csv')
orders = orders.replace(',','', regex=True)
orders = orders.apply(pd.to_numeric, errors='ignore')
#Drop the column "2024/4" 
orders = orders.drop(columns=['2024/04'])
orders.sample(5)

Unnamed: 0,BRAND,2022/01,2022/02,2022/03,2022/04,2022/05,2022/06,2022/07,2022/08,2022/09,...,2023/06,2023/07,2023/08,2023/09,2023/10,2023/11,2023/12,2024/01,2024/02,2024/03
645,Whitewater Kids,1.0,8.0,14.0,8.0,9.0,3.0,7.0,7.0,8.0,...,4.0,8.0,4.0,6.0,7.0,2.0,,3.0,7.0,4.0
195,EMBR Brand,363.0,593.0,471.0,571.0,515.0,468.0,459.0,488.0,331.0,...,528.0,397.0,391.0,389.0,535.0,621.0,468.0,455.0,515.0,452.0
479,Parker Clay,1584.0,1143.0,1858.0,1560.0,1581.0,1218.0,1656.0,1363.0,2171.0,...,1258.0,1321.0,1403.0,2454.0,1579.0,2805.0,2994.0,825.0,937.0,
490,PMD Beauty,4706.0,4088.0,3611.0,4364.0,3892.0,2271.0,3295.0,2877.0,2227.0,...,2317.0,3532.0,2715.0,2240.0,2135.0,3692.0,3370.0,1227.0,,
235,FullyVital,131.0,187.0,187.0,144.0,139.0,179.0,152.0,75.0,104.0,...,125.0,144.0,126.0,123.0,116.0,114.0,194.0,200.0,188.0,193.0


In [4]:
def analyze_gmv_changes(df):
    results = []

    for index, row in df.iterrows():
        brand = row['BRAND']
        prev_month_gmv = None
        prev_month_name = None

        for month in df.columns[1:]:
            current_month_gmv = row[month]

            if pd.notna(prev_month_gmv) and pd.notna(current_month_gmv):
                if prev_month_gmv >= 100 and current_month_gmv >= 100:
                    percent_change = ((current_month_gmv - prev_month_gmv) / prev_month_gmv) * 100
                    
                    # Define the other year based on the current year
                    current_year = month[:4]
                    if current_year == '2024':
                        other_year = '2023'
                    elif current_year == '2023':
                        other_year = '2022'
                    else:
                        other_year = '2023'
                    
                    other_year_prev_month = f"{other_year}/{prev_month_name[5:]}"
                    other_year_current_month = f"{other_year}/{month[5:]}"

                    # Fetch GMVs for the same months in the other year
                    prev_other_year_gmv = df.loc[index, other_year_prev_month] if other_year_prev_month in df.columns else None
                    current_other_year_gmv = df.loc[index, other_year_current_month] if other_year_current_month in df.columns else None

                    other_year_change = None
                    if pd.notna(prev_other_year_gmv) and pd.notna(current_other_year_gmv) and prev_other_year_gmv >= 100 and current_other_year_gmv >= 100:
                        other_year_change = ((current_other_year_gmv - prev_other_year_gmv) / prev_other_year_gmv) * 100
                        if abs(percent_change) > 50 and abs(other_year_change - percent_change) > 50:
                            results.append(construct_result_row(brand, percent_change, prev_month_name, month, prev_month_gmv, current_month_gmv, other_year_change))
                    elif pd.isna(prev_other_year_gmv) or pd.isna(current_other_year_gmv):
                        if abs(percent_change) > 50:
                            results.append(construct_result_row(brand, percent_change, prev_month_name, month, prev_month_gmv, current_month_gmv, other_year_change))

            if pd.notna(prev_month_gmv) and pd.isna(current_month_gmv):
                results.append({
                    'Brand': brand,
                    'percent_change': None,
                    'prev_month': prev_month_name,
                    'current_month': month,
                    'prev_month_gmv': prev_month_gmv,
                    'current_month_gmv': current_month_gmv,
                    'other_year_change': None
                })

            prev_month_gmv = current_month_gmv
            prev_month_name = month

    results_df = pd.DataFrame(results)
    results_df['percent_change'] = round(results_df['percent_change']/100, 2)
    results_df['other_year_change'] = round(results_df['other_year_change']/100, 2)
    results_df = results_df[~results_df['current_month'].isin(['2024/04', '2023/11', '2022/11', '2022/12', '2023/12'])]
    return results_df

def construct_result_row(brand, percent_change, prev_month, current_month, prev_gmv, curr_gmv, other_year_change):
    return {
        'Brand': brand,
        'percent_change': percent_change,
        'prev_month': prev_month,
        'current_month': current_month,
        'prev_month_gmv': prev_gmv,
        'current_month_gmv': curr_gmv,
        'other_year_change': other_year_change  # Include this year-over-year change
    }

# Example usage:
# Assuming 'gmv' is your DataFrame containing the monthly GMV data for each brand
gmv_changes = analyze_gmv_changes(gmv)
gmv_changes.head(10)

Unnamed: 0,Brand,percent_change,prev_month,current_month,prev_month_gmv,current_month_gmv,other_year_change
0,12|12,0.79,2022/08,2022/09,31312.09,55925.46,-0.11
2,12|12,1.19,2023/07,2023/08,32394.81,71064.58,0.1
3,21Seeds Tequila,1.03,2022/03,2022/04,11684.72,23754.74,0.18
4,21Seeds Tequila,0.51,2023/01,2023/02,6140.99,9300.57,-0.12
5,21Seeds Tequila,-0.76,2023/02,2023/03,9300.57,2239.18,0.07
6,21Seeds Tequila,1.69,2023/06,2023/07,1525.04,4098.34,0.11
8,21Seeds Tequila,-0.59,2023/12,2024/01,2465.28,999.72,1.49
9,21Seeds Tequila,,2024/01,2024/02,999.72,,
10,A Kids Co.,0.57,2022/01,2022/02,95742.23,150452.35,-0.08
11,A Kids Co.,,2022/09,2022/10,49989.95,,


In [5]:
def analyze_order_volume_changes(df):
    results = []

    for index, row in df.iterrows():
        brand = row['BRAND']
        prev_month_order = None
        prev_month_name = None

        for month in df.columns[1:]:  # Assuming the first column is 'BRAND'
            current_month_order = row[month]

            if pd.notna(prev_month_order) and pd.notna(current_month_order):
                if prev_month_order >= 10 and current_month_order >= 10:
                    percent_change = ((current_month_order - prev_month_order) / prev_month_order) * 100
                    
                    # Define the other year based on the current year
                    current_year = month[:4]
                    if current_year == '2024':
                        other_year = '2023'
                    elif current_year == '2023':
                        other_year = '2022'
                    else:
                        other_year = '2023'
                    
                    other_year_prev_month = f"{other_year}/{prev_month_name[5:]}"
                    other_year_current_month = f"{other_year}/{month[5:]}"

                    # Fetch volumes for the same months in the other year
                    prev_other_year_vol = df.loc[index, other_year_prev_month] if other_year_prev_month in df.columns else None
                    current_other_year_vol = df.loc[index, other_year_current_month] if other_year_current_month in df.columns else None

                    other_year_change = None
                    if pd.notna(prev_other_year_vol) and pd.notna(current_other_year_vol) and prev_other_year_vol >= 10 and current_other_year_vol >= 10:
                        other_year_change = ((current_other_year_vol - prev_other_year_vol) / prev_other_year_vol) * 100
                        if abs(percent_change) > 50 and abs(other_year_change - percent_change) > 50:
                            results.append(construct_result_row(brand, percent_change, prev_month_name, month, prev_month_order, current_month_order, other_year_change))
                    elif pd.isna(prev_other_year_vol) or pd.isna(current_other_year_vol):
                        if abs(percent_change) > 50:
                            results.append(construct_result_row(brand, percent_change, prev_month_name, month, prev_month_order, current_month_order, other_year_change))

            if pd.notna(prev_month_order) and pd.isna(current_month_order):
                results.append({
                    'Brand': brand,
                    'percent_change': None,
                    'prev_month': prev_month_name,
                    'current_month': month,
                    'prev_month_order': prev_month_order,
                    'current_month_order': current_month_order,
                    'other_year_change': None
                })

            prev_month_order = current_month_order
            prev_month_name = month

    results_df = pd.DataFrame(results)
    results_df['percent_change'] = round(results_df['percent_change']/100, 2)
    results_df['other_year_change'] = round(results_df['other_year_change']/100, 2)
    #Drop rows when the current month is '2024/04', '2023/11', or '2022/11' or '2022/12' or '2023/12'
    results_df = results_df[results_df.current_month != '2024/04']
    results_df = results_df[results_df.current_month != '2023/11']
    results_df = results_df[results_df.current_month != '2022/11']
    results_df = results_df[results_df.current_month != '2022/12']
    results_df = results_df[results_df.current_month != '2023/12']
    return results_df

def construct_result_row(brand, percent_change, prev_month, current_month, prev_vol, curr_vol, other_year_change):
    return {
        'Brand': brand,
        'percent_change': percent_change,
        'prev_month': prev_month,
        'current_month': current_month,
        'prev_month_order': prev_vol,
        'current_month_order': curr_vol,
        'other_year_change': other_year_change  # Additional column for year-over-year change
    }

# Example usage:
# Assuming 'order_volume_df' is your DataFrame containing the monthly order volume data for each brand
order_changes = analyze_order_volume_changes(orders)
order_changes.head(10)

Unnamed: 0,Brand,percent_change,prev_month,current_month,prev_month_order,current_month_order,other_year_change
0,12|12,0.6,2022/08,2022/09,241.0,386.0,-0.03
2,12|12,0.53,2022/12,2023/01,206.0,315.0,1.3
3,12|12,0.62,2023/03,2023/04,277.0,449.0,0.04
4,12|12,1.22,2023/07,2023/08,232.0,515.0,0.09
5,12|12,0.51,2023/12,2024/01,333.0,504.0,-0.05
6,21Seeds Tequila,1.06,2022/03,2022/04,106.0,218.0,0.19
7,21Seeds Tequila,-0.74,2023/02,2023/03,81.0,21.0,0.08
8,21Seeds Tequila,1.87,2023/06,2023/07,15.0,43.0,-0.01
9,21Seeds Tequila,0.52,2023/09,2023/10,21.0,32.0,-0.15
10,21Seeds Tequila,,2024/01,2024/02,9.0,,


In [6]:
#for each brand, find the avg monthly GMV and avg monthly order volume
gmv['avg_monthly_gmv'] = round(gmv.iloc[:, 1:].mean(axis=1),0)
orders['avg_monthly_orders'] = round(orders.iloc[:, 1:].mean(axis=1),0)
gmv.sample(5)

Unnamed: 0,BRAND,2022/01,2022/02,2022/03,2022/04,2022/05,2022/06,2022/07,2022/08,2022/09,...,2023/07,2023/08,2023/09,2023/10,2023/11,2023/12,2024/01,2024/02,2024/03,avg_monthly_gmv
222,FNX,444992.29,403828.19,345205.26,413759.76,384559.87,262141.92,256695.95,381311.81,221380.44,...,117475.7,223759.7,127793.76,116461.78,106495.59,69802.89,70727.75,88078.05,66986.63,223356.0
243,Giften Market,109708.41,105051.33,107449.99,222769.75,236078.4,184132.16,156549.87,192635.55,170221.36,...,55902.53,61865.45,72301.59,132705.16,146507.32,319034.42,182571.09,171218.77,111053.35,151555.0
644,West & Willow,405622.02,278140.63,333964.27,417021.41,381631.81,341671.85,374522.46,601067.86,730047.64,...,629648.04,640308.59,605411.64,902424.18,3539032.05,3464164.05,627637.36,500012.14,510962.83,1129406.0
27,ARMRA Colostrum™,147500.0,173925.31,244401.6,297933.98,383217.31,446314.55,581695.58,735827.11,1060292.45,...,6914151.12,7440887.97,8621286.54,9775120.38,11987945.6,9778062.86,3735207.5,,,3769019.0
529,Ruggish Co.,77209.12,72401.22,61877.32,51290.75,49872.57,40327.9,44464.4,38490.88,38186.52,...,31122.04,30021.51,36942.62,21053.97,35758.11,12374.16,17934.78,17914.65,20395.29,38437.0


In [7]:
#Join avg_monthly_gmv to gmvs_changes
gmv_changes = gmv_changes.merge(gmv[['BRAND', 'avg_monthly_gmv']], left_on='Brand', right_on='BRAND', how='left')
#Drop 'BRAND' column
gmv_changes = gmv_changes.drop(columns=['BRAND'])
gmv_changes.head(5)

Unnamed: 0,Brand,percent_change,prev_month,current_month,prev_month_gmv,current_month_gmv,other_year_change,avg_monthly_gmv
0,12|12,0.79,2022/08,2022/09,31312.09,55925.46,-0.11,48464.0
1,12|12,1.19,2023/07,2023/08,32394.81,71064.58,0.1,48464.0
2,21Seeds Tequila,1.03,2022/03,2022/04,11684.72,23754.74,0.18,8920.0
3,21Seeds Tequila,0.51,2023/01,2023/02,6140.99,9300.57,-0.12,8920.0
4,21Seeds Tequila,-0.76,2023/02,2023/03,9300.57,2239.18,0.07,8920.0


In [8]:
#Join avg_monthly_orders to order_changes
order_changes = order_changes.merge(orders[['BRAND', 'avg_monthly_orders']], left_on='Brand', right_on='BRAND', how='left')
#Drop 'BRAND' column
order_changes = order_changes.drop(columns=['BRAND'])
order_changes.head(5)

Unnamed: 0,Brand,percent_change,prev_month,current_month,prev_month_order,current_month_order,other_year_change,avg_monthly_orders
0,12|12,0.6,2022/08,2022/09,241.0,386.0,-0.03,370.0
1,12|12,0.53,2022/12,2023/01,206.0,315.0,1.3,370.0
2,12|12,0.62,2023/03,2023/04,277.0,449.0,0.04,370.0
3,12|12,1.22,2023/07,2023/08,232.0,515.0,0.09,370.0
4,12|12,0.51,2023/12,2024/01,333.0,504.0,-0.05,370.0


In [9]:
order_changes.to_csv('data/order_change.csv', index=False)
gmv_changes.to_csv('data/gmv_change.csv', index=False)