In [64]:
import pandas as pd

data = {
    'Product': ['A', 'B', 'A', 'C', None, 'B', 'C', 'A', 'B', 'C', 'C', 'D', 'D'],
    'Sales_Q1': [1, 200, None, 150, 120, 130, None, 110, 180, 170, 900, None, -10],
    'Sales_Q2': [90, 210, 80, 140, "NA", 160, 130, 100, 450, "NA", 900000, "NA", 10000000]
}

def getPerformance(sales):
    if sales <= 100:
        return "low"
    elif 100 < sales < 300:
        return "medium"
    else:
        return "good"

def addZscoreColumn(df, col_name, sales):
    df[col_name] = (df[sales] - df[sales].mean()) / df[sales].std()

def addIsOutlierColumn(df, col_name, sales):
    q1 = df[sales].quantile(0.25)
    q3 = df[sales].quantile(0.75)
    range = 1.5*(q3 - q1)
    df[col_name] = df[sales].apply(lambda x: (x > (q3 + range)) or (x < (q1 - range)))

def main():
    df = pd.DataFrame.from_dict(data)
    df.dropna(subset='Product', axis=0, inplace=True)
    df.fillna(value={'Product':0, 'Sales_Q1':0, 'Sales_Q2':0}, axis=0, inplace=True)
    df.replace('NA', 0, inplace=True)

    df['total_sales'] = df['Sales_Q1'] + df['Sales_Q2']
    df['performance'] = df['total_sales'].apply(getPerformance)
    #addZscoreColumn(df, 'z-score_Q1', 'Sales_Q1')
    #addZscoreColumn(df, 'z-score_Q2', 'Sales_Q2')
    addZscoreColumn(df, 'z-score_total', 'total_sales')
    #addIsOutlierColumn(df, 'is_outlier_Q1', 'Sales_Q1')
    #addIsOutlierColumn(df, 'is_outlier_Q2', 'Sales_Q2')
    df['is_z_outlier'] = df['z-score_total'].apply(lambda x: abs(x) > 2)
    addIsOutlierColumn(df, 'is_iqr_outlier', 'total_sales')
    print(df)

main()

# sum of all A, mean, std

   Product  Sales_Q1  Sales_Q2  total_sales performance  z-score_total  \
0        A       1.0        90         91.0         low      -0.316033   
1        B     200.0       210        410.0        good      -0.315922   
2        A       0.0        80         80.0         low      -0.316036   
3        C     150.0       140        290.0      medium      -0.315963   
5        B     130.0       160        290.0      medium      -0.315963   
6        C       0.0       130        130.0      medium      -0.316019   
7        A     110.0       100        210.0      medium      -0.315991   
8        B     180.0       450        630.0        good      -0.315845   
9        C     170.0         0        170.0      medium      -0.316005   
10       C     900.0    900000     900900.0        good      -0.002678   
11       D       0.0         0          0.0         low      -0.316064   
12       D     -10.0  10000000    9999990.0        good       3.162520   

    is_z_outlier  is_iqr_outlier  
0 

  df.replace('NA', 0, inplace=True)


In [67]:
df = pd.DataFrame.from_dict(data)
df.dropna(subset='Product', axis=0, inplace=True)
df.fillna(value={'Product':0, 'Sales_Q1':0, 'Sales_Q2':0}, axis=0, inplace=True)
df = df.replace('NA', 0).infer_objects(copy=False)
df['sales_total'] = df['Sales_Q1'] + df['Sales_Q2']
#summary = df.groupby(['Product']).sum()
#part2 = df.groupby(['Product']).mean().rename(columns={'Sales_Q1': 'Q1_mean', 'Sales_Q2':'Q2_mean', 'sales_total':'total_mean'})
#part3 = df.groupby(['Product']).std().rename(columns={'Sales_Q1': 'Q1_std', 'Sales_Q2':'Q2_std', 'sales_total':'total_std'})
#summary = pd.merge(summary, part2, how='left', on=['Product'])
#summary = pd.merge(summary, part3, how='left', on=['Product'])
summary = df.groupby(['Product']).agg(['sum', 'mean', 'std'])
print(summary)



        Sales_Q1                     Sales_Q2                              \
             sum   mean         std       sum          mean           std   
Product                                                                     
A          111.0   37.0   63.221832       270  9.000000e+01  1.000000e+01   
B          510.0  170.0   36.055513       820  2.733333e+02  1.550269e+02   
C         1220.0  305.0  403.856410    900270  2.250675e+05  4.499550e+05   
D          -10.0   -5.0    7.071068  10000000  5.000000e+06  7.071068e+06   

        sales_total                              
                sum          mean           std  
Product                                          
A             381.0  1.270000e+02  7.209022e+01  
B            1330.0  4.433333e+02  1.724336e+02  
C          901490.0  2.253725e+05  4.503517e+05  
D         9999990.0  4.999995e+06  7.071061e+06  


  df = df.replace('NA', 0).infer_objects(copy=False)


In [112]:
# Original sales DataFrame
data = {
    'Product': ['A', 'B', 'A', 'C', None, 'B', 'C', 'A', 'B', 'C', 'D', 'E', 'D'],
    'Sales_Q1': [100, 200, None, 150, 120, 130, None, 110, 180, 170, 10, 400, 700],
    'Sales_Q2': [90, 210, 80, 140, None, 160, 130, 100, 200, None, 20, 500, 700]
}
df_sales = pd.DataFrame.from_dict(data)
df_sales = df_sales.dropna(subset=['Product'])
 
# New product info DataFrame
data_info = {
    'Product': ['A', 'B', 'C', 'D', 'E', 'F'],
    'Category': ['Electronics', 'Furniture', 'Electronics', 'Appliances', 'Furniture', 'Electronics'],
    'Region': ['North', 'South', 'East', 'North', 'West', 'East']
}
df_product_info = pd.DataFrame.from_dict(data_info)
df_sales['total_sales'] = df['Sales_Q1'] + df['Sales_Q2']
summary = df_sales.merge(df_product_info, on=['Product'])
summary = summary.groupby(['Category', 'Region'])[['total_sales']].agg(['sum', 'mean', 'std'])
#Q1 = summary['Sales_Q1'].merge(df_product_info, on=['Product'])
#Q2 = summary['Sales_Q2'].merge(df_product_info, on=['Product'])
#print(Q1)
#print(Q2)
print(summary)

                   total_sales                            
                           sum          mean           std
Category    Region                                        
Appliances  North   10900890.0  5.450445e+06  6.434028e+06
Electronics East         590.0  1.966667e+02  8.326664e+01
            North        381.0  1.270000e+02  7.209022e+01
Furniture   South       1330.0  4.433333e+02  1.724336e+02
            West           0.0  0.000000e+00           NaN


In [43]:
def delta(m1, m2):
    diff = abs(m1 - m2)
    if diff <= 6:
        return diff
    else:
        return 12 - diff

delta(4, 12)

4