In [2]:
import pandas as pd
from impala.dbapi import connect
from impala.util import as_pandas
import ast
import math

import redis_io as redis_io

##############################################################################################################
# REDIS-KEY
##############################################################################################################
# monthly_sales_vol:{year}   << agg_montly_sales_volumn(year,unit_numofproduct, unit_totalamount)
# desc_total_sales_vol:{year}   << def desc_total_sales_volumn(year):
# monthly_total_amount_per_cate:{year}   << def agg_montly_total_amount_by_product_cate(year):
# monthly_total_amount_per_product:{year}   << def agg_montly_total_amount_by_product(year, product_cate):
# timebase_sales_amount:{year}:{day_of_week}  << def analysis_timebase_sales_amount(year, day_of_week):
##############################################################################################################

def agg_montly_sales_volumn(year,unit_numofproduct, unit_totalamount):
    """Return the monthly sales volume of one year as a list of 12 dicts.

    The result is looked up in Redis first (key ``monthly_sales_vol:{year}``).
    On a cache miss the receipts of the year are aggregated per month in
    Impala, months without any receipt are filled with zero rows, the two
    measures are scaled by the given units, and the list is written back to
    Redis before it is returned.

    Parameters
    ----------
    year : str or int
        Four-digit year; compared against the first four characters of
        ``date_receipt_num``.
    unit_numofproduct : number
        Divisor applied to the monthly ``num_of_product`` sum.
    unit_totalamount : number
        Divisor applied to the monthly ``total_amount`` sum.

    Returns
    -------
    list of dict
        One dict per month, ordered by ``year_month`` ascending, with the keys
        ``year_month``, ``num_of_product`` and ``total_amount`` (the two
        measures are rounded to ``int`` after scaling). On a cache hit the
        value returned by ``redis_io.read_transaction`` is passed through
        unchanged.
    """
    # Accept an int year as well; the key format and the SQL comparison both
    # work on the string form.
    year = str(year)

    # Redis read cache value
    REDIS_KEY = "monthly_sales_vol:{0}".format(year)
    cached_monthly_sales_vol = redis_io.read_transaction(REDIS_KEY)

    # NOTE(review): the cache key contains only the year, so a list that was
    # cached with different unit arguments is returned as-is. The key format is
    # kept because other code reads the same key.
    if cached_monthly_sales_vol is not None:
        return cached_monthly_sales_vol

    conn = connect(host='salest-master-server', port=21050)
    try:
        cur = conn.cursor()
        cur.execute('USE salest')
        # The year is bound as a query parameter instead of being concatenated
        # into the SQL text, so it is quoted/escaped by the driver.
        cur.execute(
            """
            SELECT year_month, SUM(num_of_product) AS num_of_product, SUM(sales_amount) AS total_amount
            FROM (
                SELECT SUBSTR(date_receipt_num,1,7) AS year_month, num_of_product, sales_amount
                FROM ext_tr_receipt WHERE SUBSTR(date_receipt_num,1,4) = %(year)s
            ) view_tr_recipt
            GROUP BY year_month ORDER BY year_month ASC
            """,
            {'year': year}
        )
        df = as_pandas(cur)
    finally:
        # Release the connection even when the query or the fetch fails.
        conn.close()

    ### Fill non-included monthly row with zero base values.
    month_index_arr = ["{0}-{1:02d}".format(year, month) for month in range(1, 13)]
    df_base_index = pd.DataFrame(data=month_index_arr, columns=['year_month'])
    df_all_monthly_sales_volume = (
        pd.merge(df, df_base_index, on='year_month', how='outer')
        .fillna(0)
        .sort_values(by='year_month', ascending=True)
    )
    ###

    # The outer merge adds no new columns, so the query's column names line up
    # with the positions of each merged row tuple.
    df_column_name_list = list(df.columns.values)
    unit_by_column = {
        'num_of_product': unit_numofproduct,
        'total_amount': unit_totalamount,
    }

    list_month_sales_volume = []
    for row in df_all_monthly_sales_volume.itertuples(index=False):
        dict_month_sales_volume = {}
        for key, value in zip(df_column_name_list, row):
            if key in unit_by_column:
                value = int(round(value / unit_by_column[key]))
            dict_month_sales_volume[key] = value
        list_month_sales_volume.append(dict_month_sales_volume)

    # Redis save cache value
    redis_io.write_transaction(REDIS_KEY, list_month_sales_volume)

    return list_month_sales_volume



In [3]:
# Monthly sales volume for 2015: product counts are divided by 1 (left unscaled)
# and sales amounts are divided by 10000 before rounding to int.
agg_montly_sales_volumn('2015',1,10000)

[{'num_of_product': 1621, 'total_amount': 450, 'year_month': '2015-01'},
 {'num_of_product': 1582, 'total_amount': 450, 'year_month': '2015-02'},
 {'num_of_product': 1747, 'total_amount': 505, 'year_month': '2015-03'},
 {'num_of_product': 1317, 'total_amount': 379, 'year_month': '2015-04'},
 {'num_of_product': 2585, 'total_amount': 651, 'year_month': '2015-05'},
 {'num_of_product': 2091, 'total_amount': 574, 'year_month': '2015-06'},
 {'num_of_product': 2335, 'total_amount': 671, 'year_month': '2015-07'},
 {'num_of_product': 2984, 'total_amount': 855, 'year_month': '2015-08'},
 {'num_of_product': 1997, 'total_amount': 598, 'year_month': '2015-09'},
 {'num_of_product': 1889, 'total_amount': 537, 'year_month': '2015-10'},
 {'num_of_product': 1291, 'total_amount': 391, 'year_month': '2015-11'},
 {'num_of_product': 1343, 'total_amount': 395, 'year_month': '2015-12'}]

In [5]:
# Read back the Redis entry for 2015; "monthly_sales_vol:{year}" is the key
# agg_montly_sales_volumn writes its result to.
redis_io.read_transaction("monthly_sales_vol:2015")