In [1]:
'''
Import python libraries
'''

import pandas as pd
import numpy as np

import psycopg2 as db_connect

import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('default')

from tqdm import tqdm
tqdm.pandas()

import warnings
warnings.filterwarnings('ignore')

print('Libraries Loaded')

Libraries Loaded


In [2]:
'''
Set up connection to fetch data from Queenspark
'''

import os

# Connection settings are read from the environment so credentials can be
# supplied without editing the notebook. The literals are the previously
# hard-coded values, kept only as fallbacks so existing runs behave the same.
host_name = os.environ.get('QUEENSPARK_DB_HOST', 'queenspark-new.crqm6nr3vrbv.ap-southeast-1.rds.amazonaws.com')
db_user = os.environ.get('QUEENSPARK_DB_USER', 'readonly')
db_password = os.environ.get('QUEENSPARK_DB_PASSWORD', 'readonly')
db_name = os.environ.get('QUEENSPARK_DB_NAME', 'queenspark')

connection = db_connect.connect(
    host=host_name,
    user=db_user,
    password=db_password,
    database=db_name,
)

# One cursor is shared by every query cell that follows.
cursor = connection.cursor()

In [3]:
%%time
'''
Import latest prices
'''

query_price_price = '''
SELECT entry_id, date, price_avg
FROM price_price
where 1=1
  and period = 'w'
  and date > '2022-05-30'
;
'''
cursor.execute(query_price_price)
result = cursor.fetchall()
# The first item of each cursor.description entry is the column name.
col_names = [elt[0] for elt in cursor.description]
price_master = pd.DataFrame(result, columns = col_names)

CPU times: user 1.36 s, sys: 449 ms, total: 1.81 s
Wall time: 58.1 s


In [4]:
'''
Import forecasted price master
'''

query_forecast = '''
SELECT entry_id, date, price_avg
FROM price_price_forecasted
where 1=1
  and period = 'w'
;
'''
cursor.execute(query_forecast)
result = cursor.fetchall()
# The first item of each cursor.description entry is the column name.
col_names = [elt[0] for elt in cursor.description]
forecasted_master = pd.DataFrame(result, columns = col_names)

In [None]:
%%time
'''
Import as-is PCP weight master
'''

query_rank = '''
SELECT 
	t1.market_id,
	t1.entry_id,
	t1.market_score,
	t1.product_name,
	t1.variety_label,
	t1.grade_label,
	t1.other_attributes_label,
	t1.cultivation_label,
	t1.origin_region,
	t1.market_region,
	pmv."rank" AS variety_factor_rank,
	pmg."rank" AS grade_factor_rank,
	pmo."rank" AS other_attributes_facor_rank,
	pmct."rank" AS cultivation_factor_rank,
	pmr."rank" AS region_factor_rank
FROM (
		SELECT
		  pp.id   AS market_id,
		  pe.market_score AS market_score,
		  pe.id   AS entry_id,
		  pi.id   AS item_id,
		  pi.product_id AS product_id,
		  ip.name AS product_name,
		  pi.variety_raw,
		  via.id AS variety_id,
		  via.label  AS variety_label,
		  pi.grade_raw,
		  gia.id AS grade_id,
		  gia.label  AS grade_label,
		  oia.attribute_id AS other_attributes_id,
		  oia.label  AS other_attributes_label,
		  cultivation.type_id AS cultivation_type_id,
		  cultivation.attribute_id AS cultivation_attribute_id,
		  cultivation."label" AS cultivation_label,
		  CASE
		   WHEN pi.origin_type = 'd' THEN 'domestic'
		   WHEN pi.origin_type = 'i' THEN 'imported'
		   END  AS origin_type,
		  por.id AS origin_region_id,
		  por.name_label AS origin_region,
		  cc.code AS origin_country_id,
		  cc.name  AS origin_country,
		  pr.id AS market_region_id,
		  pr.name_label AS market_region,
		  piu.unit_raw AS original_unit,
		  piu.unit_label AS normalized_unit,
		  piu.unit_rate,
		  CASE
		   WHEN piu.unit = 'p' THEN 'pound'
		   WHEN piu.unit = 'k' THEN 'kg'
		   END  AS final_unit
		FROM price_entry pe
		  JOIN price_item pi     ON pe.item_id = pi.id
		  JOIN price_productcountryprice pp ON pi.product_country_price_id = pp.id
		  LEFT JOIN price_item_attributes pia ON pi.id = pia.item_id
		  LEFT JOIN price_item_grades pig  ON pig.item_id = pi.id
		  LEFT JOIN price_region pr   ON pr.id = pe.region_id
		  LEFT JOIN price_region por   ON por.id = pi.origin_region_id
		  LEFT JOIN choice_country cc   ON cc.code = pi.origin_country_id
		  LEFT JOIN price_itemunit piu   ON piu.id = pe.item_unit_id
		  LEFT JOIN insight_product ip   ON ip.id = pi.product_id
		  LEFT JOIN insight_attribute via  ON via.id = pi.variety_id
		  LEFT JOIN insight_attribute gia  ON gia.id = pig.attribute_id
		  LEFT JOIN (		   				
			  			SELECT 
								oct.type_id
								, oct."type"
								, ia.id AS attribute_id
								, ia."label" AS LABEL
							FROM insight_attribute ia
							JOIN (
								SELECT 
									id AS type_id 
									, type AS "type"
								FROM insight_attributetype
								WHERE 1=1
								AND "type" != 'cultivation_type'
							) oct ON ia.type_id = oct.type_id
			) oia      ON oia.attribute_id = pia.attribute_id
		  LEFT JOIN (
		  				SELECT 
								ct.type_id
								, ct."type"
								, ia.id AS attribute_id
								, ia."label" AS LABEL
							FROM insight_attribute ia
							JOIN (
								SELECT 
									id AS type_id 
									, type AS "type"
								FROM insight_attributetype
								WHERE 1=1
								AND "type" = 'cultivation_type'
							) ct ON ia.type_id = ct.type_id
			) cultivation  ON cultivation.attribute_id = pia.attribute_id
) t1
LEFT JOIN price_marketfactor pmv ON t1.market_id = pmv.market_id AND t1.variety_id = pmv.variety_id 
LEFT JOIN price_marketfactor pmr ON t1.market_id = pmr.market_id AND t1.market_region_id = pmr.origin_region_id
LEFT JOIN price_marketfactor pmg ON t1.market_id = pmg.market_id AND t1.grade_id = pmg.grade_id
LEFT JOIN price_marketfactor pmo ON t1.market_id = pmo.market_id AND t1.other_attributes_id = pmo.attribute_id
LEFT JOIN price_marketfactor pmct ON t1.market_id = pmct.market_id AND t1.cultivation_attribute_id = pmct.cultivation_type_id
;
'''
cursor.execute(query_rank)
result = cursor.fetchall()
# The first item of each cursor.description entry is the column name.
col_names = [elt[0] for elt in cursor.description]
asis_weight_master = pd.DataFrame(result, columns = col_names)

# Missing factor ranks count as 0 before the squared ranks are added up.
asis_weight_master2 = asis_weight_master.fillna(0)
asis_weight_master2['sum_squared_rank'] = sum(
    asis_weight_master2[rank_column] ** 2
    for rank_column in (
        'variety_factor_rank',
        'region_factor_rank',
        'grade_factor_rank',
        'other_attributes_facor_rank',
        'cultivation_factor_rank',
    )
)

'''
Minmax => Reverse scaled result => Make sum to 1
'''
def normalization(Series):
    '''
    Turn the rank scores of one group into weights that sum to 1.

    Steps applied to the non-zero, non-null scores:
      1. min-max scale them to [0, 1];
      2. reverse the scale (1 - scaled) so the smallest score gets the largest weight;
      3. replace the resulting 0 (the largest score) by half of the smallest
         positive reversed value, so that entry keeps a non-zero weight;
      4. divide everything by the total so the weights sum to 1.
    Scores that are 0 keep weight 0 and null scores stay null.

    When no usable spread exists (no non-zero score, or all non-zero scores
    are equal) every entry of the group receives the uniform weight
    1 / len(Series). The original code reached this result only through a
    divide-by-zero producing inf/NaN, which raises ZeroDivisionError for
    object-dtype input; the fallback is now explicit.

    Parameters
    ----------
    Series : pandas.Series
        Scores of one group; values must be convertible to float.

    Returns
    -------
    pandas.Series
        Float weights carrying the index labels and order of the input.
    '''
    values = Series.astype(float)
    if len(values) == 0:
        return values

    ranked_mask = values.notna() & (values != 0)
    ranked = values[ranked_mask]
    weights = values.copy()

    if len(ranked) > 0:
        lowest = ranked.min()
        spread = ranked.max() - lowest
        if spread > 0:
            reversed_scaled = 1 - (ranked - lowest) / spread
            # The largest score maps to exactly 0; give it half of the
            # smallest positive weight instead of dropping it.
            floor = reversed_scaled[reversed_scaled != 0].min() / 2
            rescaled = reversed_scaled.replace(0, floor)
            # Positional write: `ranked` was taken with the same mask.
            weights.loc[ranked_mask] = rescaled.to_numpy()
        else:
            # All ranked scores are identical: no ordering information.
            weights.loc[ranked_mask] = np.nan

    total = weights.sum()
    if not total > 0:
        return pd.Series(1 / len(values), index=values.index, name=Series.name)

    return weights * (1 / total)

# The column created above is 'sum_squared_rank' (the previous spelling raised KeyError).
# transform() hands each market's scores to normalization() and returns the
# weights on the original row index, so they can be assigned directly.
asis_weight_master2['normalized_rank'] = asis_weight_master2.groupby('market_id')['sum_squared_rank'].transform(normalization)


In [None]:
%%time
'''
Import to-be PCP weight master
'''

query6 = '''
WITH attr_T AS (
	SELECT
		pe.id AS entry_id
		, pi.id AS item_id
		, oia.id AS attribute_id
		, via.id AS variety_id
		, gia.id AS grade_id
	FROM price_entry pe
		JOIN price_item pi ON pe.item_id = pi.id
		LEFT JOIN price_item_attributes pia ON pi.id = pia.item_id -- To link price_item & insight_attribute
		LEFT JOIN insight_attribute oia  ON pia.attribute_id = oia.id -- TO link pia & insight_attribute
		LEFT JOIN price_item_grades pig  ON pi.id = pig.item_id -- To link price_item & insight_attribute
		LEFT JOIN insight_attribute gia  ON pig.attribute_id = gia.id -- TO link grade & insight_attribute
		LEFT JOIN insight_attribute via  ON pi.variety_id = via.id -- To link variety & insight_attribute
	WHERE 1=1
	and pi.status = any (array ['s', 'm', 'a'])
	and pi.is_active = true
	and pe.is_active = true
	and pe.representative_score > 0
)


SELECT
	pcp.id AS market_id
	, entry_T.*
FROM price_entry pe
LEFT JOIN price_item pi ON pe.item_id = pi.id
LEFT JOIN price_productcountryprice pcp ON pi.product_country_price_id = pcp.id
LEFT JOIN (
			SELECT
				entry_id
				, sum(COALESCE (attribute_sum , 0) + COALESCE (variety_sum , 0) + COALESCE (grade_sum , 0)) AS attribute_sum
			FROM (
			----------------------------------------attribute_T--------------------------------------------
					SELECT
						attribute_T.*
						, variety_T.variety_sum
						, grade_T.grade_sum
					FROM (
						SELECT
							entry_id
							, sum(COALESCE (supply_cnt, 0) + COALESCE (fulfillment_item_cnt, 0) + COALESCE (journal_cnt, 0)) AS attribute_sum
						FROM (
							SELECT
								supply_T.*
								, fulfillment_T.fulfillment_item_cnt
								, journal_T.journal_cnt
							FROM (
									SELECT
										attr_T.entry_id
										, attr_T.attribute_id
										, count(csa.supply_id) AS supply_cnt
									FROM attr_T
									LEFT JOIN commerce_supply_attributes csa ON attr_T.attribute_id = csa.attribute_id
									GROUP BY attr_T.entry_id, attr_T.attribute_id
							) supply_T
							LEFT JOIN (
									SELECT
										attr_T.entry_id
										, attr_T.attribute_id
										, count(cf.id) AS fulfillment_item_cnt
									FROM attr_T
									LEFT JOIN commerce_fulfillmentitem cf ON attr_T.attribute_id = cf.attribute_id
									GROUP BY attr_T.entry_id, attr_T.attribute_id
							) fulfillment_T ON supply_T.entry_id = fulfillment_T.entry_id AND supply_T.attribute_id = fulfillment_T.attribute_id
							LEFT JOIN (
									SELECT
										attr_T.entry_id
										, attr_T.attribute_id
										, count(ijpa.id) AS journal_cnt
									FROM attr_T
									LEFT JOIN intelligence_journal_primary_attributes ijpa ON attr_T.attribute_id = ijpa.attribute_id
									GROUP BY attr_T.entry_id, attr_T.attribute_id
							) journal_T ON supply_T.entry_id = journal_T.entry_id AND supply_T.attribute_id = journal_T.attribute_id
						) t1
						GROUP BY entry_id
					) attribute_T
					LEFT JOIN (
					----------------------------------------variety_T--------------------------------------------
							SELECT
								entry_id
								, variety_sum
							FROM (
									SELECT
										entry_id
										, sum(COALESCE (supply_cnt, 0) + COALESCE (fulfillment_item_cnt, 0) + COALESCE (journal_cnt, 0)) AS variety_sum
									FROM (
											SELECT
												supply_T.*
												, fulfillment_T.fulfillment_item_cnt
												, journal_T.journal_cnt
											FROM (
													SELECT
														attr_T.entry_id
														, attr_T.variety_id
														, count(csa.supply_id) AS supply_cnt
													FROM attr_T
													LEFT JOIN commerce_supply_attributes csa ON attr_T.variety_id = csa.attribute_id
													GROUP BY attr_T.entry_id, attr_T.variety_id
											) supply_T
											LEFT JOIN (
													SELECT
														attr_T.entry_id
														, attr_T.variety_id
														, count(cf.id) AS fulfillment_item_cnt
													FROM attr_T
													LEFT JOIN commerce_fulfillmentitem cf ON attr_T.variety_id = cf.attribute_id
													GROUP BY attr_T.entry_id, attr_T.variety_id
											) fulfillment_T ON supply_T.entry_id = fulfillment_T.entry_id AND supply_T.variety_id = fulfillment_T.variety_id
											LEFT JOIN (
													SELECT
														attr_T.entry_id
														, attr_T.variety_id
														, count(ijpa.id) AS journal_cnt
													FROM attr_T
													LEFT JOIN intelligence_journal_primary_attributes ijpa ON attr_T.variety_id = ijpa.attribute_id
													GROUP BY attr_T.entry_id, attr_T.variety_id
											) journal_T ON supply_T.entry_id = journal_T.entry_id AND supply_T.variety_id = journal_T.variety_id
									) t2
									GROUP BY entry_id
							) variety
					) variety_T ON attribute_T.entry_id = variety_T.entry_id
					LEFT JOIN (
					----------------------------------------grade_T--------------------------------------------
							SELECT
								entry_id
								, grade_sum
							FROM (
									SELECT
										entry_id
										, sum(COALESCE (supply_cnt, 0) + COALESCE (fulfillment_item_cnt, 0) + COALESCE (journal_cnt, 0)) AS grade_sum
									FROM (
										SELECT
											supply_T.*
											, fulfillment_T.fulfillment_item_cnt
											, journal_T.journal_cnt
										FROM (
												SELECT
													attr_T.entry_id
													, attr_T.grade_id
													, count(csa.supply_id) AS supply_cnt
												FROM attr_T
												LEFT JOIN commerce_supply_attributes csa ON attr_T.grade_id = csa.attribute_id
												GROUP BY attr_T.entry_id, attr_T.grade_id
										) supply_T
										LEFT JOIN (
												SELECT
													attr_T.entry_id
													, attr_T.grade_id
													, count(cf.id) AS fulfillment_item_cnt
												FROM attr_T
												LEFT JOIN commerce_fulfillmentitem cf ON attr_T.grade_id = cf.attribute_id
												GROUP BY attr_T.entry_id, attr_T.grade_id
										) fulfillment_T ON supply_T.entry_id = fulfillment_T.entry_id AND supply_T.grade_id = fulfillment_T.grade_id
										LEFT JOIN (
												SELECT
													attr_T.entry_id
													, attr_T.grade_id
													, count(ijpa.id) AS journal_cnt
												FROM attr_T
												LEFT JOIN intelligence_journal_primary_attributes ijpa ON attr_T.grade_id = ijpa.attribute_id
												GROUP BY attr_T.entry_id, attr_T.grade_id
										) journal_T ON supply_T.entry_id = journal_T.entry_id AND supply_T.grade_id = journal_T.grade_id
									) t3
									GROUP BY entry_id
							) grade
						) grade_T ON attribute_T.entry_id = grade_T.entry_id
			) total_T
			GROUP BY entry_id
		) entry_T ON pe.id = entry_T.entry_id
WHERE 1=1
and pi.status = any (array ['s', 'm', 'a'])
and pi.is_active = true
and pe.is_active = true
and pe.representative_score > 0
;
'''
cursor.execute(query6)
result = cursor.fetchall()
# The first item of each cursor.description entry is the column name.
col_names = [elt[0] for elt in cursor.description]
tobe_weight_master = pd.DataFrame(result, columns = col_names)

def softmax(Series):
    '''
    Softmax of the min-max scaled values of Series.

    The values are first scaled to [0, 1]. If the scaled result holds no
    truthy value (e.g. every value is equal, so the scaling yields only NaN),
    NaN is replaced by 1 / len(Series) before exponentiating. Returns
    exp(scaled) divided by the sum of exp(scaled).
    '''
    lowest, highest = Series.min(), Series.max()
    scaled = (Series - lowest) / (highest - lowest)

    if not scaled.any():
        scaled = scaled.replace(np.nan, 1 / len(scaled))

    exponentials = np.exp(scaled)
    return exponentials / np.sum(exponentials)

tobe_weight_master['attribute_sum'] = tobe_weight_master['attribute_sum'].astype(int)
# transform() applies softmax() per market and returns the weights on the
# original row index. The previous apply/reset_index/set_index('level_1') chain
# depended on apply() returning a MultiIndex Series, which is not the case
# when apply() produces a wide frame (e.g. only one market).
tobe_weight_master['soft_max'] = tobe_weight_master.groupby('market_id')['attribute_sum'].transform(softmax)
tobe_weight_master2 = tobe_weight_master.copy()
# Same column name as the AS-IS table so both can be passed to the index function.
tobe_weight_master2['normalized_rank'] = tobe_weight_master2['soft_max']

In [None]:
'''
Import raw csv files(whitelist entries as of 2022-05-30)
'''

# Paths are relative to the notebook's working directory.
# whitelisted: later read for market_id, entry_id, date and price_avg.
whitelisted = pd.read_csv('../CSV/Whitelisted.csv')
# entry_master: later used for its market_id / entry_id pairs.
entry_master = pd.read_csv('../CSV/Entry_Master.csv')
# product_master: loaded here; not referenced by the cells visible in this notebook.
product_master = pd.read_csv('../CSV/Product_Master.csv')

In [None]:
'''
Raw data tables 
'''

# tobe_weight_master2 is built by the TO-BE weight cell above and used by the
# TO-BE index cells below, so it belongs in this list as well.
print('''
Ready tables:

whitelisted
entry_master
product_master
price_master
forecasted_master
asis_weight_master2
tobe_weight_master2
''')

In [None]:
'''
Initial data cleansing and table processing
Index start date = '2020-01-01'
'''

# Dates are handled as strings from here on (compared and merged as text).
price_master['date'] = price_master['date'].astype(str)
forecasted_master['date'] = forecasted_master['date'].astype(str)

# Latest prices of whitelisted entries; market_id is unknown at this point
# and is filled in from the whitelist rows below.
price_master_filter = (
    price_master[price_master['entry_id'].isin(list(whitelisted['entry_id'].unique()))]
    .assign(market_id = np.nan)
    .loc[:, ['market_id', 'entry_id', 'date', 'price_avg']]
    .drop_duplicates()
)

# Whitelist history + latest prices, ordered per entry by date.
actual = pd.concat(
    [whitelisted[['market_id', 'entry_id', 'date', 'price_avg']], price_master_filter],
    axis = 0,
).sort_values(by = ['entry_id', 'date'], ascending = [True, True])
actual['market_id'] = actual.groupby('entry_id')['market_id'].transform('first')

# Forecast rows get their market from entry_master; dates already present in
# the actual prices are dropped.
forecast = pd.merge(
    forecasted_master,
    entry_master[['market_id', 'entry_id']].drop_duplicates(),
    on = 'entry_id',
    how = 'left',
).loc[:, ['market_id', 'entry_id', 'date', 'price_avg']]
forecast = forecast[~forecast['date'].isin(list(actual['date'].unique()))]

df_raw = pd.concat([actual, forecast], axis = 0).sort_values(
    by = ['entry_id', 'date'],
    ascending = [True, True],
)
df_raw_play = df_raw[df_raw['date'] >= '2020-01-01']
unique_entry_lst = df_raw_play['entry_id'].unique()

In [None]:
'''
Function for imputation type #1
'''

def first_price(DataFrame, start = '2020-01-01', end = None, freq = 'w-Mon', unique_entry_lst = None):
    '''
    Imputation type #1: back-fill each entry to the start of the time grid.

    For every entry, a copy of its first row is added for each grid date that
    lies before the entry's first date (only the 'date' field is changed), so
    the series starts at the first grid date with its first known price.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Needs 'entry_id' and 'date' columns; 'date' holds 'YYYY-MM-DD' strings
        and rows of an entry are expected in ascending date order.
    start, end, freq
        Passed to pandas.date_range to build the grid. When `end` is None the
        latest date of the notebook-level `forecast` table is used, looked up
        at call time instead of at definition time.
    unique_entry_lst : iterable, optional
        Entries to process; defaults to the entries present in DataFrame.

    Returns
    -------
    pandas.DataFrame
        Input rows plus the back-filled rows, sorted by entry_id and date,
        with a fresh index.
    '''
    if end is None:
        end = forecast['date'].max()
    if unique_entry_lst is None:
        unique_entry_lst = list(DataFrame['entry_id'].unique())

    grid_dates = pd.date_range(start = start, end = end, freq = freq).strftime('%Y-%m-%d')
    backfilled_rows = []

    for uniq in tqdm(unique_entry_lst):
        entry_rows = DataFrame[DataFrame['entry_id'] == uniq]
        if entry_rows.empty:
            # Nothing to copy from for an entry without rows.
            continue

        first_row = entry_rows.iloc[0]
        first_date = first_row['date']

        # ISO date strings sort chronologically, so plain comparison is enough.
        # Stopping at the first grid date >= first_date also covers a first
        # date that is not on the grid, which previously ran off the grid end.
        for grid_date in grid_dates:
            if grid_date >= first_date:
                break
            filler = first_row.copy()
            filler['date'] = grid_date
            backfilled_rows.append(filler)

    if backfilled_rows:
        # Build the extra rows once instead of appending row by row.
        extra = pd.DataFrame(backfilled_rows, columns = DataFrame.columns)
        DataFrame = pd.concat([DataFrame, extra], axis = 0)

    return DataFrame.sort_values(by = ['entry_id', 'date'], ascending = [True, True]).reset_index(drop = True)

In [None]:
'''
Function for imputation type #2
'''

def mean_price(DataFrame, unique_entry_lst = None):
    '''
    Imputation type #2: fill missing prices inside each entry's series.

    Missing prices are processed in row order. Each one is replaced by the
    mean of the price in the previous row (which may itself have just been
    filled) and the next known price further down. A gap at the very start or
    end of the series, where only one neighbour exists, takes that neighbour's
    price; previously a leading gap read the last row and a trailing gap
    raised IndexError. A series without any known price is left unchanged.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Needs 'entry_id' and 'price_avg' columns; rows of an entry are
        expected in the order in which gaps should be filled.
    unique_entry_lst : iterable, optional
        Entries to process; defaults to the entries present in DataFrame.
        Entries not listed are not part of the result.

    Returns
    -------
    pandas.DataFrame
        The per-entry frames stacked in list order (each keeps its own
        0..n-1 index); 'price_avg' is returned as float.
    '''
    if unique_entry_lst is None:
        unique_entry_lst = list(DataFrame['entry_id'].unique())

    filled_frames = []

    for uniq in unique_entry_lst:
        entry_rows = DataFrame[DataFrame['entry_id'] == uniq].reset_index(drop = True)
        # Work on a private float array; None/NaN both become NaN.
        prices = np.array(entry_rows['price_avg'].astype(float), dtype = float)

        for i in np.flatnonzero(np.isnan(prices)):
            before_price = prices[i - 1] if i > 0 else np.nan
            later_prices = prices[i + 1:]
            later_known = later_prices[~np.isnan(later_prices)]
            next_price = later_known[0] if later_known.size else np.nan

            if np.isnan(before_price):
                prices[i] = next_price
            elif np.isnan(next_price):
                prices[i] = before_price
            else:
                prices[i] = (before_price + next_price) / 2

        entry_rows['price_avg'] = prices
        filled_frames.append(entry_rows)

    if not filled_frames:
        return pd.DataFrame(columns = DataFrame.columns)

    return pd.concat(filled_frames, axis = 0)

In [None]:
'''
Function for double checking if we have full time range prices for all entries
'''

def full_time_range(DataFrame, unique_entry_lst = None, start = '2020-01-01', end = None, freq = 'w-Mon'):
    '''
    Make sure every entry has one row per date of the time grid.

    An entry whose row count already equals the grid length is passed through
    unchanged. Otherwise its rows are left-joined onto the grid by 'date';
    rows created for missing dates get the entry's last known price, and
    market_id / entry_id are restored on every row.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Needs 'market_id', 'entry_id', 'date' ('YYYY-MM-DD' strings) and
        'price_avg' columns.
    unique_entry_lst : iterable, optional
        Entries to process; defaults to the entries present in DataFrame.
    start, end, freq
        Passed to pandas.date_range to build the grid. When `end` is None the
        latest date of the notebook-level `forecast` table is used.

    Returns
    -------
    pandas.DataFrame
        The per-entry frames stacked in list order, with the columns of the
        input frame in their original order.
    '''
    if end is None:
        end = forecast['date'].max()
    if unique_entry_lst is None:
        unique_entry_lst = list(DataFrame['entry_id'].unique())

    grid_dates = pd.date_range(start = start, end = end, freq = freq).strftime('%Y-%m-%d')
    time_df = pd.DataFrame({'date': list(grid_dates)})
    column_order = list(DataFrame.columns)

    completed_frames = []

    for uniq in unique_entry_lst:
        entry_rows = DataFrame[DataFrame['entry_id'] == uniq].reset_index(drop = True)

        if len(entry_rows) != len(time_df):
            entry_rows = pd.merge(time_df, entry_rows, on = 'date', how = 'left')

            # NOTE(review): every missing date, including gaps in the middle,
            # receives the LAST known price, as before - confirm this is
            # preferred over a forward fill.
            known_prices = entry_rows['price_avg'].dropna()
            if not known_prices.empty:
                entry_rows['price_avg'] = entry_rows['price_avg'].fillna(known_prices.iloc[-1])

            # Row 0 may itself be a newly created (empty) row, so take the
            # market from the first row that actually has one.
            known_markets = entry_rows['market_id'].dropna()
            entry_rows['market_id'] = known_markets.iloc[0] if not known_markets.empty else np.nan
            entry_rows['entry_id'] = uniq

        completed_frames.append(entry_rows[column_order])

    if not completed_frames:
        return pd.DataFrame(columns = DataFrame.columns)

    return pd.concat(completed_frames, axis = 0)

In [None]:
'''
Function for computing final market index
'''

def calculate_market_index(DataFrame, weightedDataFrame, column_name, unique_entry_lst = None):
    '''
    Combine the entries' relative price changes into one market index.

    For each entry the percentage change of every price against the entry's
    first price is computed and multiplied by the entry's weight. The weighted
    changes are summed position by position over all entries, and the index is
    100 plus that sum.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        Needs 'market_id', 'entry_id', 'date' and 'price_avg' columns. Every
        entry must contribute one row per distinct (market_id, date) pair, in
        the same order, because the changes are combined by position.
    weightedDataFrame : pandas.DataFrame
        Needs 'entry_id' and `column_name`; each entry must map to exactly one
        distinct weight.
    column_name : str
        Name of the weight column in weightedDataFrame.
    unique_entry_lst : iterable, optional
        Entries to include; defaults to the entries present in DataFrame.

    Returns
    -------
    pandas.DataFrame
        Distinct (market_id, date) rows with 'market_final_change' and
        'market_index' columns added.

    Raises
    ------
    ValueError
        If an entry has zero or several distinct weights, or its number of
        rows differs from the number of distinct (market_id, date) pairs.
    '''
    if unique_entry_lst is None:
        unique_entry_lst = list(DataFrame['entry_id'].unique())

    result_df = DataFrame[['market_id', 'date']].drop_duplicates()
    relative_changes = []

    for uniq in tqdm(unique_entry_lst):
        prices = DataFrame.loc[DataFrame['entry_id'] == uniq, 'price_avg'].astype(float).to_numpy()
        if len(prices) != len(result_df):
            raise ValueError(
                'entry_id {} has {} price rows but the index has {} dates'.format(uniq, len(prices), len(result_df))
            )

        weights = weightedDataFrame.loc[weightedDataFrame['entry_id'] == uniq, column_name].unique()
        if len(weights) != 1:
            raise ValueError(
                'entry_id {} needs exactly one weight in {!r}, found {}'.format(uniq, column_name, len(weights))
            )
        entry_weight = float(weights[0])

        # The first price of the entry is the base (relative change 0 there).
        base_price = prices[0]
        relative_change = ((prices - base_price) / base_price) * 100
        relative_changes.append(relative_change * entry_weight)

    market_final_change = np.sum(relative_changes, axis=0)
    result_df['market_final_change'] = market_final_change
    result_df['market_index'] = 100 + market_final_change

    return result_df

In [None]:
'''
Function for producing visualization graph
'''

def draw_avg_rate(DataFrame, market_id, country, item):
    '''
    Plot the market index of one market against its dates.

    DataFrame needs 'market_id', 'date' and 'market_index' columns; only rows
    of `market_id` are drawn. `country` and `item` are used in the title only.
    '''
    market_rows = DataFrame.loc[DataFrame['market_id'] == market_id]

    plt.figure(figsize = (24, 10))
    plt.plot(market_rows['date'], market_rows['market_index'], color='black', marker='o', linestyle='-')

    plt.legend(labels = ['Relative Index'], loc = 'best', fontsize = 12)
    plt.title('Market id: {}, Country: {}, Product: {}'.format(market_id, country, item), fontsize=18)
    plt.xlabel('Date', fontsize = 8)
    plt.ylabel('Index(Base=100)')
    plt.xticks(rotation=90)
    plt.show()

In [None]:
'''
Function for producing multiple visualization graphs
'''

def draw_avg_rate2(DataFrame1, DataFrame2, market_id, country, item, labels = ('AS-IS weight', 'TO-BE weight')):
    '''
    Plot the market index of one market from two result tables on one chart.

    DataFrame1 is drawn in black and DataFrame2 in red; both need 'market_id',
    'date' and 'market_index' columns. `labels` names the two lines in the
    legend (first for DataFrame1, second for DataFrame2); the legend used to
    name only one of the two lines. `country` and `item` appear in the title.
    '''
    df1 = DataFrame1[DataFrame1['market_id'] == market_id]
    df2 = DataFrame2[DataFrame2['market_id'] == market_id]

    plt.figure(figsize = (24, 10))
    plt.plot(df1['date'], df1['market_index'], linestyle='-', marker='o', color='black', label=labels[0])
    plt.plot(df2['date'], df2['market_index'], linestyle='-', marker='o', color='red', label=labels[1])

    # Labels come from the plot calls, so each line gets its own legend entry.
    plt.legend(fontsize = 12, loc = 'best')
    plt.xlabel('Date', fontsize = 8)
    plt.xticks(rotation=90)
    plt.ylabel('Index(Base=100)')
    plt.title('Market id: {}, Country: {}, Product: {}'.format(market_id, country, item), fontsize=18)
    plt.show()

In [None]:
'''
Sample market information
'''

# market_id -> "<product>, <country code>".
# The cells below split each value on "," to get the plot title parts, so the
# country part keeps the space that follows the comma.
market_info = {42: "Fresh Avocado, MX",
               212: "Fresh Orange, EG",
               977: "Raw Cashew Nut, TZ",
               596: "Fresh Avocado, PE",
               566: "Fresh Tahiti Lime, MX",
               63: "Fresh Mature Coconut, IN",
               239: "Fresh Apple, TR",
               1148: "Chicken Egg, TR",
               1397: "Radish, KR",
               136: "Fresh Mango, PE",
               7530: "Saffron, IR"}

In [None]:
'''
Generate market index for selected single market [AS-IS weight system]
'''

market_id = 596

# Price rows of the selected market and the entries they contain.
df_check = df_raw_play.loc[df_raw_play['market_id'] == market_id]
unique_entry_lst = list(df_check['entry_id'].unique())

# AS-IS weights of those entries, paired up as entry_id -> normalized_rank.
asis_key = list(asis_weight_master2.loc[asis_weight_master2['entry_id'].isin(unique_entry_lst), 'entry_id'])
asis_value = list(asis_weight_master2.loc[asis_weight_master2['entry_id'].isin(unique_entry_lst), 'normalized_rank'])
asis_weight_dic = dict(zip(asis_key, asis_value))

# Only entries that have a weight take part in the index.
df_check = df_check.loc[df_check['entry_id'].isin(asis_key)]
unique_entry_lst = list(df_check['entry_id'].unique())

# Imputation steps, then the index and its plot.
df_check2 = first_price(
    DataFrame = df_check,
    start = '2020-01-01',
    end = forecast['date'].max(),
    freq = 'w-Mon',
    unique_entry_lst = unique_entry_lst,
)
df_check3 = mean_price(DataFrame = df_check2, unique_entry_lst = unique_entry_lst)
df_check4 = full_time_range(DataFrame = df_check3, unique_entry_lst = unique_entry_lst)
final_index = calculate_market_index(
    DataFrame = df_check4,
    weightedDataFrame = asis_weight_master2,
    column_name = 'normalized_rank',
    unique_entry_lst = unique_entry_lst,
)
draw_avg_rate(
    final_index,
    market_id = market_id,
    country = market_info[market_id].split(",")[1],
    item = market_info[market_id].split(",")[0],
)

In [None]:
%%time
'''
Generate market index for all sample markets [AS-IS weight system]
'''

final_df = []
for market_id in market_info:
    # Price rows of this market and the entries they contain.
    df_check = df_raw_play.loc[df_raw_play['market_id'] == market_id]
    unique_entry_lst = list(df_check['entry_id'].unique())

    # AS-IS weights of those entries, paired up as entry_id -> normalized_rank.
    asis_key = list(asis_weight_master2.loc[asis_weight_master2['entry_id'].isin(unique_entry_lst), 'entry_id'])
    asis_value = list(asis_weight_master2.loc[asis_weight_master2['entry_id'].isin(unique_entry_lst), 'normalized_rank'])
    asis_weight_dic = dict(zip(asis_key, asis_value))

    # Only entries that have a weight take part in the index.
    df_check = df_check.loc[df_check['entry_id'].isin(asis_key)]
    unique_entry_lst = list(df_check['entry_id'].unique())

    # A market without any weighted entry is reported and skipped.
    if not unique_entry_lst:
        print('Failed market_id: {}'.format(market_id))
        continue

    df_check2 = first_price(
        DataFrame = df_check,
        start = '2020-01-01',
        end = forecast['date'].max(),
        freq = 'w-Mon',
        unique_entry_lst = unique_entry_lst,
    )
    df_check3 = mean_price(DataFrame = df_check2, unique_entry_lst = unique_entry_lst)
    df_check4 = full_time_range(DataFrame = df_check3, unique_entry_lst = unique_entry_lst)

    # Per-market results are also published as notebook globals named
    # market_entry_df_<id> and market_index_df_<id>.
    globals()['market_entry_df_%s' % market_id] = df_check4.copy()
    globals()['market_index_df_%s' % market_id] = calculate_market_index(
        DataFrame = df_check4,
        weightedDataFrame = asis_weight_master2,
        column_name = 'normalized_rank',
        unique_entry_lst = unique_entry_lst,
    )
    print('Completed market_id: {}'.format(market_id))

    final_df.append(globals()['market_index_df_%s' % market_id])
final_df = pd.concat(final_df)


In [None]:
'''
Generate market index for selected single market [TO-BE weight system]
'''

market_id = 596

# Price rows of the selected market and the entries they contain.
df_check = df_raw_play.loc[df_raw_play['market_id'] == market_id]
unique_entry_lst = list(df_check['entry_id'].unique())

# TO-BE weights of those entries, paired up as entry_id -> normalized_rank.
tobe_key = list(tobe_weight_master2.loc[tobe_weight_master2['entry_id'].isin(unique_entry_lst), 'entry_id'])
tobe_value = list(tobe_weight_master2.loc[tobe_weight_master2['entry_id'].isin(unique_entry_lst), 'normalized_rank'])
tobe_weight_dic = dict(zip(tobe_key, tobe_value))

# Only entries that have a weight take part in the index.
df_check = df_check.loc[df_check['entry_id'].isin(tobe_key)]
unique_entry_lst = list(df_check['entry_id'].unique())

# Imputation steps, then the index and its plot.
df_check2 = first_price(
    DataFrame = df_check,
    start = '2020-01-01',
    end = forecast['date'].max(),
    freq = 'w-Mon',
    unique_entry_lst = unique_entry_lst,
)
df_check3 = mean_price(DataFrame = df_check2, unique_entry_lst = unique_entry_lst)
df_check4 = full_time_range(DataFrame = df_check3, unique_entry_lst = unique_entry_lst)
final_index = calculate_market_index(
    DataFrame = df_check4,
    weightedDataFrame = tobe_weight_master2,
    column_name = 'normalized_rank',
    unique_entry_lst = unique_entry_lst,
)
draw_avg_rate(
    final_index,
    market_id = market_id,
    country = market_info[market_id].split(",")[1],
    item = market_info[market_id].split(",")[0],
)

In [None]:
%%time
'''
Generate market index for all sample markets [TO-BE weight system]
'''

final_df2 = []
for market_id in market_info:
    # Price rows of this market and the entries they contain.
    df_check = df_raw_play.loc[df_raw_play['market_id'] == market_id]
    unique_entry_lst = list(df_check['entry_id'].unique())

    # TO-BE weights of those entries, paired up as entry_id -> normalized_rank.
    tobe_key = list(tobe_weight_master2.loc[tobe_weight_master2['entry_id'].isin(unique_entry_lst), 'entry_id'])
    tobe_value = list(tobe_weight_master2.loc[tobe_weight_master2['entry_id'].isin(unique_entry_lst), 'normalized_rank'])
    tobe_weight_dic = dict(zip(tobe_key, tobe_value))

    # Only entries that have a weight take part in the index.
    df_check = df_check.loc[df_check['entry_id'].isin(tobe_key)]
    unique_entry_lst = list(df_check['entry_id'].unique())

    # A market without any weighted entry is reported and skipped.
    if not unique_entry_lst:
        print('Failed market_id: {}'.format(market_id))
        continue

    df_check2 = first_price(
        DataFrame = df_check,
        start = '2020-01-01',
        end = forecast['date'].max(),
        freq = 'w-Mon',
        unique_entry_lst = unique_entry_lst,
    )
    df_check3 = mean_price(DataFrame = df_check2, unique_entry_lst = unique_entry_lst)
    df_check4 = full_time_range(DataFrame = df_check3, unique_entry_lst = unique_entry_lst)

    # Same global names as the AS-IS loop: market_entry_df_<id> and
    # market_index_df_<id> are overwritten with the TO-BE results here.
    globals()['market_entry_df_%s' % market_id] = df_check4.copy()
    globals()['market_index_df_%s' % market_id] = calculate_market_index(
        DataFrame = df_check4,
        weightedDataFrame = tobe_weight_master2,
        column_name = 'normalized_rank',
        unique_entry_lst = unique_entry_lst,
    )
    print('Completed market_id: {}'.format(market_id))

    final_df2.append(globals()['market_index_df_%s' % market_id])
final_df2 = pd.concat(final_df2)


In [None]:
# One comparison chart per sample market: final_df (AS-IS) against final_df2 (TO-BE).
for market_id in market_info:
    draw_avg_rate2(
        final_df,
        final_df2,
        market_id = market_id,
        country = market_info[market_id].split(",")[1],
        item = market_info[market_id].split(",")[0],
    )

In [None]:
# Number of distinct markets in the whitelist.
len(whitelisted['market_id'].unique())

In [None]:
as_is_cov = asis_weight_master2.copy()
# Rows whose five factor ranks add up to 0 are dropped from the coverage count.
as_is_cov['sum'] = (
    asis_weight_master2['variety_factor_rank']
    + asis_weight_master2['grade_factor_rank']
    + asis_weight_master2['other_attributes_facor_rank']
    + asis_weight_master2['cultivation_factor_rank']
    + asis_weight_master2['region_factor_rank']
)
as_is_cov = as_is_cov.loc[as_is_cov['sum'] != 0]
# (markets covered, markets covered that are also whitelisted)
(
    len(as_is_cov['market_id'].unique()),
    len(as_is_cov.loc[as_is_cov['market_id'].isin(list(whitelisted['market_id'].unique())), 'market_id'].unique()),
)

In [None]:
to_be_cov = tobe_weight_master2.copy()
# (markets covered, markets covered that are also whitelisted)
(
    len(to_be_cov['market_id'].unique()),
    len(to_be_cov.loc[to_be_cov['market_id'].isin(list(whitelisted['market_id'].unique())), 'market_id'].unique()),
)