In [2]:
#--------------------Robustness: Price Grid--------------------#

print('Running Price Grid')
import numpy as np
import pandas as pd

# Folder holding the spreadsheets read below.
cln_data_path = '../output/Data Background/'


def _seller_rows(frame):
    """Return the boolean mask selecting rows whose ``is_seller`` equals 1."""
    return frame['is_seller'] == 1


# Read each spreadsheet and keep only its seller rows.  ``df_1g`` feeds the
# "n = 1" outputs further down and ``df_2g`` the "n = 2" outputs.
df_1g = pd.read_excel(cln_data_path + 'df_1g_first.xlsx').loc[_seller_rows]
df_2g = pd.read_excel(cln_data_path + 'df_2g_first.xlsx').loc[_seller_rows]

# Bin the low- and high-quality prices of both seller samples on one shared
# price grid and serialise every histogram as a pgfplots coordinate list.

# Shared bin edges: ten bins of width 20 spanning 10 to 210.  Defined once so
# the four histograms cannot drift apart.
PRICE_BIN_EDGES = [10, 30, 50, 70, 90, 110, 130, 150, 170, 190, 210]
# Distance from a bin's left edge to its centre (half of the bin width).
HALF_BIN_WIDTH = 10


def _price_density(prices):
    """Histogram *prices* on the shared grid.

    Returns the ``(densities, edges)`` pair produced by ``np.histogram`` with
    ``density=True``, i.e. bar heights are normalised so the histogram
    integrates to 1 over the grid.
    """
    return np.histogram(prices, bins=PRICE_BIN_EDGES, density=True)


def _pgf_coordinates(x_values, y_values):
    """Format paired values as a run of ``"(x, y) "`` pgfplots coordinates.

    Every pair, including the last, is followed by a single space.  ``zip``
    stops at the shorter input, so passing the 11 shifted edges together with
    10 densities silently drops the unused final edge.
    """
    return "".join(
        "(" + str(x) + ", " + str(y) + ") " for x, y in zip(x_values, y_values)
    )


counts_low_quality_1s, bin_edges = _price_density(df_1g['price_low_quality'])
counts_high_quality_1s, bin_edges = _price_density(df_1g['price_high_quality'])
counts_low_quality_2s, bin_edges = _price_density(df_2g['price_low_quality'])
counts_high_quality_2s, bin_edges = _price_density(df_2g['price_high_quality'])

# Shift the edges to bin centres so each bar is drawn in the middle of its bin.
bin_edges = bin_edges + HALF_BIN_WIDTH

low_q_string_1s = _pgf_coordinates(bin_edges, counts_low_quality_1s)
high_q_string_1s = _pgf_coordinates(bin_edges, counts_high_quality_1s)
low_q_string_2s = _pgf_coordinates(bin_edges, counts_low_quality_2s)
high_q_string_2s = _pgf_coordinates(bin_edges, counts_high_quality_2s)



def _subfigure_lines(low_coords, high_coords, n_label, with_legend):
    """Return the TeX lines of one stacked-bar subfigure.

    *low_coords* and *high_coords* are ready-made pgfplots coordinate strings
    (blue and red series respectively), *n_label* is the value shown in the
    ``$n = ...$`` caption, and *with_legend* adds the legend line before the
    axis is closed.
    """
    lines = [
        r"\begin{subfigure}[b]{0.4\textwidth}",
        r"\begin{tikzpicture}[scale=0.8]",
        r"\begin{axis}[ybar stacked, bar width=15pt, ytick={}, yticklabels={}, xlabel={Price Range}, enlarge x limits=0.15, ymax=0.03]",
        r"\addplot+[ybar, fill=blue] plot coordinates {" + low_coords + "};",
        r"\addplot+[ybar, fill=red] plot coordinates {" + high_coords + "};",
    ]
    if with_legend:
        lines.append(r"\legend{Low Quality, High Quality}")
    lines += [
        r"\end{axis}",
        r"\end{tikzpicture}",
        r"\caption{$n = " + str(n_label) + "$}",
        r"\end{subfigure}",
    ]
    return lines


# The empty first and last entries reproduce the blank line that opens the
# document and the newline that terminates it once everything is joined.
tex_code = "\n".join(
    [
        "",
        r"\begin{figure}\flushleft",
        *_subfigure_lines(low_q_string_1s, high_q_string_1s, 1, False),
        r"\hspace{0.01\textwidth}",
        # Only the second panel carries the legend.
        *_subfigure_lines(low_q_string_2s, high_q_string_2s, 2, True),
        r"\caption{Histogram of Low and High Quality Prices}",
        r"\label{pgrid_hist}",
        r"\end{figure}",
        "",
    ]
)

with open("../output/Price Grid/pgrid_hist.tex", "w") as tex_file:
    tex_file.write(tex_code)
print('Created pgrid_hist.tex')

def _price_share_table(sellers, n_sellers):
    """Tabulate, per distinct price, the share of sellers posting it.

    For both price columns the value counts are sorted by price and divided by
    *n_sellers*.  The two share series are aligned side by side, prices seen in
    only one column get a share of 0 in the other, and a ``max`` column holds
    the larger of the two shares in each row.
    """
    shares = [
        sellers[column].value_counts().sort_index() / n_sellers
        for column in ('price_low_quality', 'price_high_quality')
    ]
    table = pd.concat(shares, axis=1).fillna(0)
    table['max'] = table.max(axis=1)
    return table


# Informativeness is half the sum, over all observed prices, of the larger of
# the low-quality and high-quality price shares.
# NOTE: from here on ``df_1g`` / ``df_2g`` hold the share tables, not the
# seller-level rows.
total_count_1s = len(df_1g.index)
df_1g = _price_share_table(df_1g, total_count_1s)
inf_1g = 0.5 * df_1g['max'].sum()
print('Informativeness when n = 1: ', inf_1g)

total_count_2s = len(df_2g.index)
df_2g = _price_share_table(df_2g, total_count_2s)
inf_2g = 0.5 * df_2g['max'].sum()
print('Informativeness when n = 2: ', inf_2g)
print('\n')

Running Price Grid
Created pgrid_hist.tex
Informativeness when n = 1:  0.6743589743589744
Informativeness when n = 2:  0.6605769230769232
