Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 84 additions & 39 deletions Murray/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,13 +385,23 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz
tickfont=dict(size=12, color='black'))
)


custom_data = []
for s in sorted_sizes:
custom_data.append([s] * len(periods))
for i, s in enumerate(sorted_sizes):
mde_data = []
for period in periods:
mde = sensitivity_results[s][period].get('MDE', None)
mde_data.append([
s, # Treatment size
f"{mde:.2%}" if mde is not None else "N/A" # MDE
])
custom_data.append(mde_data)

fig.data[0].customdata = custom_data
fig.data[0].hovertemplate = "Treatment size: %{customdata}<br><extra></extra>"
fig.data[0].hovertemplate = (
"Treatment size: %{customdata[0]}<br>" +
"MDE: %{customdata[1]}<br>" +
"<extra></extra>"
)
fig.data[0].hoverinfo = "skip"

return fig
Expand All @@ -400,6 +410,39 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz




































def print_weights(geo_test, treatment_percentage=None, num_locations=None):
"""
Extracts control group weights based on holdout percentage or number of locations.
Expand Down Expand Up @@ -686,11 +729,26 @@ def print_incremental_results(geo_test, period, treatment_percentage):
holdout_percentage = 100 - treatment_percentage
title = "Incremental Results"
att, incremental, fig = plot_impact_streamlit_app(geo_test, period, holdout_percentage)

# Get the MDE from the sensitivity_results
sensitivity_results = geo_test['sensitivity_results']
results_by_size = geo_test['simulation_results']

target_size_key = None
target_mde = None
for size_key, result in results_by_size.items():
current_holdout = result['Holdout Percentage']
if abs(current_holdout - holdout_percentage) < 0.01:
target_size_key = size_key
target_mde = sensitivity_results[size_key][period].get('MDE', None)
break

print("=" * 30)
print(title.center(30))
print("=" * 30)
print(f"ATT: {round(att,2)}")
print(f"Lift total: {round(incremental,2)}")
print(f"MDE: {round(target_mde*100,2)}%")
print("=" * 30)


Expand Down Expand Up @@ -913,12 +971,19 @@ def plot_impact_evaluation(results_evaluation):
treatment (array): Treatment group values
period (int): Treatment period length
"""


counterfactual = results_evaluation['predictions']
treatment = results_evaluation['treatment']
period = results_evaluation['period']
length_treatment = results_evaluation['length_treatment']

if len(treatment.shape) > 1:
treatment = treatment.squeeze()

if len(counterfactual.shape) > 1:
counterfactual = counterfactual.squeeze()

point_difference = treatment - counterfactual
cumulative_effect = ([0] * (len(treatment) - period)) + (np.cumsum(point_difference[len(treatment)-period:])).tolist()

Expand Down Expand Up @@ -1118,81 +1183,63 @@ def plot_permutation_test(results_evaluation, Significance_level=0.1):
Returns:
fig: Plotly figure.
"""

null_stats = results_evaluation['null_stats']
observed_stat = results_evaluation['observed_stat']


upper_bound = np.percentile(null_stats, 100 * (1 - (Significance_level / 2)))
lower_bound = np.percentile(null_stats, 100 * (Significance_level / 2))



kde = stats.gaussian_kde(null_stats)
x_kde = np.linspace(min(null_stats), max(null_stats), 300)
kde = stats.gaussian_kde(null_stats, bw_method='scott')
x_kde = np.linspace(min(null_stats), max(null_stats), 100)
y_kde = kde(x_kde)


max_hist_y = max(kde(null_stats))

max_hist_y = np.max(y_kde) * 1.1

fig = go.Figure()


fig.add_trace(go.Histogram(
x=null_stats,
nbinsx=30,
nbinsx=20,
histnorm='probability density',
name="Null Stats",
marker=dict(color=blue,line=dict(color="black",width=1)),
marker=dict(color=blue, line=dict(color="black", width=1)),
opacity=0.6
))


fig.add_trace(go.Scatter(
x=x_kde,
y=y_kde,
mode="lines",
name="KDE Density",
showlegend=False,

line=dict(color="darkblue", width=2)
))



fig.add_trace(go.Scatter(
x=[observed_stat, observed_stat],
y=[0, max_hist_y],
y=[0, max_hist_y],
mode="lines",
name="Observed Stat",
line=dict(color="black", dash="dash", width=1.5)
))

def hex_to_rgba(hex_color, alpha=0.4):
"""Convierte un color HEX a RGBA con transparencia controlada."""
hex_color = hex_color.lstrip("#")
r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
return f"rgba({r},{g},{b},{alpha})"


significance_color = f"rgba(187,178,199,0.3)"

fig.add_trace(go.Scatter(
x=[upper_bound, max(null_stats), max(null_stats), upper_bound],

y=[0, 0, max_hist_y, max_hist_y],
y=[0, 0, max_hist_y, max_hist_y],
fill="toself",
fillcolor=hex_to_rgba(purple_light, 0.3),
line=dict(color="rgba(255,0,0,0)"),
fillcolor=significance_color,
line=dict(width=0),
name="Upper Significance Zone"
))

fig.add_trace(go.Scatter(
x=[min(null_stats), lower_bound, lower_bound, min(null_stats), min(null_stats)],
y=[0, 0, max_hist_y, max_hist_y, 0],
x=[min(null_stats), lower_bound, lower_bound, min(null_stats)],
y=[0, 0, max_hist_y, max_hist_y],
fill="toself",
fillcolor=hex_to_rgba(purple_light, 0.3),
line=dict(color="rgba(255,0,0,0)"),
fillcolor=significance_color,
line=dict(width=0),
name="Lower Significance Zone"
))

Expand All @@ -1201,9 +1248,7 @@ def hex_to_rgba(hex_color, alpha=0.4):
xaxis_title="Conformity Score",
yaxis_title="Density",
template="plotly_white",

bargap=0

)

return fig
Expand Down Expand Up @@ -1550,4 +1595,4 @@ def calculate_confidence_bands(data, alpha=0.05):
lower_bound = data - margin
upper_bound = data + margin

return lower_bound, upper_bound
return lower_bound, upper_bound
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ data = pd.DataFrame({
```python
results = run_geo_analysis(
data = data,
excluded_locations = ['mexico city', 'méxico'],
excluded_locations = ['mexico city', 'mexico'],
maximum_treatment_percentage=0.30,
significance_level = 0.1,
deltas_range = (0.01, 0.3, 0.02),
Expand Down
46 changes: 26 additions & 20 deletions experimental_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@

def generate_pdf(treatment_group, control_group, holdout_percentage,
impact_graph,percenge_lift,p_value,power,period,
permutation_test,treatment_day,firt_day,last_day,
col_target,metric_mmm,mmm_option,lift_total,firt_report_day,second_report_day,
permutation_test,treatment_day,first_day,last_day,
col_target,metric_mmm,mmm_option,lift_total,first_report_day,second_report_day,
pre_treatment,pre_counterfactual,post_treatment,post_counterfactual,att,incremental,df,spend):
"""
Generates a PDF report with explanations for each aspect.
Expand Down Expand Up @@ -94,7 +94,7 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
pdf.set_font("Poppins", size=10)
pdf.set_text_color(33, 31, 36)
pdf.multi_cell(0,5 , f"This report provides information about the the results of the analysis of a treatment on the variable '{col_target}' with a duration of {period} days. "
f"The data included in the design have a period of {firt_day} to {last_day} where the treatment started on {firt_day} until {last_day}."
f"The data included in the design have a period of {first_day} to {last_day} where the treatment started on {treatment_day} until {last_day}."
f"It includes information about the treatment group, control group, and the statistics results of the analysis.")


Expand Down Expand Up @@ -187,8 +187,8 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,

header_texts = [
"Group",
f"Pre-treatment\n({firt_report_day} to {second_report_day})",
f"Post-treatment\n({firt_day} to {last_day})",
f"Pre-treatment\n({first_report_day} to {second_report_day})",
f"Post-treatment\n({treatment_day} to {last_day})",
"Increment"
]

Expand Down Expand Up @@ -440,10 +440,10 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
st.session_state.incremental_report = None
if 'last_day' not in st.session_state:
st.session_state.last_day = None
if 'firt_day' not in st.session_state:
st.session_state.firt_day = None
if 'firt_report_day' not in st.session_state:
st.session_state.firt_report_day = None
if 'first_day' not in st.session_state:
st.session_state.first_day = None
if 'first_report_day' not in st.session_state:
st.session_state.first_report_day = None
if 'second_report_day' not in st.session_state:
st.session_state.second_report_day = None

Expand Down Expand Up @@ -560,16 +560,16 @@ def reset_states():
st.subheader("3. Experimental evaluation")
random_sate = data1['location'].unique()[0]
filtered_data = data1[data1['location'] == random_sate]
firt_day = filtered_data['time'].min()
first_day = filtered_data['time'].min()
last_day = filtered_data['time'].max()





st.text("Parameter configuration")
start_treatment = st.date_input("Treatment start date",min_value=firt_day,max_value=last_day,value=firt_day)
end_treatment = st.date_input("Treatment end date",min_value=firt_day,max_value=last_day,value=last_day)
start_treatment = st.date_input("Treatment start date",min_value=first_day,max_value=last_day,value=first_day)
end_treatment = st.date_input("Treatment end date",min_value=first_day,max_value=last_day,value=last_day)
treatment_group = st.multiselect("Select treatment group", data1['location'].unique())
spend = st.number_input("Select spend")
mmm_option = st.selectbox("Select the option to calculate the iROAS or iCPA", ["iROAS", "iCPA"])
Expand Down Expand Up @@ -656,7 +656,7 @@ def reset_states():
st.session_state.permutation_test_report = plot_permutation_test_report(results)
st.session_state.period = period
second_report_day = last_day - pd.Timedelta(days=period)
firt_report_day = last_day - pd.Timedelta(days=(period*2)-1)
first_report_day = last_day - pd.Timedelta(days=(period*2)-1)
treatment_day = last_day - pd.Timedelta(days=period-1)


Expand Down Expand Up @@ -688,10 +688,10 @@ def reset_states():



if mmm_option == "iROAS":
if mmm_option == "iCPA":
st.session_state.metric_mmm = spend / st.session_state.incremental
else:
st.session_state.metric_mmm = spend / st.session_state.incremental
st.session_state.metric_mmm = st.session_state.incremental / spend



Expand All @@ -711,12 +711,18 @@ def reset_states():
last_day = pd.to_datetime(last_day)
treatment_day = last_day - pd.Timedelta(days=end_position_treatment - start_position_treatment)
second_report_day = last_day - pd.Timedelta(days=st.session_state.period)
firt_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1)
first_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1)
treatment_day = last_day - pd.Timedelta(days=st.session_state.period-1)
treatment_day = treatment_day.strftime('%Y-%m-%d')
last_day = last_day.strftime('%Y-%m-%d')
firt_day = firt_day.strftime('%Y-%m-%d')
firt_report_day = firt_report_day.strftime('%Y-%m-%d')
first_day = first_day.strftime('%Y-%m-%d')
first_report_day = first_report_day.strftime('%Y-%m-%d')
second_report_day = second_report_day.strftime('%Y-%m-%d')
# st.write(f"Last day: {last_day}")
# st.write(f"First day: {first_day}")
# st.write(f"First report day: {first_report_day}")
# st.write(f"Second report day: {second_report_day}")
# st.write(f"Treatment day: {treatment_day}")



Expand Down Expand Up @@ -772,13 +778,13 @@ def reset_states():
st.session_state.period,
st.session_state.permutation_test_report,
treatment_day,
firt_day,
first_day,
last_day,
col_target,
st.session_state.metric_mmm,
st.session_state.mmm_option,
st.session_state.lift_total,
firt_report_day,
first_report_day,
second_report_day,
st.session_state.pre_treatment,
st.session_state.pre_counterfactual,
Expand Down
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
"tqdm",
"matplotlib",
"seaborn",
"plotly"
"plotly",
"millify",
"statsmodels",

],
author="Entropy Team",
author_email="dev@entropy.tech",
Expand Down