diff --git a/Murray/plots.py b/Murray/plots.py
index 76758a2..fff71b2 100644
--- a/Murray/plots.py
+++ b/Murray/plots.py
@@ -385,13 +385,23 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz
tickfont=dict(size=12, color='black'))
)
-
custom_data = []
- for s in sorted_sizes:
- custom_data.append([s] * len(periods))
+    for s in sorted_sizes:
+        # One [size, formatted MDE] pair per period, read by the hovertemplate as customdata[0] / customdata[1].
+        mde_data = []
+        for period in periods:
+            mde = sensitivity_results[s][period].get('MDE', None)
+            mde_data.append([
+                s,  # Treatment size
+                f"{mde:.2%}" if mde is not None else "N/A"  # MDE
+            ])
+        custom_data.append(mde_data)
fig.data[0].customdata = custom_data
-    fig.data[0].hovertemplate = "Treatment size: %{customdata}<br>"
+    fig.data[0].hovertemplate = (
+        "Treatment size: %{customdata[0]}<br>" +
+        "MDE: %{customdata[1]}<br>" +
+        "<extra></extra>"
+    )
fig.data[0].hoverinfo = "skip"
return fig
@@ -400,6 +410,39 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
def print_weights(geo_test, treatment_percentage=None, num_locations=None):
"""
Extracts control group weights based on holdout percentage or number of locations.
@@ -686,11 +729,26 @@ def print_incremental_results(geo_test, period, treatment_percentage):
holdout_percentage = 100 - treatment_percentage
title = "Incremental Results"
att, incremental, fig = plot_impact_streamlit_app(geo_test, period, holdout_percentage)
+
+ # Get the MDE from the sensitivity_results
+ sensitivity_results = geo_test['sensitivity_results']
+ results_by_size = geo_test['simulation_results']
+
+ target_size_key = None
+ target_mde = None
+ for size_key, result in results_by_size.items():
+ current_holdout = result['Holdout Percentage']
+ if abs(current_holdout - holdout_percentage) < 0.01:
+ target_size_key = size_key
+ target_mde = sensitivity_results[size_key][period].get('MDE', None)
+ break
+
print("=" * 30)
print(title.center(30))
print("=" * 30)
print(f"ATT: {round(att,2)}")
print(f"Lift total: {round(incremental,2)}")
+    # target_mde is still None when no simulated size matches this holdout or the matched entry has no 'MDE'.
+    print(f"MDE: {round(target_mde*100,2)}%" if target_mde is not None else "MDE: N/A")
print("=" * 30)
@@ -913,12 +971,19 @@ def plot_impact_evaluation(results_evaluation):
treatment (array): Treatment group values
period (int): Treatment period length
"""
+
counterfactual = results_evaluation['predictions']
treatment = results_evaluation['treatment']
period = results_evaluation['period']
length_treatment = results_evaluation['length_treatment']
+ if len(treatment.shape) > 1:
+ treatment = treatment.squeeze()
+
+ if len(counterfactual.shape) > 1:
+ counterfactual = counterfactual.squeeze()
+
point_difference = treatment - counterfactual
cumulative_effect = ([0] * (len(treatment) - period)) + (np.cumsum(point_difference[len(treatment)-period:])).tolist()
@@ -1118,81 +1183,63 @@ def plot_permutation_test(results_evaluation, Significance_level=0.1):
Returns:
fig: Plotly figure.
"""
-
null_stats = results_evaluation['null_stats']
observed_stat = results_evaluation['observed_stat']
-
upper_bound = np.percentile(null_stats, 100 * (1 - (Significance_level / 2)))
lower_bound = np.percentile(null_stats, 100 * (Significance_level / 2))
-
-
- kde = stats.gaussian_kde(null_stats)
- x_kde = np.linspace(min(null_stats), max(null_stats), 300)
+ kde = stats.gaussian_kde(null_stats, bw_method='scott')
+ x_kde = np.linspace(min(null_stats), max(null_stats), 100)
y_kde = kde(x_kde)
-
- max_hist_y = max(kde(null_stats))
-
+ max_hist_y = np.max(y_kde) * 1.1
fig = go.Figure()
-
fig.add_trace(go.Histogram(
x=null_stats,
- nbinsx=30,
+ nbinsx=20,
histnorm='probability density',
name="Null Stats",
- marker=dict(color=blue,line=dict(color="black",width=1)),
+ marker=dict(color=blue, line=dict(color="black", width=1)),
opacity=0.6
))
-
fig.add_trace(go.Scatter(
x=x_kde,
y=y_kde,
mode="lines",
name="KDE Density",
showlegend=False,
-
line=dict(color="darkblue", width=2)
))
-
-
fig.add_trace(go.Scatter(
x=[observed_stat, observed_stat],
- y=[0, max_hist_y],
+ y=[0, max_hist_y],
mode="lines",
name="Observed Stat",
line=dict(color="black", dash="dash", width=1.5)
))
- def hex_to_rgba(hex_color, alpha=0.4):
- """Convierte un color HEX a RGBA con transparencia controlada."""
- hex_color = hex_color.lstrip("#")
- r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
- return f"rgba({r},{g},{b},{alpha})"
-
-
+    # Shared translucent fill for the upper and lower significance zones.
+    significance_color = "rgba(187,178,199,0.3)"
fig.add_trace(go.Scatter(
x=[upper_bound, max(null_stats), max(null_stats), upper_bound],
-
- y=[0, 0, max_hist_y, max_hist_y],
+ y=[0, 0, max_hist_y, max_hist_y],
fill="toself",
- fillcolor=hex_to_rgba(purple_light, 0.3),
- line=dict(color="rgba(255,0,0,0)"),
+ fillcolor=significance_color,
+ line=dict(width=0),
name="Upper Significance Zone"
))
fig.add_trace(go.Scatter(
- x=[min(null_stats), lower_bound, lower_bound, min(null_stats), min(null_stats)],
- y=[0, 0, max_hist_y, max_hist_y, 0],
+ x=[min(null_stats), lower_bound, lower_bound, min(null_stats)],
+ y=[0, 0, max_hist_y, max_hist_y],
fill="toself",
- fillcolor=hex_to_rgba(purple_light, 0.3),
- line=dict(color="rgba(255,0,0,0)"),
+ fillcolor=significance_color,
+ line=dict(width=0),
name="Lower Significance Zone"
))
@@ -1201,9 +1248,7 @@ def hex_to_rgba(hex_color, alpha=0.4):
xaxis_title="Conformity Score",
yaxis_title="Density",
template="plotly_white",
-
bargap=0
-
)
return fig
@@ -1550,4 +1595,4 @@ def calculate_confidence_bands(data, alpha=0.05):
lower_bound = data - margin
upper_bound = data + margin
- return lower_bound, upper_bound
\ No newline at end of file
+ return lower_bound, upper_bound
\ No newline at end of file
diff --git a/README.md b/README.md
index be78874..cfe945b 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ data = pd.DataFrame({
```python
results = run_geo_analysis(
data = data,
- excluded_locations = ['mexico city', 'méxico'],
+ excluded_locations = ['mexico city', 'mexico'],
maximum_treatment_percentage=0.30,
significance_level = 0.1,
deltas_range = (0.01, 0.3, 0.02),
diff --git a/experimental_evaluation.py b/experimental_evaluation.py
index fe2663f..ae6e7c7 100644
--- a/experimental_evaluation.py
+++ b/experimental_evaluation.py
@@ -42,8 +42,8 @@
def generate_pdf(treatment_group, control_group, holdout_percentage,
impact_graph,percenge_lift,p_value,power,period,
- permutation_test,treatment_day,firt_day,last_day,
- col_target,metric_mmm,mmm_option,lift_total,firt_report_day,second_report_day,
+ permutation_test,treatment_day,first_day,last_day,
+ col_target,metric_mmm,mmm_option,lift_total,first_report_day,second_report_day,
pre_treatment,pre_counterfactual,post_treatment,post_counterfactual,att,incremental,df,spend):
"""
Generates a PDF report with explanations for each aspect.
@@ -94,7 +94,7 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
pdf.set_font("Poppins", size=10)
pdf.set_text_color(33, 31, 36)
pdf.multi_cell(0,5 , f"This report provides information about the the results of the analysis of a treatment on the variable '{col_target}' with a duration of {period} days. "
- f"The data included in the design have a period of {firt_day} to {last_day} where the treatment started on {firt_day} until {last_day}."
+ f"The data included in the design have a period of {first_day} to {last_day} where the treatment started on {treatment_day} until {last_day}."
f"It includes information about the treatment group, control group, and the statistics results of the analysis.")
@@ -187,8 +187,8 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
header_texts = [
"Group",
- f"Pre-treatment\n({firt_report_day} to {second_report_day})",
- f"Post-treatment\n({firt_day} to {last_day})",
+ f"Pre-treatment\n({first_report_day} to {second_report_day})",
+ f"Post-treatment\n({treatment_day} to {last_day})",
"Increment"
]
@@ -440,10 +440,10 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
st.session_state.incremental_report = None
if 'last_day' not in st.session_state:
st.session_state.last_day = None
-if 'firt_day' not in st.session_state:
- st.session_state.firt_day = None
-if 'firt_report_day' not in st.session_state:
- st.session_state.firt_report_day = None
+if 'first_day' not in st.session_state:
+ st.session_state.first_day = None
+if 'first_report_day' not in st.session_state:
+ st.session_state.first_report_day = None
if 'second_report_day' not in st.session_state:
st.session_state.second_report_day = None
@@ -560,7 +560,7 @@ def reset_states():
st.subheader("3. Experimental evaluation")
random_sate = data1['location'].unique()[0]
filtered_data = data1[data1['location'] == random_sate]
- firt_day = filtered_data['time'].min()
+ first_day = filtered_data['time'].min()
last_day = filtered_data['time'].max()
@@ -568,8 +568,8 @@ def reset_states():
st.text("Parameter configuration")
- start_treatment = st.date_input("Treatment start date",min_value=firt_day,max_value=last_day,value=firt_day)
- end_treatment = st.date_input("Treatment end date",min_value=firt_day,max_value=last_day,value=last_day)
+ start_treatment = st.date_input("Treatment start date",min_value=first_day,max_value=last_day,value=first_day)
+ end_treatment = st.date_input("Treatment end date",min_value=first_day,max_value=last_day,value=last_day)
treatment_group = st.multiselect("Select treatment group", data1['location'].unique())
spend = st.number_input("Select spend")
mmm_option = st.selectbox("Select the option to calculate the iROAS or iCPA", ["iROAS", "iCPA"])
@@ -656,7 +656,7 @@ def reset_states():
st.session_state.permutation_test_report = plot_permutation_test_report(results)
st.session_state.period = period
second_report_day = last_day - pd.Timedelta(days=period)
- firt_report_day = last_day - pd.Timedelta(days=(period*2)-1)
+ first_report_day = last_day - pd.Timedelta(days=(period*2)-1)
treatment_day = last_day - pd.Timedelta(days=period-1)
@@ -688,10 +688,10 @@ def reset_states():
- if mmm_option == "iROAS":
+ if mmm_option == "iCPA":
st.session_state.metric_mmm = spend / st.session_state.incremental
else:
- st.session_state.metric_mmm = spend / st.session_state.incremental
+ st.session_state.metric_mmm = st.session_state.incremental / spend
@@ -711,12 +711,18 @@ def reset_states():
last_day = pd.to_datetime(last_day)
treatment_day = last_day - pd.Timedelta(days=end_position_treatment - start_position_treatment)
second_report_day = last_day - pd.Timedelta(days=st.session_state.period)
- firt_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1)
+ first_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1)
treatment_day = last_day - pd.Timedelta(days=st.session_state.period-1)
+ treatment_day = treatment_day.strftime('%Y-%m-%d')
last_day = last_day.strftime('%Y-%m-%d')
- firt_day = firt_day.strftime('%Y-%m-%d')
- firt_report_day = firt_report_day.strftime('%Y-%m-%d')
+ first_day = first_day.strftime('%Y-%m-%d')
+ first_report_day = first_report_day.strftime('%Y-%m-%d')
second_report_day = second_report_day.strftime('%Y-%m-%d')
+ # st.write(f"Last day: {last_day}")
+ # st.write(f"First day: {first_day}")
+ # st.write(f"First report day: {first_report_day}")
+ # st.write(f"Second report day: {second_report_day}")
+ # st.write(f"Treatment day: {treatment_day}")
@@ -772,13 +778,13 @@ def reset_states():
st.session_state.period,
st.session_state.permutation_test_report,
treatment_day,
- firt_day,
+ first_day,
last_day,
col_target,
st.session_state.metric_mmm,
st.session_state.mmm_option,
st.session_state.lift_total,
- firt_report_day,
+ first_report_day,
second_report_day,
st.session_state.pre_treatment,
st.session_state.pre_counterfactual,
diff --git a/setup.py b/setup.py
index 05f55de..7c4bf27 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,10 @@
"tqdm",
"matplotlib",
"seaborn",
- "plotly"
+ "plotly",
+ "millify",
+ "statsmodels",
+
],
author="Entropy Team",
author_email="dev@entropy.tech",