entropyx · IsaacMtz19 · Apr 24, 2025 · Apr 9, 2025 · Apr 14, 2025 · Apr 18, 2025
diff --git a/Murray/plots.py b/Murray/plots.py
@@ -385,13 +385,23 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz
                    tickfont=dict(size=12, color='black'))
     )
 
-
     custom_data = []
-    for s in sorted_sizes:
-        custom_data.append([s] * len(periods))
+    for s in sorted_sizes:
+        # One [size, formatted MDE] pair per period for this treatment size.
+        mde_data = []
+        for period in periods:
+            mde = sensitivity_results[s][period].get('MDE')
+            mde_data.append(
+                [s, f"{mde:.2%}" if mde is not None else "N/A"]
+            )
+        custom_data.append(mde_data)
 
     fig.data[0].customdata = custom_data
-    fig.data[0].hovertemplate = "Treatment size: %{customdata}<br><extra></extra>"
+    fig.data[0].hovertemplate = (
+        "Treatment size: %{customdata[0]}<br>" +
+        "MDE: %{customdata[1]}<br>" +
+        "<extra></extra>"
+    )
     fig.data[0].hoverinfo = "skip"
 
     return fig
@@ -400,6 +410,39 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 def print_weights(geo_test, treatment_percentage=None, num_locations=None):
     """
     Extracts control group weights based on holdout percentage or number of locations.
@@ -686,11 +729,26 @@ def print_incremental_results(geo_test, period, treatment_percentage):
     holdout_percentage = 100 - treatment_percentage
     title = "Incremental Results"
     att, incremental, fig = plot_impact_streamlit_app(geo_test, period, holdout_percentage)
+
+    # Look up the MDE stored for the simulated size whose holdout matches.
+    sensitivity_results = geo_test['sensitivity_results']
+    results_by_size = geo_test['simulation_results']
+
+    target_mde = None
+    for size_key, result in results_by_size.items():
+        current_holdout = result['Holdout Percentage']
+        if abs(current_holdout - holdout_percentage) < 0.01:
+            target_mde = sensitivity_results[size_key][period].get('MDE', None)
+            break
+    # target_mde stays None when no size matches or no MDE was recorded.
+    mde_text = "N/A" if target_mde is None else f"{round(target_mde*100,2)}%"
+
     print("=" * 30)
     print(title.center(30))
     print("=" * 30)
     print(f"ATT: {round(att,2)}")
     print(f"Lift total: {round(incremental,2)}")
+    print(f"MDE: {mde_text}")
     print("=" * 30)
 
 
@@ -913,12 +971,19 @@ def plot_impact_evaluation(results_evaluation):
         treatment (array): Treatment group values
         period (int): Treatment period length
     """
+
 
     counterfactual = results_evaluation['predictions']
     treatment = results_evaluation['treatment']
     period = results_evaluation['period']
     length_treatment = results_evaluation['length_treatment']
 
+    if len(treatment.shape) > 1:
+        treatment = treatment.squeeze()
+
+    if len(counterfactual.shape) > 1:
+        counterfactual = counterfactual.squeeze()
+
     point_difference = treatment - counterfactual
     cumulative_effect = ([0] * (len(treatment) - period)) + (np.cumsum(point_difference[len(treatment)-period:])).tolist()
 
@@ -1118,81 +1183,63 @@ def plot_permutation_test(results_evaluation, Significance_level=0.1):
     Returns:
         fig: Plotly figure.
     """
-
     null_stats = results_evaluation['null_stats']
     observed_stat = results_evaluation['observed_stat']
 
-
     upper_bound = np.percentile(null_stats, 100 * (1 - (Significance_level / 2)))
     lower_bound = np.percentile(null_stats, 100 * (Significance_level / 2))
 
-
-
-    kde = stats.gaussian_kde(null_stats)
-    x_kde = np.linspace(min(null_stats), max(null_stats), 300)
+    kde = stats.gaussian_kde(null_stats, bw_method='scott')  
+    x_kde = np.linspace(min(null_stats), max(null_stats), 100)  
     y_kde = kde(x_kde)
 
-
-    max_hist_y = max(kde(null_stats))  
-
+    max_hist_y = np.max(y_kde) * 1.1  
 
     fig = go.Figure()
 
-
     fig.add_trace(go.Histogram(
         x=null_stats,
-        nbinsx=30,
+        nbinsx=20,  
         histnorm='probability density',
         name="Null Stats",
-        marker=dict(color=blue,line=dict(color="black",width=1)),
+        marker=dict(color=blue, line=dict(color="black", width=1)),
         opacity=0.6
     ))
 
-
     fig.add_trace(go.Scatter(
         x=x_kde,
         y=y_kde,
         mode="lines",
         name="KDE Density",
         showlegend=False,
-
         line=dict(color="darkblue", width=2)
     ))
 
-
-
     fig.add_trace(go.Scatter(
         x=[observed_stat, observed_stat],
-        y=[0, max_hist_y],  
+        y=[0, max_hist_y],
         mode="lines",
         name="Observed Stat",
         line=dict(color="black", dash="dash", width=1.5)
     ))
 
-    def hex_to_rgba(hex_color, alpha=0.4):
-      """Convierte un color HEX a RGBA con transparencia controlada."""
-      hex_color = hex_color.lstrip("#")
-      r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
-      return f"rgba({r},{g},{b},{alpha})"
-
-
+    significance_color = "rgba(187,178,199,0.3)"  # translucent fill for both significance zones
 
     fig.add_trace(go.Scatter(
         x=[upper_bound, max(null_stats), max(null_stats), upper_bound],
-
-        y=[0, 0, max_hist_y, max_hist_y],  
+        y=[0, 0, max_hist_y, max_hist_y],
         fill="toself",
-        fillcolor=hex_to_rgba(purple_light, 0.3),  
-        line=dict(color="rgba(255,0,0,0)"),
+        fillcolor=significance_color,
+        line=dict(width=0),
         name="Upper Significance Zone"
     ))
 
     fig.add_trace(go.Scatter(
-        x=[min(null_stats), lower_bound, lower_bound, min(null_stats), min(null_stats)],
-        y=[0, 0, max_hist_y, max_hist_y, 0],  
+        x=[min(null_stats), lower_bound, lower_bound, min(null_stats)],
+        y=[0, 0, max_hist_y, max_hist_y],
         fill="toself",
-        fillcolor=hex_to_rgba(purple_light, 0.3),  
-        line=dict(color="rgba(255,0,0,0)"),
+        fillcolor=significance_color,
+        line=dict(width=0),
         name="Lower Significance Zone"
     ))
 
@@ -1201,9 +1248,7 @@ def hex_to_rgba(hex_color, alpha=0.4):
         xaxis_title="Conformity Score",
         yaxis_title="Density",
         template="plotly_white",
-
         bargap=0
-
     )
 
     return fig
@@ -1550,4 +1595,4 @@ def calculate_confidence_bands(data, alpha=0.05):
     lower_bound = data - margin
     upper_bound = data + margin
 
-    return lower_bound, upper_bound 
+    return lower_bound, upper_bound  
diff --git a/README.md b/README.md
@@ -39,7 +39,7 @@ data = pd.DataFrame({
 ```python
 results = run_geo_analysis(
     data = data,
-    excluded_locations = ['mexico city', 'méxico'],
+    excluded_locations = ['mexico city', 'mexico'],
     maximum_treatment_percentage=0.30,
     significance_level = 0.1,
     deltas_range = (0.01, 0.3, 0.02),

diff --git a/experimental_evaluation.py b/experimental_evaluation.py
@@ -42,8 +42,8 @@
 
 def generate_pdf(treatment_group, control_group, holdout_percentage, 
                  impact_graph,percenge_lift,p_value,power,period,
-                 permutation_test,treatment_day,firt_day,last_day,
-                 col_target,metric_mmm,mmm_option,lift_total,firt_report_day,second_report_day,
+                 permutation_test,treatment_day,first_day,last_day,
+                 col_target,metric_mmm,mmm_option,lift_total,first_report_day,second_report_day,
                  pre_treatment,pre_counterfactual,post_treatment,post_counterfactual,att,incremental,df,spend):
         """
         Generates a PDF report with explanations for each aspect.
@@ -94,7 +94,7 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
         pdf.set_font("Poppins", size=10)
         pdf.set_text_color(33, 31, 36)
         pdf.multi_cell(0,5 , f"This report provides information about the the results of the analysis of a treatment on the variable '{col_target}' with a duration of {period} days. "
-                       f"The data included in the design have a period of {firt_day} to {last_day} where the treatment started on {firt_day} until {last_day}."
+                       f"The data included in the design have a period of {first_day} to {last_day} where the treatment started on {treatment_day} until {last_day}. "
                        f"It includes information about the treatment group, control group, and the statistics results of the analysis.")
 
 
@@ -187,8 +187,8 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
 
         header_texts = [
             "Group",
-            f"Pre-treatment\n({firt_report_day} to {second_report_day})",
-            f"Post-treatment\n({firt_day} to {last_day})",
+            f"Pre-treatment\n({first_report_day} to {second_report_day})",
+            f"Post-treatment\n({treatment_day} to {last_day})",
             "Increment"
         ]
 
@@ -440,10 +440,10 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
         st.session_state.incremental_report = None
 if 'last_day' not in st.session_state:
         st.session_state.last_day = None
-if 'firt_day' not in st.session_state:
-        st.session_state.firt_day = None
-if 'firt_report_day' not in st.session_state:
-        st.session_state.firt_report_day = None
+if 'first_day' not in st.session_state:
+        st.session_state.first_day = None
+if 'first_report_day' not in st.session_state:
+        st.session_state.first_report_day = None
 if 'second_report_day' not in st.session_state:
         st.session_state.second_report_day = None
 
@@ -560,16 +560,16 @@ def reset_states():
             st.subheader("3. Experimental evaluation")
             random_sate = data1['location'].unique()[0]
             filtered_data = data1[data1['location'] == random_sate]
-            firt_day = filtered_data['time'].min()
+            first_day = filtered_data['time'].min()
             last_day = filtered_data['time'].max()
 
 
 
 
 
             st.text("Parameter configuration")
-            start_treatment = st.date_input("Treatment start date",min_value=firt_day,max_value=last_day,value=firt_day)
-            end_treatment = st.date_input("Treatment end date",min_value=firt_day,max_value=last_day,value=last_day)
+            start_treatment = st.date_input("Treatment start date",min_value=first_day,max_value=last_day,value=first_day)
+            end_treatment = st.date_input("Treatment end date",min_value=first_day,max_value=last_day,value=last_day)
             treatment_group = st.multiselect("Select treatment group", data1['location'].unique())
             spend = st.number_input("Select spend")
             mmm_option = st.selectbox("Select the option to calculate the iROAS or iCPA", ["iROAS", "iCPA"])
@@ -656,7 +656,7 @@ def reset_states():
                         st.session_state.permutation_test_report = plot_permutation_test_report(results)
                         st.session_state.period = period
                         second_report_day = last_day - pd.Timedelta(days=period)
-                        firt_report_day = last_day - pd.Timedelta(days=(period*2)-1)
+                        first_report_day = last_day - pd.Timedelta(days=(period*2)-1)
                         treatment_day = last_day - pd.Timedelta(days=period-1)
 
 
@@ -688,10 +688,10 @@ def reset_states():
 
 
 
-                if mmm_option == "iROAS":
+                if mmm_option == "iCPA":
                     st.session_state.metric_mmm = spend / st.session_state.incremental 
                 else:
-                    st.session_state.metric_mmm = spend / st.session_state.incremental 
+                    st.session_state.metric_mmm = st.session_state.incremental / spend 
 
 
 
@@ -711,12 +711,18 @@ def reset_states():
                 last_day = pd.to_datetime(last_day)
                 treatment_day = last_day - pd.Timedelta(days=end_position_treatment - start_position_treatment)
                 second_report_day = last_day - pd.Timedelta(days=st.session_state.period)
-                firt_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1)
+                first_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1)
                 treatment_day = last_day - pd.Timedelta(days=st.session_state.period-1)
+                treatment_day = treatment_day.strftime('%Y-%m-%d')
                 last_day = last_day.strftime('%Y-%m-%d')
-                firt_day = firt_day.strftime('%Y-%m-%d')
-                firt_report_day = firt_report_day.strftime('%Y-%m-%d')
+                first_day = first_day.strftime('%Y-%m-%d')
+                first_report_day = first_report_day.strftime('%Y-%m-%d')
                 second_report_day = second_report_day.strftime('%Y-%m-%d')
+                # st.write(f"Last day: {last_day}")
+                # st.write(f"First day: {first_day}")
+                # st.write(f"First report day: {first_report_day}")
+                # st.write(f"Second report day: {second_report_day}")
+                # st.write(f"Treatment day: {treatment_day}")
 
 
 
@@ -772,13 +778,13 @@ def reset_states():
                         st.session_state.period,
                         st.session_state.permutation_test_report,
                         treatment_day,
-                        firt_day,
+                        first_day,
                         last_day,
                         col_target,
                         st.session_state.metric_mmm,
                         st.session_state.mmm_option,
                         st.session_state.lift_total,
-                        firt_report_day,
+                        first_report_day,
                         second_report_day,
                         st.session_state.pre_treatment,
                         st.session_state.pre_counterfactual,

diff --git a/setup.py b/setup.py
@@ -17,7 +17,10 @@
         "tqdm",
         "matplotlib",
         "seaborn",
-        "plotly"
+        "plotly",
+        "millify",
+        "statsmodels",
+
     ],
     author="Entropy Team",
     author_email="dev@entropy.tech",