diff --git a/Murray/plots.py b/Murray/plots.py index 76758a2..fff71b2 100644 --- a/Murray/plots.py +++ b/Murray/plots.py @@ -385,13 +385,23 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz tickfont=dict(size=12, color='black')) ) - custom_data = [] - for s in sorted_sizes: - custom_data.append([s] * len(periods)) + for i, s in enumerate(sorted_sizes): + mde_data = [] + for period in periods: + mde = sensitivity_results[s][period].get('MDE', None) + mde_data.append([ + s, # Treatment size + f"{mde:.2%}" if mde is not None else "N/A" # MDE + ]) + custom_data.append(mde_data) fig.data[0].customdata = custom_data - fig.data[0].hovertemplate = "Treatment size: %{customdata}
" + fig.data[0].hovertemplate = ( + "Treatment size: %{customdata[0]}
" + + "MDE: %{customdata[1]}
" + + "" + ) fig.data[0].hoverinfo = "skip" return fig @@ -400,6 +410,39 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + def print_weights(geo_test, treatment_percentage=None, num_locations=None): """ Extracts control group weights based on holdout percentage or number of locations. @@ -686,11 +729,26 @@ def print_incremental_results(geo_test, period, treatment_percentage): holdout_percentage = 100 - treatment_percentage title = "Incremental Results" att, incremental, fig = plot_impact_streamlit_app(geo_test, period, holdout_percentage) + + # Get the MDE from the sensitivity_results + sensitivity_results = geo_test['sensitivity_results'] + results_by_size = geo_test['simulation_results'] + + target_size_key = None + target_mde = None + for size_key, result in results_by_size.items(): + current_holdout = result['Holdout Percentage'] + if abs(current_holdout - holdout_percentage) < 0.01: + target_size_key = size_key + target_mde = sensitivity_results[size_key][period].get('MDE', None) + break + print("=" * 30) print(title.center(30)) print("=" * 30) print(f"ATT: {round(att,2)}") print(f"Lift total: {round(incremental,2)}") + print(f"MDE: {round(target_mde*100,2)}%") print("=" * 30) @@ -913,12 +971,19 @@ def plot_impact_evaluation(results_evaluation): treatment (array): Treatment group values period (int): Treatment period length """ + counterfactual = results_evaluation['predictions'] treatment = results_evaluation['treatment'] period = results_evaluation['period'] length_treatment = results_evaluation['length_treatment'] + if len(treatment.shape) > 1: + treatment = treatment.squeeze() + + if len(counterfactual.shape) > 1: + counterfactual = counterfactual.squeeze() + point_difference = treatment - counterfactual cumulative_effect = ([0] * (len(treatment) - period)) + (np.cumsum(point_difference[len(treatment)-period:])).tolist() @@ -1118,81 +1183,63 @@ def plot_permutation_test(results_evaluation, Significance_level=0.1): Returns: fig: Plotly figure. """ - null_stats = results_evaluation['null_stats'] observed_stat = results_evaluation['observed_stat'] - upper_bound = np.percentile(null_stats, 100 * (1 - (Significance_level / 2))) lower_bound = np.percentile(null_stats, 100 * (Significance_level / 2)) - - - kde = stats.gaussian_kde(null_stats) - x_kde = np.linspace(min(null_stats), max(null_stats), 300) + kde = stats.gaussian_kde(null_stats, bw_method='scott') + x_kde = np.linspace(min(null_stats), max(null_stats), 100) y_kde = kde(x_kde) - - max_hist_y = max(kde(null_stats)) - + max_hist_y = np.max(y_kde) * 1.1 fig = go.Figure() - fig.add_trace(go.Histogram( x=null_stats, - nbinsx=30, + nbinsx=20, histnorm='probability density', name="Null Stats", - marker=dict(color=blue,line=dict(color="black",width=1)), + marker=dict(color=blue, line=dict(color="black", width=1)), opacity=0.6 )) - fig.add_trace(go.Scatter( x=x_kde, y=y_kde, mode="lines", name="KDE Density", showlegend=False, - line=dict(color="darkblue", width=2) )) - - fig.add_trace(go.Scatter( x=[observed_stat, observed_stat], - y=[0, max_hist_y], + y=[0, max_hist_y], mode="lines", name="Observed Stat", line=dict(color="black", dash="dash", width=1.5) )) - def hex_to_rgba(hex_color, alpha=0.4): - """Convierte un color HEX a RGBA con transparencia controlada.""" - hex_color = hex_color.lstrip("#") - r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4)) - return f"rgba({r},{g},{b},{alpha})" - - + significance_color = f"rgba(187,178,199,0.3)" fig.add_trace(go.Scatter( x=[upper_bound, max(null_stats), max(null_stats), upper_bound], - - y=[0, 0, max_hist_y, max_hist_y], + y=[0, 0, max_hist_y, max_hist_y], fill="toself", - fillcolor=hex_to_rgba(purple_light, 0.3), - line=dict(color="rgba(255,0,0,0)"), + fillcolor=significance_color, + line=dict(width=0), name="Upper Significance Zone" )) fig.add_trace(go.Scatter( - x=[min(null_stats), lower_bound, lower_bound, min(null_stats), min(null_stats)], - y=[0, 0, max_hist_y, max_hist_y, 0], + x=[min(null_stats), lower_bound, lower_bound, min(null_stats)], + y=[0, 0, max_hist_y, max_hist_y], fill="toself", - fillcolor=hex_to_rgba(purple_light, 0.3), - line=dict(color="rgba(255,0,0,0)"), + fillcolor=significance_color, + line=dict(width=0), name="Lower Significance Zone" )) @@ -1201,9 +1248,7 @@ def hex_to_rgba(hex_color, alpha=0.4): xaxis_title="Conformity Score", yaxis_title="Density", template="plotly_white", - bargap=0 - ) return fig @@ -1550,4 +1595,4 @@ def calculate_confidence_bands(data, alpha=0.05): lower_bound = data - margin upper_bound = data + margin - return lower_bound, upper_bound \ No newline at end of file + return lower_bound, upper_bound \ No newline at end of file diff --git a/README.md b/README.md index be78874..cfe945b 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ data = pd.DataFrame({ ```python results = run_geo_analysis( data = data, - excluded_locations = ['mexico city', 'méxico'], + excluded_locations = ['mexico city', 'mexico'], maximum_treatment_percentage=0.30, significance_level = 0.1, deltas_range = (0.01, 0.3, 0.02), diff --git a/experimental_evaluation.py b/experimental_evaluation.py index fe2663f..ae6e7c7 100644 --- a/experimental_evaluation.py +++ b/experimental_evaluation.py @@ -42,8 +42,8 @@ def generate_pdf(treatment_group, control_group, holdout_percentage, impact_graph,percenge_lift,p_value,power,period, - permutation_test,treatment_day,firt_day,last_day, - col_target,metric_mmm,mmm_option,lift_total,firt_report_day,second_report_day, + permutation_test,treatment_day,first_day,last_day, + col_target,metric_mmm,mmm_option,lift_total,first_report_day,second_report_day, pre_treatment,pre_counterfactual,post_treatment,post_counterfactual,att,incremental,df,spend): """ Generates a PDF report with explanations for each aspect. @@ -94,7 +94,7 @@ def generate_pdf(treatment_group, control_group, holdout_percentage, pdf.set_font("Poppins", size=10) pdf.set_text_color(33, 31, 36) pdf.multi_cell(0,5 , f"This report provides information about the the results of the analysis of a treatment on the variable '{col_target}' with a duration of {period} days. " - f"The data included in the design have a period of {firt_day} to {last_day} where the treatment started on {firt_day} until {last_day}." + f"The data included in the design have a period of {first_day} to {last_day} where the treatment started on {treatment_day} until {last_day}." f"It includes information about the treatment group, control group, and the statistics results of the analysis.") @@ -187,8 +187,8 @@ def generate_pdf(treatment_group, control_group, holdout_percentage, header_texts = [ "Group", - f"Pre-treatment\n({firt_report_day} to {second_report_day})", - f"Post-treatment\n({firt_day} to {last_day})", + f"Pre-treatment\n({first_report_day} to {second_report_day})", + f"Post-treatment\n({treatment_day} to {last_day})", "Increment" ] @@ -440,10 +440,10 @@ def generate_pdf(treatment_group, control_group, holdout_percentage, st.session_state.incremental_report = None if 'last_day' not in st.session_state: st.session_state.last_day = None -if 'firt_day' not in st.session_state: - st.session_state.firt_day = None -if 'firt_report_day' not in st.session_state: - st.session_state.firt_report_day = None +if 'first_day' not in st.session_state: + st.session_state.first_day = None +if 'first_report_day' not in st.session_state: + st.session_state.first_report_day = None if 'second_report_day' not in st.session_state: st.session_state.second_report_day = None @@ -560,7 +560,7 @@ def reset_states(): st.subheader("3. Experimental evaluation") random_sate = data1['location'].unique()[0] filtered_data = data1[data1['location'] == random_sate] - firt_day = filtered_data['time'].min() + first_day = filtered_data['time'].min() last_day = filtered_data['time'].max() @@ -568,8 +568,8 @@ def reset_states(): st.text("Parameter configuration") - start_treatment = st.date_input("Treatment start date",min_value=firt_day,max_value=last_day,value=firt_day) - end_treatment = st.date_input("Treatment end date",min_value=firt_day,max_value=last_day,value=last_day) + start_treatment = st.date_input("Treatment start date",min_value=first_day,max_value=last_day,value=first_day) + end_treatment = st.date_input("Treatment end date",min_value=first_day,max_value=last_day,value=last_day) treatment_group = st.multiselect("Select treatment group", data1['location'].unique()) spend = st.number_input("Select spend") mmm_option = st.selectbox("Select the option to calculate the iROAS or iCPA", ["iROAS", "iCPA"]) @@ -656,7 +656,7 @@ def reset_states(): st.session_state.permutation_test_report = plot_permutation_test_report(results) st.session_state.period = period second_report_day = last_day - pd.Timedelta(days=period) - firt_report_day = last_day - pd.Timedelta(days=(period*2)-1) + first_report_day = last_day - pd.Timedelta(days=(period*2)-1) treatment_day = last_day - pd.Timedelta(days=period-1) @@ -688,10 +688,10 @@ def reset_states(): - if mmm_option == "iROAS": + if mmm_option == "iCPA": st.session_state.metric_mmm = spend / st.session_state.incremental else: - st.session_state.metric_mmm = spend / st.session_state.incremental + st.session_state.metric_mmm = st.session_state.incremental / spend @@ -711,12 +711,18 @@ def reset_states(): last_day = pd.to_datetime(last_day) treatment_day = last_day - pd.Timedelta(days=end_position_treatment - start_position_treatment) second_report_day = last_day - pd.Timedelta(days=st.session_state.period) - firt_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1) + first_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1) treatment_day = last_day - pd.Timedelta(days=st.session_state.period-1) + treatment_day = treatment_day.strftime('%Y-%m-%d') last_day = last_day.strftime('%Y-%m-%d') - firt_day = firt_day.strftime('%Y-%m-%d') - firt_report_day = firt_report_day.strftime('%Y-%m-%d') + first_day = first_day.strftime('%Y-%m-%d') + first_report_day = first_report_day.strftime('%Y-%m-%d') second_report_day = second_report_day.strftime('%Y-%m-%d') + # st.write(f"Last day: {last_day}") + # st.write(f"First day: {first_day}") + # st.write(f"First report day: {first_report_day}") + # st.write(f"Second report day: {second_report_day}") + # st.write(f"Treatment day: {treatment_day}") @@ -772,13 +778,13 @@ def reset_states(): st.session_state.period, st.session_state.permutation_test_report, treatment_day, - firt_day, + first_day, last_day, col_target, st.session_state.metric_mmm, st.session_state.mmm_option, st.session_state.lift_total, - firt_report_day, + first_report_day, second_report_day, st.session_state.pre_treatment, st.session_state.pre_counterfactual, diff --git a/setup.py b/setup.py index 05f55de..7c4bf27 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,10 @@ "tqdm", "matplotlib", "seaborn", - "plotly" + "plotly", + "millify", + "statsmodels", + ], author="Entropy Team", author_email="dev@entropy.tech",