From 8e5be48f9c16c791d41222134c665edd8fccab14 Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Wed, 9 Apr 2025 10:52:50 -0600
Subject: [PATCH 01/14] Fix link documentation

---
 experimental_design.py     | 2 +-
 experimental_evaluation.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/experimental_design.py b/experimental_design.py
index 623508a..2f82afc 100644
--- a/experimental_design.py
+++ b/experimental_design.py
@@ -29,7 +29,7 @@
         color: #3e7cb1 !important;  
     }
     </style>
-    <a class='custom-link' href="https://entropy.tech/murray/" target="_blank">Murray Documentation</a>
+    <a class='custom-link' href="https://docs-murray.entropy.tech/" target="_blank">Murray Documentation</a>
     """,
     unsafe_allow_html=True
 )
diff --git a/experimental_evaluation.py b/experimental_evaluation.py
index 965d2d6..fe2663f 100644
--- a/experimental_evaluation.py
+++ b/experimental_evaluation.py
@@ -30,7 +30,7 @@
         color: #3e7cb1 !important;  
     }
     </style>
-    <a class='custom-link' href="https://entropy.tech/murray/" target="_blank">Murray Documentation</a>
+    <a class='custom-link' href="https://docs-murray.entropy.tech/" target="_blank">Murray Documentation</a>
     """,
     unsafe_allow_html=True
 )

From 888987df01c0d2cf5f2cf7ccde22df024ba7032f Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Mon, 14 Apr 2025 01:40:48 -0600
Subject: [PATCH 02/14] Fix bug in mmm_option

---
 experimental_evaluation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/experimental_evaluation.py b/experimental_evaluation.py
index fe2663f..dc797c6 100644
--- a/experimental_evaluation.py
+++ b/experimental_evaluation.py
@@ -688,10 +688,10 @@ def reset_states():
                         
 
                 
-                if mmm_option == "iROAS":
+                if mmm_option == "iCPA":
                     st.session_state.metric_mmm = spend / st.session_state.incremental 
                 else:
-                    st.session_state.metric_mmm = spend / st.session_state.incremental 
+                    st.session_state.metric_mmm = st.session_state.incremental / spend 
 
 
                 

From ac56491301af80b18bb69913dabde7f9d5982624 Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Fri, 18 Apr 2025 09:28:34 -0600
Subject: [PATCH 03/14] Rename misspelled firt_* variables to first_*

---
 experimental_evaluation.py | 40 +++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/experimental_evaluation.py b/experimental_evaluation.py
index dc797c6..c4e469a 100644
--- a/experimental_evaluation.py
+++ b/experimental_evaluation.py
@@ -42,8 +42,8 @@
 
 def generate_pdf(treatment_group, control_group, holdout_percentage, 
                  impact_graph,percenge_lift,p_value,power,period,
-                 permutation_test,treatment_day,firt_day,last_day,
-                 col_target,metric_mmm,mmm_option,lift_total,firt_report_day,second_report_day,
+                 permutation_test,treatment_day,first_day,last_day,
+                 col_target,metric_mmm,mmm_option,lift_total,first_report_day,second_report_day,
                  pre_treatment,pre_counterfactual,post_treatment,post_counterfactual,att,incremental,df,spend):
         """
         Generates a PDF report with explanations for each aspect.
@@ -94,7 +94,7 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
         pdf.set_font("Poppins", size=10)
         pdf.set_text_color(33, 31, 36)
         pdf.multi_cell(0,5 , f"This report provides information about the the results of the analysis of a treatment on the variable '{col_target}' with a duration of {period} days. "
-                       f"The data included in the design have a period of {firt_day} to {last_day} where the treatment started on {firt_day} until {last_day}."
+                       f"The data included in the design have a period of {first_day} to {last_day} where the treatment started on {first_day} until {last_day}."
                        f"It includes information about the treatment group, control group, and the statistics results of the analysis.")
         
 
@@ -187,8 +187,8 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
         
         header_texts = [
             "Group",
-            f"Pre-treatment\n({firt_report_day} to {second_report_day})",
-            f"Post-treatment\n({firt_day} to {last_day})",
+            f"Pre-treatment\n({first_report_day} to {second_report_day})",
+            f"Post-treatment\n({first_day} to {last_day})",
             "Increment"
         ]
 
@@ -440,10 +440,10 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
         st.session_state.incremental_report = None
 if 'last_day' not in st.session_state:
         st.session_state.last_day = None
-if 'firt_day' not in st.session_state:
-        st.session_state.firt_day = None
-if 'firt_report_day' not in st.session_state:
-        st.session_state.firt_report_day = None
+if 'first_day' not in st.session_state:
+        st.session_state.first_day = None
+if 'first_report_day' not in st.session_state:
+        st.session_state.first_report_day = None
 if 'second_report_day' not in st.session_state:
         st.session_state.second_report_day = None
         
@@ -560,7 +560,7 @@ def reset_states():
             st.subheader("3. Experimental evaluation")
             random_sate = data1['location'].unique()[0]
             filtered_data = data1[data1['location'] == random_sate]
-            firt_day = filtered_data['time'].min()
+            first_day = filtered_data['time'].min()
             last_day = filtered_data['time'].max()
             
 
@@ -568,8 +568,8 @@ def reset_states():
 
 
             st.text("Parameter configuration")
-            start_treatment = st.date_input("Treatment start date",min_value=firt_day,max_value=last_day,value=firt_day)
-            end_treatment = st.date_input("Treatment end date",min_value=firt_day,max_value=last_day,value=last_day)
+            start_treatment = st.date_input("Treatment start date",min_value=first_day,max_value=last_day,value=first_day)
+            end_treatment = st.date_input("Treatment end date",min_value=first_day,max_value=last_day,value=last_day)
             treatment_group = st.multiselect("Select treatment group", data1['location'].unique())
             spend = st.number_input("Select spend")
             mmm_option = st.selectbox("Select the option to calculate the iROAS or iCPA", ["iROAS", "iCPA"])
@@ -656,7 +656,7 @@ def reset_states():
                         st.session_state.permutation_test_report = plot_permutation_test_report(results)
                         st.session_state.period = period
                         second_report_day = last_day - pd.Timedelta(days=period)
-                        firt_report_day = last_day - pd.Timedelta(days=(period*2)-1)
+                        first_report_day = last_day - pd.Timedelta(days=(period*2)-1)
                         treatment_day = last_day - pd.Timedelta(days=period-1)
                 
                         
@@ -711,12 +711,16 @@ def reset_states():
                 last_day = pd.to_datetime(last_day)
                 treatment_day = last_day - pd.Timedelta(days=end_position_treatment - start_position_treatment)
                 second_report_day = last_day - pd.Timedelta(days=st.session_state.period)
-                firt_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1)
+                first_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1)
                 treatment_day = last_day - pd.Timedelta(days=st.session_state.period-1)
                 last_day = last_day.strftime('%Y-%m-%d')
-                firt_day = firt_day.strftime('%Y-%m-%d')
-                firt_report_day = firt_report_day.strftime('%Y-%m-%d')
+                first_day = first_day.strftime('%Y-%m-%d')
+                first_report_day = first_report_day.strftime('%Y-%m-%d')
                 second_report_day = second_report_day.strftime('%Y-%m-%d')
+                st.write(f"Last day: {last_day}")
+                st.write(f"First day: {first_day}")
+                st.write(f"First report day: {first_report_day}")
+                st.write(f"Second report day: {second_report_day}")
 
 
 
@@ -772,13 +776,13 @@ def reset_states():
                         st.session_state.period,
                         st.session_state.permutation_test_report,
                         treatment_day,
-                        firt_day,
+                        first_day,
                         last_day,
                         col_target,
                         st.session_state.metric_mmm,
                         st.session_state.mmm_option,
                         st.session_state.lift_total,
-                        firt_report_day,
+                        first_report_day,
                         second_report_day,
                         st.session_state.pre_treatment,
                         st.session_state.pre_counterfactual,

From 67ae5eeb9037fd159a5604370823a9d883c631fe Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Fri, 18 Apr 2025 09:45:15 -0600
Subject: [PATCH 04/14] Fix report dates

---
 experimental_evaluation.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/experimental_evaluation.py b/experimental_evaluation.py
index c4e469a..fa76c41 100644
--- a/experimental_evaluation.py
+++ b/experimental_evaluation.py
@@ -94,7 +94,7 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
         pdf.set_font("Poppins", size=10)
         pdf.set_text_color(33, 31, 36)
         pdf.multi_cell(0,5 , f"This report provides information about the the results of the analysis of a treatment on the variable '{col_target}' with a duration of {period} days. "
-                       f"The data included in the design have a period of {first_day} to {last_day} where the treatment started on {first_day} until {last_day}."
+                       f"The data included in the design have a period of {first_day} to {last_day} where the treatment started on {treatment_day} until {last_day}."
                        f"It includes information about the treatment group, control group, and the statistics results of the analysis.")
         
 
@@ -188,7 +188,7 @@ def generate_pdf(treatment_group, control_group, holdout_percentage,
         header_texts = [
             "Group",
             f"Pre-treatment\n({first_report_day} to {second_report_day})",
-            f"Post-treatment\n({first_day} to {last_day})",
+            f"Post-treatment\n({treatment_day} to {last_day})",
             "Increment"
         ]
 
@@ -713,6 +713,7 @@ def reset_states():
                 second_report_day = last_day - pd.Timedelta(days=st.session_state.period)
                 first_report_day = last_day - pd.Timedelta(days=(st.session_state.period*2)-1)
                 treatment_day = last_day - pd.Timedelta(days=st.session_state.period-1)
+                treatment_day = treatment_day.strftime('%Y-%m-%d')
                 last_day = last_day.strftime('%Y-%m-%d')
                 first_day = first_day.strftime('%Y-%m-%d')
                 first_report_day = first_report_day.strftime('%Y-%m-%d')
@@ -721,6 +722,7 @@ def reset_states():
                 st.write(f"First day: {first_day}")
                 st.write(f"First report day: {first_report_day}")
                 st.write(f"Second report day: {second_report_day}")
+                st.write(f"Treatment day: {treatment_day}")
 
 
 

From 7a416f24a69e73d7fbaebfa00f4e1c98b7df1dec Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Wed, 23 Apr 2025 12:27:10 -0600
Subject: [PATCH 05/14] Comment out unnecessary debug output

---
 experimental_evaluation.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/experimental_evaluation.py b/experimental_evaluation.py
index fa76c41..ae6e7c7 100644
--- a/experimental_evaluation.py
+++ b/experimental_evaluation.py
@@ -718,11 +718,11 @@ def reset_states():
                 first_day = first_day.strftime('%Y-%m-%d')
                 first_report_day = first_report_day.strftime('%Y-%m-%d')
                 second_report_day = second_report_day.strftime('%Y-%m-%d')
-                st.write(f"Last day: {last_day}")
-                st.write(f"First day: {first_day}")
-                st.write(f"First report day: {first_report_day}")
-                st.write(f"Second report day: {second_report_day}")
-                st.write(f"Treatment day: {treatment_day}")
+                # st.write(f"Last day: {last_day}")
+                # st.write(f"First day: {first_day}")
+                # st.write(f"First report day: {first_report_day}")
+                # st.write(f"Second report day: {second_report_day}")
+                # st.write(f"Treatment day: {treatment_day}")
 
 
 

From d7be6b7659689980b873d361db858225e91e27a3 Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Wed, 23 Apr 2025 14:23:55 -0600
Subject: [PATCH 06/14] Update print_incremental_results

---
 Murray/plots.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/Murray/plots.py b/Murray/plots.py
index 76758a2..5ea0d59 100644
--- a/Murray/plots.py
+++ b/Murray/plots.py
@@ -686,11 +686,26 @@ def print_incremental_results(geo_test, period, treatment_percentage):
     holdout_percentage = 100 - treatment_percentage
     title = "Incremental Results"
     att, incremental, fig = plot_impact_streamlit_app(geo_test, period, holdout_percentage)
+    
+    # Get the MDE from the sensitivity_results
+    sensitivity_results = geo_test['sensitivity_results']
+    results_by_size = geo_test['simulation_results']
+    
+    target_size_key = None
+    target_mde = None
+    for size_key, result in results_by_size.items():
+        current_holdout = result['Holdout Percentage']
+        if abs(current_holdout - holdout_percentage) < 0.01:
+            target_size_key = size_key
+            target_mde = sensitivity_results[size_key][period].get('MDE', None)
+            break
+    
     print("=" * 30)
     print(title.center(30))
     print("=" * 30)
     print(f"ATT: {round(att,2)}")
     print(f"Lift total: {round(incremental,2)}")
+    print(f"MDE: {round(target_mde*100,2)}%")
     print("=" * 30)
 
 

From b73f54a63ed9b235469918eece349469c81899fb Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Wed, 23 Apr 2025 14:25:56 -0600
Subject: [PATCH 07/14] Add install_requires

---
 setup.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 05f55de..7c4bf27 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,10 @@
         "tqdm",
         "matplotlib",
         "seaborn",
-        "plotly"
+        "plotly",
+        "millify",
+        "statsmodels",
+
     ],
     author="Entropy Team",
     author_email="dev@entropy.tech",

From e1632f4ef6b5d0327e43de8b8659e23526b9f4de Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Wed, 23 Apr 2025 15:24:14 -0600
Subject: [PATCH 08/14] Add MDE to hover tooltip

---
 Murray/plots.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/Murray/plots.py b/Murray/plots.py
index 5ea0d59..5c73c26 100644
--- a/Murray/plots.py
+++ b/Murray/plots.py
@@ -388,10 +388,19 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz
 
     custom_data = []
     for s in sorted_sizes:
-        custom_data.append([s] * len(periods))
+        # Get the MDE for each size and period
+        mde_data = []
+        for period in periods:
+            mde = sensitivity_results[s][period].get('MDE', None)
+            mde_data.append(f"{mde:.2%}" if mde is not None else "N/A")
+        custom_data.append(mde_data)
 
     fig.data[0].customdata = custom_data
-    fig.data[0].hovertemplate = "Treatment size: %{customdata}<br><extra></extra>"
+    fig.data[0].hovertemplate = (
+        "Treatment size: %{customdata}<br>" +
+        "MDE: %{customdata}<br>" +
+        "<extra></extra>"
+    )
     fig.data[0].hoverinfo = "skip"
 
     return fig

From afe37351f60cded4c9fd7fed3f670b894d1fde24 Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Wed, 23 Apr 2025 22:33:21 -0600
Subject: [PATCH 09/14] Converted treatment array to 1D

---
 Murray/plots.py | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/Murray/plots.py b/Murray/plots.py
index 5c73c26..0ae020c 100644
--- a/Murray/plots.py
+++ b/Murray/plots.py
@@ -409,6 +409,39 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 def print_weights(geo_test, treatment_percentage=None, num_locations=None):
     """
     Extracts control group weights based on holdout percentage or number of locations.
@@ -937,12 +970,19 @@ def plot_impact_evaluation(results_evaluation):
         treatment (array): Treatment group values
         period (int): Treatment period length
     """
+    
 
     counterfactual = results_evaluation['predictions']
     treatment = results_evaluation['treatment']
     period = results_evaluation['period']
     length_treatment = results_evaluation['length_treatment']
 
+    if len(treatment.shape) > 1:
+        treatment = treatment.squeeze()
+
+    if len(counterfactual.shape) > 1:
+        counterfactual = counterfactual.squeeze()
+
     point_difference = treatment - counterfactual
     cumulative_effect = ([0] * (len(treatment) - period)) + (np.cumsum(point_difference[len(treatment)-period:])).tolist()
 
@@ -1574,4 +1614,4 @@ def calculate_confidence_bands(data, alpha=0.05):
     lower_bound = data - margin
     upper_bound = data + margin
     
-    return lower_bound, upper_bound 
\ No newline at end of file
+    return lower_bound, upper_bound  
\ No newline at end of file

From f0d459ac9837453ea7f413446e00f283d06d2868 Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Wed, 23 Apr 2025 23:23:33 -0600
Subject: [PATCH 10/14] Optimized gaussian_kde

---
 Murray/plots.py | 69 +++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 39 deletions(-)

diff --git a/Murray/plots.py b/Murray/plots.py
index 0ae020c..3556ed8 100644
--- a/Murray/plots.py
+++ b/Murray/plots.py
@@ -1182,94 +1182,85 @@ def plot_permutation_test(results_evaluation, Significance_level=0.1):
     Returns:
         fig: Plotly figure.
     """
-
     null_stats = results_evaluation['null_stats']
     observed_stat = results_evaluation['observed_stat']
     
-
+    # Calcular percentiles para los límites
     upper_bound = np.percentile(null_stats, 100 * (1 - (Significance_level / 2)))
     lower_bound = np.percentile(null_stats, 100 * (Significance_level / 2))
     
-
-
+    # Optimización: Reducir el número de puntos para el KDE
+    # Usar menos puntos para el histograma y el KDE
     kde = stats.gaussian_kde(null_stats)
-    x_kde = np.linspace(min(null_stats), max(null_stats), 300)
+    x_kde = np.linspace(min(null_stats), max(null_stats), 100)  # Reducido de 300 a 100
     y_kde = kde(x_kde)
-
-
-    max_hist_y = max(kde(null_stats))  
-
-
+    
+    # Calcular el máximo de densidad para la línea vertical
+    max_hist_y = max(y_kde)  # Usar y_kde en lugar de kde(null_stats)
+    
     fig = go.Figure()
-
-
+    
+    # Histograma con menos bins
     fig.add_trace(go.Histogram(
         x=null_stats,
-        nbinsx=30,
+        nbinsx=20,  # Reducido de 30 a 20
         histnorm='probability density',
         name="Null Stats",
-        marker=dict(color=blue,line=dict(color="black",width=1)),
+        marker=dict(color=blue, line=dict(color="black", width=1)),
         opacity=0.6
     ))
-
-
+    
+    # KDE plot
     fig.add_trace(go.Scatter(
         x=x_kde,
         y=y_kde,
         mode="lines",
         name="KDE Density",
         showlegend=False,
-
         line=dict(color="darkblue", width=2)
     ))
-
-
-
+    
+    # Línea vertical para el estadístico observado
     fig.add_trace(go.Scatter(
         x=[observed_stat, observed_stat],
-        y=[0, max_hist_y],  
+        y=[0, max_hist_y],
         mode="lines",
         name="Observed Stat",
         line=dict(color="black", dash="dash", width=1.5)
     ))
-
+    
     def hex_to_rgba(hex_color, alpha=0.4):
-      """Convierte un color HEX a RGBA con transparencia controlada."""
-      hex_color = hex_color.lstrip("#")
-      r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
-      return f"rgba({r},{g},{b},{alpha})"
-
-
-
+        hex_color = hex_color.lstrip("#")
+        r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
+        return f"rgba({r},{g},{b},{alpha})"
+    
+    # Zonas de significancia
     fig.add_trace(go.Scatter(
         x=[upper_bound, max(null_stats), max(null_stats), upper_bound],
-
-        y=[0, 0, max_hist_y, max_hist_y],  
+        y=[0, 0, max_hist_y, max_hist_y],
         fill="toself",
-        fillcolor=hex_to_rgba(purple_light, 0.3),  
+        fillcolor=hex_to_rgba(purple_light, 0.3),
         line=dict(color="rgba(255,0,0,0)"),
         name="Upper Significance Zone"
     ))
-
+    
     fig.add_trace(go.Scatter(
         x=[min(null_stats), lower_bound, lower_bound, min(null_stats), min(null_stats)],
-        y=[0, 0, max_hist_y, max_hist_y, 0],  
+        y=[0, 0, max_hist_y, max_hist_y, 0],
         fill="toself",
-        fillcolor=hex_to_rgba(purple_light, 0.3),  
+        fillcolor=hex_to_rgba(purple_light, 0.3),
         line=dict(color="rgba(255,0,0,0)"),
         name="Lower Significance Zone"
     ))
-
+    
     fig.update_layout(
         title="Permutation Test",
         xaxis_title="Conformity Score",
         yaxis_title="Density",
         template="plotly_white",
-
         bargap=0
-
     )
-
+    
     return fig
 
 

From 252aff015d761855abeba314ef64dac373e2dcb7 Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Thu, 24 Apr 2025 04:00:58 -0600
Subject: [PATCH 11/14] Fix hover tooltip

---
 Murray/plots.py | 82 +++++++++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 36 deletions(-)

diff --git a/Murray/plots.py b/Murray/plots.py
index 3556ed8..d532622 100644
--- a/Murray/plots.py
+++ b/Murray/plots.py
@@ -385,20 +385,21 @@ def calculate_penalty_score(mde, period_idx, total_periods, size, results_by_siz
                    tickfont=dict(size=12, color='black'))
     )
 
-
     custom_data = []
-    for s in sorted_sizes:
-        # Get the MDE for each size and period
+    for i, s in enumerate(sorted_sizes):
         mde_data = []
         for period in periods:
             mde = sensitivity_results[s][period].get('MDE', None)
-            mde_data.append(f"{mde:.2%}" if mde is not None else "N/A")
+            mde_data.append([
+                s,  # Treatment size
+                f"{mde:.2%}" if mde is not None else "N/A"  # MDE
+            ])
         custom_data.append(mde_data)
 
     fig.data[0].customdata = custom_data
     fig.data[0].hovertemplate = (
-        "Treatment size: %{customdata}<br>" +
-        "MDE: %{customdata}<br>" +
+        "Treatment size: %{customdata[0]}<br>" +
+        "MDE: %{customdata[1]}<br>" +
         "<extra></extra>"
     )
     fig.data[0].hoverinfo = "skip"
@@ -1182,85 +1183,94 @@ def plot_permutation_test(results_evaluation, Significance_level=0.1):
     Returns:
         fig: Plotly figure.
     """
+
     null_stats = results_evaluation['null_stats']
     observed_stat = results_evaluation['observed_stat']
     
-    # Calcular percentiles para los límites
+
     upper_bound = np.percentile(null_stats, 100 * (1 - (Significance_level / 2)))
     lower_bound = np.percentile(null_stats, 100 * (Significance_level / 2))
     
-    # Optimización: Reducir el número de puntos para el KDE
-    # Usar menos puntos para el histograma y el KDE
+
+
     kde = stats.gaussian_kde(null_stats)
-    x_kde = np.linspace(min(null_stats), max(null_stats), 100)  # Reducido de 300 a 100
+    x_kde = np.linspace(min(null_stats), max(null_stats), 300)
     y_kde = kde(x_kde)
-    
-    # Calcular el máximo de densidad para la línea vertical
-    max_hist_y = max(y_kde)  # Usar y_kde en lugar de kde(null_stats)
-    
+
+
+    max_hist_y = max(kde(null_stats))  
+
+
     fig = go.Figure()
-    
-    # Histograma con menos bins
+
+
     fig.add_trace(go.Histogram(
         x=null_stats,
-        nbinsx=20,  # Reducido de 30 a 20
+        nbinsx=30,
         histnorm='probability density',
         name="Null Stats",
-        marker=dict(color=blue, line=dict(color="black", width=1)),
+        marker=dict(color=blue,line=dict(color="black",width=1)),
         opacity=0.6
     ))
-    
-    # KDE plot
+
+
     fig.add_trace(go.Scatter(
         x=x_kde,
         y=y_kde,
         mode="lines",
         name="KDE Density",
         showlegend=False,
+
         line=dict(color="darkblue", width=2)
     ))
-    
-    # Línea vertical para el estadístico observado
+
+
+
     fig.add_trace(go.Scatter(
         x=[observed_stat, observed_stat],
-        y=[0, max_hist_y],
+        y=[0, max_hist_y],  
         mode="lines",
         name="Observed Stat",
         line=dict(color="black", dash="dash", width=1.5)
     ))
-    
+
     def hex_to_rgba(hex_color, alpha=0.4):
-        hex_color = hex_color.lstrip("#")
-        r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
-        return f"rgba({r},{g},{b},{alpha})"
-    
-    # Zonas de significancia
+      """Convierte un color HEX a RGBA con transparencia controlada."""
+      hex_color = hex_color.lstrip("#")
+      r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
+      return f"rgba({r},{g},{b},{alpha})"
+
+
+
     fig.add_trace(go.Scatter(
         x=[upper_bound, max(null_stats), max(null_stats), upper_bound],
-        y=[0, 0, max_hist_y, max_hist_y],
+
+        y=[0, 0, max_hist_y, max_hist_y],  
         fill="toself",
-        fillcolor=hex_to_rgba(purple_light, 0.3),
+        fillcolor=hex_to_rgba(purple_light, 0.3),  
         line=dict(color="rgba(255,0,0,0)"),
         name="Upper Significance Zone"
     ))
-    
+
     fig.add_trace(go.Scatter(
         x=[min(null_stats), lower_bound, lower_bound, min(null_stats), min(null_stats)],
-        y=[0, 0, max_hist_y, max_hist_y, 0],
+        y=[0, 0, max_hist_y, max_hist_y, 0],  
         fill="toself",
-        fillcolor=hex_to_rgba(purple_light, 0.3),
+        fillcolor=hex_to_rgba(purple_light, 0.3),  
         line=dict(color="rgba(255,0,0,0)"),
         name="Lower Significance Zone"
     ))
-    
+
     fig.update_layout(
         title="Permutation Test",
         xaxis_title="Conformity Score",
         yaxis_title="Density",
         template="plotly_white",
+
         bargap=0
+
     )
-    
+
     return fig
 
 

From 68fa1609b48b0924017f5de0187e39efd4e32079 Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Thu, 24 Apr 2025 04:38:18 -0600
Subject: [PATCH 12/14] Optimized plot_permutation_test

---
 Murray/plots.py | 48 ++++++++++++++----------------------------------
 1 file changed, 14 insertions(+), 34 deletions(-)

diff --git a/Murray/plots.py b/Murray/plots.py
index d532622..fff71b2 100644
--- a/Murray/plots.py
+++ b/Murray/plots.py
@@ -1183,81 +1183,63 @@ def plot_permutation_test(results_evaluation, Significance_level=0.1):
     Returns:
         fig: Plotly figure.
     """
-
     null_stats = results_evaluation['null_stats']
     observed_stat = results_evaluation['observed_stat']
     
-
     upper_bound = np.percentile(null_stats, 100 * (1 - (Significance_level / 2)))
     lower_bound = np.percentile(null_stats, 100 * (Significance_level / 2))
     
-
-
-    kde = stats.gaussian_kde(null_stats)
-    x_kde = np.linspace(min(null_stats), max(null_stats), 300)
+    kde = stats.gaussian_kde(null_stats, bw_method='scott')  
+    x_kde = np.linspace(min(null_stats), max(null_stats), 100)  
     y_kde = kde(x_kde)
 
-
-    max_hist_y = max(kde(null_stats))  
-
+    max_hist_y = np.max(y_kde) * 1.1  
 
     fig = go.Figure()
 
-
     fig.add_trace(go.Histogram(
         x=null_stats,
-        nbinsx=30,
+        nbinsx=20,  
         histnorm='probability density',
         name="Null Stats",
-        marker=dict(color=blue,line=dict(color="black",width=1)),
+        marker=dict(color=blue, line=dict(color="black", width=1)),
         opacity=0.6
     ))
 
-
     fig.add_trace(go.Scatter(
         x=x_kde,
         y=y_kde,
         mode="lines",
         name="KDE Density",
         showlegend=False,
-
         line=dict(color="darkblue", width=2)
     ))
 
-
-
     fig.add_trace(go.Scatter(
         x=[observed_stat, observed_stat],
-        y=[0, max_hist_y],  
+        y=[0, max_hist_y],
         mode="lines",
         name="Observed Stat",
         line=dict(color="black", dash="dash", width=1.5)
     ))
 
-    def hex_to_rgba(hex_color, alpha=0.4):
-      """Convierte un color HEX a RGBA con transparencia controlada."""
-      hex_color = hex_color.lstrip("#")
-      r, g, b = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
-      return f"rgba({r},{g},{b},{alpha})"
-
-
+    significance_color = f"rgba(187,178,199,0.3)"  
 
     fig.add_trace(go.Scatter(
         x=[upper_bound, max(null_stats), max(null_stats), upper_bound],
-
-        y=[0, 0, max_hist_y, max_hist_y],  
+        y=[0, 0, max_hist_y, max_hist_y],
         fill="toself",
-        fillcolor=hex_to_rgba(purple_light, 0.3),  
-        line=dict(color="rgba(255,0,0,0)"),
+        fillcolor=significance_color,
+        line=dict(width=0),
         name="Upper Significance Zone"
     ))
 
     fig.add_trace(go.Scatter(
-        x=[min(null_stats), lower_bound, lower_bound, min(null_stats), min(null_stats)],
-        y=[0, 0, max_hist_y, max_hist_y, 0],  
+        x=[min(null_stats), lower_bound, lower_bound, min(null_stats)],
+        y=[0, 0, max_hist_y, max_hist_y],
         fill="toself",
-        fillcolor=hex_to_rgba(purple_light, 0.3),  
-        line=dict(color="rgba(255,0,0,0)"),
+        fillcolor=significance_color,
+        line=dict(width=0),
         name="Lower Significance Zone"
     ))
 
@@ -1266,9 +1248,7 @@ def hex_to_rgba(hex_color, alpha=0.4):
         xaxis_title="Conformity Score",
         yaxis_title="Density",
         template="plotly_white",
-
         bargap=0
-
     )
 
     return fig

From 87f24e627d2391c7ffa269f9d2231647372ae473 Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Thu, 24 Apr 2025 05:44:17 -0600
Subject: [PATCH 13/14] Update README and remove Murray/tests

---
 Murray/tests/__init__.py                 |  17 ----
 Murray/tests/test_better_groups.py       | 113 -----------------------
 Murray/tests/test_market_correlations.py |  25 -----
 Murray/tests/test_power_analysis.py      |  90 ------------------
 Murray/tests/test_run_geo_analysis.py    |  38 --------
 Murray/tests/test_run_geo_evaluation.py  |  46 ---------
 Murray/tests/test_select_markets.py      |  86 -----------------
 Murray/tests/test_synthetic_control.py   |  51 ----------
 Murray/tests/test_upload_data.py         |  27 ------
 README.md                                |   2 +-
 10 files changed, 1 insertion(+), 494 deletions(-)
 delete mode 100644 Murray/tests/__init__.py
 delete mode 100644 Murray/tests/test_better_groups.py
 delete mode 100644 Murray/tests/test_market_correlations.py
 delete mode 100644 Murray/tests/test_power_analysis.py
 delete mode 100644 Murray/tests/test_run_geo_analysis.py
 delete mode 100644 Murray/tests/test_run_geo_evaluation.py
 delete mode 100644 Murray/tests/test_select_markets.py
 delete mode 100644 Murray/tests/test_synthetic_control.py
 delete mode 100644 Murray/tests/test_upload_data.py

diff --git a/Murray/tests/__init__.py b/Murray/tests/__init__.py
deleted file mode 100644
index 05cc6ef..0000000
--- a/Murray/tests/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from Murray.main import run_geo_analysis
-from Murray.post_analysis import run_geo_evaluation
-from Murray.auxiliary import cleaned_data,market_correlations
-from Murray.plots import (
-    plot_geodata,
-    print_locations,
-    print_weights,
-    plot_impact_graphs,
-    print_incremental_results,
-    plot_metrics,
-    plot_impact_graphs_evaluation,
-    print_incremental_results_evaluation,
-    plot_permutation_test
-)
-
-__version__ = "1.1.0"
-
diff --git a/Murray/tests/test_better_groups.py b/Murray/tests/test_better_groups.py
deleted file mode 100644
index ba805f8..0000000
--- a/Murray/tests/test_better_groups.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import pytest
-import numpy as np
-import pandas as pd
-from sklearn.preprocessing import MinMaxScaler
-from Murray.main import BetterGroups, SyntheticControl, select_treatments, select_controls
-from Murray.auxiliary import market_correlations, cleaned_data
-
-@pytest.fixture(scope="module")
-def cleaned_dataframe():
-    """Fixture that creates synthetic test data"""
-    np.random.seed(42)  
-    
-    dates = pd.date_range(start='2023-01-01', periods=100)
-    regions = ['Region_A', 'Region_B', 'Region_C', 'Region_D', 'Region_E']
-    
-    data = []
-    for region in regions:
-        base_value = np.random.randint(50, 100)
-        for date in dates:
-            value = base_value + np.sin(date.day/15) * 10 + np.random.normal(0, 2)
-            data.append({
-                'date': date,
-                'region': region,
-                'add_to_carts': max(0, int(value))  
-            })
-    
-    df = pd.DataFrame(data)
-    return cleaned_data(df, "add_to_carts", "region", "date")
-
-@pytest.fixture(scope="module")
-def correlation_matrix(cleaned_dataframe):
-    """Fixture that generates the correlation matrix from synthetic data"""
-    return market_correlations(cleaned_dataframe)
-
-@pytest.fixture(scope="module")
-def similarity_matrix(correlation_matrix):
-    """Fixture to generate a similarity matrix"""
-    return correlation_matrix.copy()
-
-@pytest.fixture
-def test_data(cleaned_dataframe):
-    """Fixture to generate test data"""
-    return cleaned_dataframe.copy()
-
-
-def test_better_groups_valid(similarity_matrix, correlation_matrix, test_data):
-    results = BetterGroups(
-        similarity_matrix=similarity_matrix,
-        excluded_locations=[],
-        data=test_data,
-        correlation_matrix=correlation_matrix,
-        maximum_treatment_percentage=0.50
-    )
-
-    assert isinstance(results, dict), "The result must be a dictionary"
-    assert len(results) > 0, "There must be at least one evaluated treatment group"
-    for size, result in results.items():
-        assert "Best Treatment Group" in result, "Missing treatment group"
-        assert "Control Group" in result, "Missing control group"
-        assert "MAPE" in result, "Missing MAPE metric"
-        assert "SMAPE" in result, "Missing SMAPE metric"
-        assert "Holdout Percentage" in result, "Missing holdout percentage"
-        assert result["MAPE"] >= 0, "MAPE must be a positive number"
-        assert 0 <= result["Holdout Percentage"] <= 100, "Holdout must be between 0 and 100"
-
-
-def test_better_groups_no_valid_treatments(similarity_matrix, correlation_matrix, test_data):
-    test_data = test_data[test_data["location"].isin(["X", "Y"])]  
-    print(f"test data: {test_data}")
-    results = BetterGroups(
-        similarity_matrix=similarity_matrix,
-        excluded_locations=[],
-        data=test_data,
-        correlation_matrix=correlation_matrix,
-        maximum_treatment_percentage=0.50
-    )
-
-    assert results is None, "If there are no valid locations, the result must be None"
-
-
-def test_better_groups_scaled_data(similarity_matrix, correlation_matrix, test_data):
-    scaler = MinMaxScaler()
-    test_data["Y"] = scaler.fit_transform(test_data["Y"].values.reshape(-1, 1))  
-
-    results = BetterGroups(
-        similarity_matrix=similarity_matrix,
-        excluded_locations=[],
-        data=test_data,
-        correlation_matrix=correlation_matrix,
-        maximum_treatment_percentage=0.50
-    )
-
-    assert isinstance(results, dict), "The result must be a dictionary"
-    assert all(isinstance(result["MAPE"], (float, int)) for result in results.values()), "MAPE must be a number"
-
-
-def test_better_groups_no_control(monkeypatch, similarity_matrix, correlation_matrix, test_data):
-    
-    def fake_select_controls(correlation_matrix, treatment_group, min_correlation):
-        return []  
-    
-    monkeypatch.setattr("Murray.main.select_controls", fake_select_controls)
-
-    results = BetterGroups(
-        similarity_matrix=similarity_matrix,
-        excluded_locations=[],
-        data=test_data,
-        correlation_matrix=correlation_matrix,
-        maximum_treatment_percentage=0.50
-    )
-
-    for result in results.values():
-        assert result["MAPE"] == float('inf'), "If there are no controls, MAPE must be infinite"
diff --git a/Murray/tests/test_market_correlations.py b/Murray/tests/test_market_correlations.py
deleted file mode 100644
index 8bcc404..0000000
--- a/Murray/tests/test_market_correlations.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import os
-import pandas as pd
-import pytest
-from Murray.auxiliary import market_correlations,cleaned_data
-
-
-DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data"))
-
-
-tests = [
-    (os.path.join(DATA_DIR, "data1.csv"), "add_to_carts", "region", "date"),
-    (os.path.join(DATA_DIR, "data2.csv"), "sessions", "location", "day"),
-]
-
-@pytest.fixture
-def cleaned_data_fixture(dataset_path, col_target, col_locations, col_dates):
-    df = pd.read_csv(dataset_path)
-    return cleaned_data(df, col_target, col_locations, col_dates)
-
-
-@pytest.mark.parametrize("dataset_path, col_target, col_locations, col_dates", tests)
-def test_market_correlations(cleaned_data_fixture):
-    
-    correlation_matrix = market_correlations(cleaned_data_fixture)
-    assert isinstance(correlation_matrix, pd.DataFrame), "market_correlations should return a DataFrame"
diff --git a/Murray/tests/test_power_analysis.py b/Murray/tests/test_power_analysis.py
deleted file mode 100644
index ca1f409..0000000
--- a/Murray/tests/test_power_analysis.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import pytest
-import numpy as np
-import pandas as pd
-from Murray.main import apply_lift, calculate_conformity, simulate_power, run_simulation, evaluate_sensitivity
-
-@pytest.fixture
-def synthetic_series():
-    """Fixture to generate a synthetic time series."""
-    np.random.seed(42)
-    y = np.random.rand(100) * 100  
-    return y
-
-
-def test_apply_lift(synthetic_series):
-    y = synthetic_series.copy()
-    y_lifted = apply_lift(y, delta=0.1, start_treatment=50, end_treatment=70)
-
-    assert np.all(y_lifted[:50] == y[:50]), "Values before the treatment should not change"
-    assert np.all(y_lifted[70:] == y[70:]), "Values after the treatment should not change"
-    assert np.all(y_lifted[50:70] == y[50:70] * 1.1), "The lift should be applied in the treatment period"
-
-
-def test_calculate_conformity(synthetic_series):
-    y_real = synthetic_series.copy()
-    y_control = synthetic_series.copy() * 0.9  
-
-    conformity = calculate_conformity(y_real, y_control, start_treatment=50, end_treatment=70)
-
-    expected_conformity = np.mean(y_real[50:70]) - np.mean(y_control[50:70])
-    assert np.isclose(conformity, expected_conformity), "The calculated conformity should match the expected value"
-
-
-def test_simulate_power(synthetic_series):
-    y_real = synthetic_series.copy()
-    y_control = synthetic_series.copy() * 0.95  
-
-    delta, power, y_lifted = simulate_power(
-        y_real=y_real,
-        y_control=y_control,
-        delta=0.1,
-        period=20,
-        n_permutations=100,
-        significance_level=0.05
-    )
-
-    assert isinstance(delta, float), "Delta must be a float"
-    assert isinstance(power, float), "Statistical power must be a float"
-    assert isinstance(y_lifted, np.ndarray), "The adjusted series must be a NumPy array"
-    assert len(y_lifted) == len(y_real), "The adjusted series must have the same length as the original"
-
-
-def test_run_simulation(synthetic_series):
-    y_real = synthetic_series.copy()
-    y_control = synthetic_series.copy() * 0.98
-
-    delta, power, y_lifted = run_simulation(
-        delta=0.2,
-        y_real=y_real,
-        y_control=y_control,
-        period=20,
-        n_permutations=100,
-        significance_level=0.05
-    )
-
-    assert isinstance(delta, float), "Delta must be a float"
-    assert isinstance(power, float), "Statistical power must be a float"
-    assert isinstance(y_lifted, np.ndarray), "The adjusted series must be a NumPy array"
-
-
-def test_evaluate_sensitivity():
-    """Test the sensitivity evaluation function"""
-    results_by_size = {
-        50: {"Actual Target Metric (y)": np.random.rand(100) * 100, "Predictions": np.random.rand(100) * 100}
-    }
-    deltas = [0.05, 0.1, 0.2]
-    periods = [10, 20, 30]
-    n_permutations = 50
-
-    sensitivity_results, lift_series = evaluate_sensitivity(
-        results_by_size=results_by_size,
-        deltas=deltas,
-        periods=periods,
-        n_permutations=n_permutations,
-        significance_level=0.05
-    )
-
-    assert isinstance(sensitivity_results, dict), "The result must be a dictionary"
-    assert isinstance(lift_series, dict), "The lift series must be a dictionary"
-    assert all(isinstance(v, dict) for v in sensitivity_results.values()), "Each value in sensitivity_results must be a dictionary"
-    assert all(isinstance(v, np.ndarray) for v in lift_series.values()), "Each value in lift_series must be a NumPy array"
diff --git a/Murray/tests/test_run_geo_analysis.py b/Murray/tests/test_run_geo_analysis.py
deleted file mode 100644
index 5d621cf..0000000
--- a/Murray/tests/test_run_geo_analysis.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import pytest
-import numpy as np
-import pandas as pd
-from Murray.main import run_geo_analysis_streamlit_app
-from Murray.auxiliary import market_correlations, cleaned_data
-
-@pytest.fixture
-def sample_data():
-    """Fixture that generates a test DataFrame with synthetic data."""
-    np.random.seed(42)
-    data = pd.DataFrame({
-        "time": np.tile(pd.date_range("2023-01-01", periods=100, freq="D"), 10),
-        "location": np.repeat([f"Location_{i}" for i in range(10)], 100),
-        "Y": np.random.rand(1000) * 100
-    })
-    return data
-
-
-def test_run_geo_analysis(sample_data):
-    """Checks that the analysis function runs correctly."""
-    results = run_geo_analysis_streamlit_app(
-        data=sample_data,
-        maximum_treatment_percentage=0.50,
-        significance_level=0.05,
-        deltas_range=(0.05, 0.2, 0.05),
-        periods_range=(10, 30, 10),
-        excluded_locations=["Location_1"],
-        n_permutations=100  
-    )
-
-    assert isinstance(results, dict), "The result must be a dictionary"
-    assert "simulation_results" in results, "Missing 'simulation_results' in the results"
-    assert "sensitivity_results" in results, "Missing 'sensitivity_results' in the results"
-    assert "series_lifts" in results, "Missing 'series_lifts' in the results"
-
-    assert isinstance(results["simulation_results"], dict), "simulation_results must be a dictionary"
-    assert isinstance(results["sensitivity_results"], dict), "sensitivity_results must be a dictionary"
-    assert isinstance(results["series_lifts"], dict), "series_lifts must be a dictionary"
diff --git a/Murray/tests/test_run_geo_evaluation.py b/Murray/tests/test_run_geo_evaluation.py
deleted file mode 100644
index ed6b022..0000000
--- a/Murray/tests/test_run_geo_evaluation.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import pytest
-import numpy as np
-import pandas as pd
-from Murray.post_analysis import run_geo_evaluation
-from Murray.auxiliary import market_correlations, cleaned_data
-
-@pytest.fixture
-def sample_data():
-    """Fixture that generates a test DataFrame with fictitious data"""
-    np.random.seed(42)
-    data = pd.DataFrame({
-        "time": np.tile(pd.date_range("2023-01-01", periods=100, freq="D"), 10),
-        "location": np.repeat([f"Location_{i}" for i in range(10)], 100),
-        "Y": np.random.rand(1000) * 100
-    })
-    return data
-
-
-def test_run_geo_evaluation(sample_data):
-    """Checks that the geographic evaluation function runs correctly"""
-    results = run_geo_evaluation(
-        data_input=sample_data,
-        start_treatment="2023-03-01",
-        end_treatment="2023-03-10",
-        treatment_group=["Location_0", "Location_1"],
-        spend=50000,
-        n_permutations=100,  
-        inference_type="iid",
-        significance_level=0.05
-    )
-
-    assert isinstance(results, dict), "The result must be a dictionary"
-    expected_keys = [
-        "MAPE", "SMAPE", "predictions", "treatment", "p_value", "power",
-        "percenge_lift", "control_group", "observed_stat",
-        "null_stats", "weights", "period", "spend", "length_treatment"
-    ]
-    for key in expected_keys:
-        assert key in results, f"Missing the key '{key}' in the results"
-
-    assert isinstance(results["MAPE"], float), "MAPE must be a float"
-    assert isinstance(results["p_value"], float), "p_value must be a float"
-    assert isinstance(results["power"], float), "Power must be a float"
-    assert isinstance(results["control_group"], list), "Control group must be a list"
-    assert 0 <= results["power"] <= 1, "Power must be between 0 and 1"
-    assert 0 <= results["p_value"] <= 1, "p_value must be between 0 and 1"
diff --git a/Murray/tests/test_select_markets.py b/Murray/tests/test_select_markets.py
deleted file mode 100644
index 25ca78b..0000000
--- a/Murray/tests/test_select_markets.py
+++ /dev/null
@@ -1,86 +0,0 @@
-import pytest
-import numpy as np
-import pandas as pd
-from Murray.main import select_treatments, select_controls
-from Murray.auxiliary import market_correlations, cleaned_data
-
-@pytest.fixture(scope="module")
-def cleaned_dataframe():
-    """Fixture that creates synthetic test data"""
-    np.random.seed(42)  
-    
-    dates = pd.date_range(start='2023-01-01', periods=100)
-    regions = ['Region_A', 'Region_B', 'Region_C', 'Region_D', 'Region_E']
-    
-    data = []
-    for region in regions:
-        base_value = np.random.randint(50, 100)
-        for date in dates:
-            value = base_value + np.sin(date.day/15) * 10 + np.random.normal(0, 2)
-            data.append({
-                'date': date,
-                'region': region,
-                'add_to_carts': max(0, int(value))
-            })
-    
-    df = pd.DataFrame(data)
-    return cleaned_data(df, "add_to_carts", "region", "date")
-
-@pytest.fixture(scope="module")
-def correlation_matrix(cleaned_dataframe):
-    """Fixture that generates the correlation matrix"""
-    return market_correlations(cleaned_dataframe)
-
-def test_select_treatments_valid(cleaned_dataframe, correlation_matrix):
-    """Test to verify that treatments are correctly selected with a randomly excluded location"""
-    excluded_location = np.random.choice(cleaned_dataframe["location"].unique()) 
-    treatments = select_treatments(correlation_matrix, treatment_size=2, excluded_locations=[excluded_location])
-
-    assert isinstance(treatments, list), "The result must be a list"
-    assert all(isinstance(group, list) for group in treatments), "Each combination must be a list"
-    assert all(len(group) == 2 for group in treatments), "Each combination must have 2 treatments"
-    assert excluded_location not in [loc for group in treatments for loc in group], "The excluded location must not appear in the treatments"
-
-def test_select_treatments_invalid_location(correlation_matrix):
-    """Should raise a KeyError if an excluded location is not in the matrix"""
-    with pytest.raises(KeyError, match="not present in the similarity matrix"):
-        select_treatments(correlation_matrix, treatment_size=2, excluded_locations=["X", "Y"])
-
-def test_select_treatments_treatment_size_too_large(correlation_matrix):
-    """Should raise ValueError if treatment_size is greater than the number of available columns"""
-    with pytest.raises(ValueError, match="The treatment size .* exceeds the available number of columns"):
-        select_treatments(correlation_matrix, treatment_size=100, excluded_locations=[])
-
-def test_select_treatments_treatment_size_equals_columns(correlation_matrix):
-    """Should return only one combination when treatment_size is equal to the available columns"""
-    num_columns = correlation_matrix.shape[1]  
-    treatments = select_treatments(correlation_matrix, treatment_size=num_columns, excluded_locations=[])
-    
-    assert len(treatments) == 1, "There must be only one possible combination"
-    assert set(treatments[0]) == set(correlation_matrix.columns), "It must contain all possible locations"
-
-
-def test_select_controls_valid(cleaned_dataframe, correlation_matrix):
-    """Test to verify that controls are correctly selected based on treatments"""
-    excluded_location = np.random.choice(cleaned_dataframe["location"].unique())  
-    treatments = select_treatments(correlation_matrix, treatment_size=2, excluded_locations=[excluded_location])
-    
-    for treatment_group in treatments:
-        controls = select_controls(correlation_matrix, treatment_group)
-        assert isinstance(controls, list), "The result must be a list"
-        assert len(controls) > 0, "There must be at least one control available"
-        assert all(loc not in treatment_group for loc in controls), "Controls must not be in the treatment group"
-
-def test_select_controls_invalid_treatments(correlation_matrix):
-    """Should handle nonexistent treatments without failing"""
-    fake_treatment_group = ["X", "Y", "Z"]  
-    controls = select_controls(correlation_matrix, fake_treatment_group)
-    assert controls == [], "If the treatment does not exist, the output must be an empty list"
-
-def test_select_controls_fallback(correlation_matrix,cleaned_dataframe):
-    """Should select the `fallback_n` most correlated if no locations meet the min_correlation"""
-    treatment_group = np.random.choice(cleaned_dataframe["location"].unique())  
-    treatment_group = [treatment_group]
-    controls = select_controls(correlation_matrix, treatment_group, min_correlation=0.99, fallback_n=3)
-    
-    assert len(controls) == 3, "It should select 3 fallback controls"
diff --git a/Murray/tests/test_synthetic_control.py b/Murray/tests/test_synthetic_control.py
deleted file mode 100644
index a25e6f7..0000000
--- a/Murray/tests/test_synthetic_control.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import pytest
-import numpy as np
-import pandas as pd
-from Murray.main import SyntheticControl
-from Murray.auxiliary import cleaned_data, market_correlations
-
-@pytest.fixture(scope="module")
-def synthetic_data():
-    """Fixture that creates synthetic test data"""
-    np.random.seed(42)
-    X = np.random.rand(100, 3)  
-    y = X @ np.array([0.3, 0.5, 0.2]) + np.random.normal(0, 0.1, 100)
-    
-    return X, y
-
-@pytest.fixture(scope="module")
-def correlation_matrix(synthetic_data):
-    """Fixture that generates correlation matrix from synthetic data"""
-    return market_correlations(synthetic_data)
-
-@pytest.fixture(scope="module")
-def synthetic_control():
-    """Fixture that creates a synthetic control instance"""
-    return SyntheticControl(
-        regularization_strength_l1=0.1,
-        regularization_strength_l2=0.1,
-        seasonality=None,
-        delta=1.0
-    )
-
-def test_synthetic_control_fit(synthetic_control, synthetic_data):
-    """Test that synthetic control can fit the data"""
-    X, y = synthetic_data
-    synthetic_control.fit(X, y)
-    
-    assert hasattr(synthetic_control, 'is_fitted_')
-    assert hasattr(synthetic_control, 'w_')
-    assert isinstance(synthetic_control.w_, np.ndarray)
-    assert len(synthetic_control.w_) == X.shape[1]
-
-def test_synthetic_control_predict(synthetic_control, synthetic_data):
-    """Test that synthetic control can make predictions"""
-    X, y = synthetic_data
-    synthetic_control.fit(X, y)
-    predictions, weights = synthetic_control.predict(X)
-    
-    assert isinstance(predictions, np.ndarray)
-    assert len(predictions) == len(y)
-    assert not np.isnan(predictions).any()
-    assert isinstance(weights, np.ndarray)
-    assert len(weights) == X.shape[1]
diff --git a/Murray/tests/test_upload_data.py b/Murray/tests/test_upload_data.py
deleted file mode 100644
index 08f044d..0000000
--- a/Murray/tests/test_upload_data.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import os
-import pandas as pd
-import pytest
-import Murray as mp
-
-
-DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data"))
-
-
-tests = [
-    (os.path.join(DATA_DIR, "data1.csv"), "add_to_carts", "region", "date"),
-    (os.path.join(DATA_DIR, "data2.csv"), "sessions", "location", "day"),
-]
-
-
-
-@pytest.mark.parametrize("dataset_path, col_target, col_locations, col_dates", tests)
-def test_cleaned_data(dataset_path, col_target, col_locations, col_dates):
-    
-    assert os.path.exists(dataset_path), f"File {dataset_path} not found"
-    df = pd.read_csv(dataset_path)
-    df_cleaned = mp.cleaned_data(df, col_target, col_locations, col_dates)
-
-    assert isinstance(df_cleaned, pd.DataFrame), "Output is not a DataFrame"
-    assert df_cleaned.isnull().sum().sum() == 0, "Cleaned data contains NaN values"
-
-
diff --git a/README.md b/README.md
index be78874..cfe945b 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ data = pd.DataFrame({
 ```python
 results = run_geo_analysis(
     data = data,
-    excluded_locations = ['mexico city', 'méxico'],
+    excluded_locations = ['mexico city', 'mexico'],
     maximum_treatment_percentage=0.30,
     significance_level = 0.1,
     deltas_range = (0.01, 0.3, 0.02),

From 55bee5dfe5a60db1d32516112c6b40443cd7dd61 Mon Sep 17 00:00:00 2001
From: IsaacMtz19 <isaac@entropy.tech>
Date: Thu, 24 Apr 2025 05:55:33 -0600
Subject: [PATCH 14/14] Add test files

---
 Murray/tests/__init__.py                 |  17 ++++
 Murray/tests/test_better_groups.py       | 113 +++++++++++++++++++++++
 Murray/tests/test_market_correlations.py |  25 +++++
 Murray/tests/test_power_analysis.py      |  90 ++++++++++++++++++
 Murray/tests/test_run_geo_analysis.py    |  38 ++++++++
 Murray/tests/test_run_geo_evaluation.py  |  46 +++++++++
 Murray/tests/test_select_markets.py      |  86 +++++++++++++++++
 Murray/tests/test_synthetic_control.py   |  51 ++++++++++
 Murray/tests/test_upload_data.py         |  27 ++++++
 9 files changed, 493 insertions(+)
 create mode 100644 Murray/tests/__init__.py
 create mode 100644 Murray/tests/test_better_groups.py
 create mode 100644 Murray/tests/test_market_correlations.py
 create mode 100644 Murray/tests/test_power_analysis.py
 create mode 100644 Murray/tests/test_run_geo_analysis.py
 create mode 100644 Murray/tests/test_run_geo_evaluation.py
 create mode 100644 Murray/tests/test_select_markets.py
 create mode 100644 Murray/tests/test_synthetic_control.py
 create mode 100644 Murray/tests/test_upload_data.py

diff --git a/Murray/tests/__init__.py b/Murray/tests/__init__.py
new file mode 100644
index 0000000..05cc6ef
--- /dev/null
+++ b/Murray/tests/__init__.py
@@ -0,0 +1,17 @@
+from Murray.main import run_geo_analysis
+from Murray.post_analysis import run_geo_evaluation
+from Murray.auxiliary import cleaned_data,market_correlations
+from Murray.plots import (
+    plot_geodata,
+    print_locations,
+    print_weights,
+    plot_impact_graphs,
+    print_incremental_results,
+    plot_metrics,
+    plot_impact_graphs_evaluation,
+    print_incremental_results_evaluation,
+    plot_permutation_test
+)
+
+__version__ = "1.1.0"
+
diff --git a/Murray/tests/test_better_groups.py b/Murray/tests/test_better_groups.py
new file mode 100644
index 0000000..ba805f8
--- /dev/null
+++ b/Murray/tests/test_better_groups.py
@@ -0,0 +1,113 @@
+import pytest
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import MinMaxScaler
+from Murray.main import BetterGroups, SyntheticControl, select_treatments, select_controls
+from Murray.auxiliary import market_correlations, cleaned_data
+
+@pytest.fixture(scope="module")
+def cleaned_dataframe():
+    """Fixture that creates synthetic test data"""
+    np.random.seed(42)  
+    
+    dates = pd.date_range(start='2023-01-01', periods=100)
+    regions = ['Region_A', 'Region_B', 'Region_C', 'Region_D', 'Region_E']
+    
+    data = []
+    for region in regions:
+        base_value = np.random.randint(50, 100)
+        for date in dates:
+            value = base_value + np.sin(date.day/15) * 10 + np.random.normal(0, 2)
+            data.append({
+                'date': date,
+                'region': region,
+                'add_to_carts': max(0, int(value))  
+            })
+    
+    df = pd.DataFrame(data)
+    return cleaned_data(df, "add_to_carts", "region", "date")
+
+@pytest.fixture(scope="module")
+def correlation_matrix(cleaned_dataframe):
+    """Fixture that generates the correlation matrix from synthetic data"""
+    return market_correlations(cleaned_dataframe)
+
+@pytest.fixture(scope="module")
+def similarity_matrix(correlation_matrix):
+    """Fixture to generate a similarity matrix"""
+    return correlation_matrix.copy()
+
+@pytest.fixture
+def test_data(cleaned_dataframe):
+    """Fixture to generate test data"""
+    return cleaned_dataframe.copy()
+
+
+def test_better_groups_valid(similarity_matrix, correlation_matrix, test_data):
+    """BetterGroups must return a non-empty dict whose entries carry every expected key."""
+    results = BetterGroups(
+        similarity_matrix=similarity_matrix,
+        excluded_locations=[],
+        data=test_data,
+        correlation_matrix=correlation_matrix,
+        maximum_treatment_percentage=0.50
+    )
+    assert isinstance(results, dict), "The result must be a dictionary"
+    assert len(results) > 0, "There must be at least one evaluated treatment group"
+    for result in results.values():  # only the per-entry result dicts are checked
+        assert "Best Treatment Group" in result, "Missing treatment group"
+        assert "Control Group" in result, "Missing control group"
+        assert "MAPE" in result, "Missing MAPE metric"
+        assert "SMAPE" in result, "Missing SMAPE metric"
+        assert "Holdout Percentage" in result, "Missing holdout percentage"
+        assert result["MAPE"] >= 0, "MAPE must be non-negative"
+        assert 0 <= result["Holdout Percentage"] <= 100, "Holdout must be between 0 and 100"
+
+
+def test_better_groups_no_valid_treatments(similarity_matrix, correlation_matrix, test_data):
+    """BetterGroups must return None when the data has no valid locations."""
+    no_match = test_data[test_data["location"].isin(["X", "Y"])]  # presumably empty: fixture regions are Region_A..E
+    results = BetterGroups(
+        similarity_matrix=similarity_matrix,
+        excluded_locations=[],
+        data=no_match,
+        correlation_matrix=correlation_matrix,
+        maximum_treatment_percentage=0.50
+    )
+
+    assert results is None, "If there are no valid locations, the result must be None"
+
+
+def test_better_groups_scaled_data(similarity_matrix, correlation_matrix, test_data):
+    scaler = MinMaxScaler()
+    test_data["Y"] = scaler.fit_transform(test_data["Y"].values.reshape(-1, 1))  
+
+    results = BetterGroups(
+        similarity_matrix=similarity_matrix,
+        excluded_locations=[],
+        data=test_data,
+        correlation_matrix=correlation_matrix,
+        maximum_treatment_percentage=0.50
+    )
+
+    assert isinstance(results, dict), "The result must be a dictionary"
+    assert all(isinstance(result["MAPE"], (float, int)) for result in results.values()), "MAPE must be a number"
+
+
+def test_better_groups_no_control(monkeypatch, similarity_matrix, correlation_matrix, test_data):
+    """With select_controls stubbed to return no controls, every reported MAPE must be infinite."""
+    def fake_select_controls(*args, **kwargs):
+        return []  # accepts any call shape (two positionals, min_correlation=..., fallback_n=...)
+
+    monkeypatch.setattr("Murray.main.select_controls", fake_select_controls)
+
+    results = BetterGroups(
+        similarity_matrix=similarity_matrix,
+        excluded_locations=[],
+        data=test_data,
+        correlation_matrix=correlation_matrix,
+        maximum_treatment_percentage=0.50
+    )
+
+    for result in results.values():
+        assert result["MAPE"] == float('inf'), "If there are no controls, MAPE must be infinite"
diff --git a/Murray/tests/test_market_correlations.py b/Murray/tests/test_market_correlations.py
new file mode 100644
index 0000000..8bcc404
--- /dev/null
+++ b/Murray/tests/test_market_correlations.py
@@ -0,0 +1,25 @@
+import os
+import pandas as pd
+import pytest
+from Murray.auxiliary import market_correlations,cleaned_data
+
+
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data"))
+
+
+tests = [
+    (os.path.join(DATA_DIR, "data1.csv"), "add_to_carts", "region", "date"),
+    (os.path.join(DATA_DIR, "data2.csv"), "sessions", "location", "day"),
+]
+
+@pytest.fixture
+def cleaned_data_fixture(dataset_path, col_target, col_locations, col_dates):
+    df = pd.read_csv(dataset_path)
+    return cleaned_data(df, col_target, col_locations, col_dates)
+
+
+@pytest.mark.parametrize("dataset_path, col_target, col_locations, col_dates", tests)
+def test_market_correlations(cleaned_data_fixture):
+    
+    correlation_matrix = market_correlations(cleaned_data_fixture)
+    assert isinstance(correlation_matrix, pd.DataFrame), "market_correlations should return a DataFrame"
diff --git a/Murray/tests/test_power_analysis.py b/Murray/tests/test_power_analysis.py
new file mode 100644
index 0000000..ca1f409
--- /dev/null
+++ b/Murray/tests/test_power_analysis.py
@@ -0,0 +1,90 @@
+import pytest
+import numpy as np
+import pandas as pd
+from Murray.main import apply_lift, calculate_conformity, simulate_power, run_simulation, evaluate_sensitivity
+
+@pytest.fixture
+def synthetic_series():
+    """Fixture to generate a synthetic time series."""
+    np.random.seed(42)
+    y = np.random.rand(100) * 100  
+    return y
+
+
+def test_apply_lift(synthetic_series):
+    """apply_lift must scale indices 50..69 by 1.1 (delta=0.1) and leave all other values unchanged."""
+    y = synthetic_series.copy()
+    y_lifted = apply_lift(y, delta=0.1, start_treatment=50, end_treatment=70)
+    assert np.array_equal(y_lifted[:50], y[:50]), "Values before the treatment should not change"
+    assert np.array_equal(y_lifted[70:], y[70:]), "Values after the treatment should not change"
+    assert np.allclose(y_lifted[50:70], y[50:70] * 1.1), "The lift should be applied in the treatment period"
+
+
+def test_calculate_conformity(synthetic_series):
+    y_real = synthetic_series.copy()
+    y_control = synthetic_series.copy() * 0.9  
+
+    conformity = calculate_conformity(y_real, y_control, start_treatment=50, end_treatment=70)
+
+    expected_conformity = np.mean(y_real[50:70]) - np.mean(y_control[50:70])
+    assert np.isclose(conformity, expected_conformity), "The calculated conformity should match the expected value"
+
+
+def test_simulate_power(synthetic_series):
+    y_real = synthetic_series.copy()
+    y_control = synthetic_series.copy() * 0.95  
+
+    delta, power, y_lifted = simulate_power(
+        y_real=y_real,
+        y_control=y_control,
+        delta=0.1,
+        period=20,
+        n_permutations=100,
+        significance_level=0.05
+    )
+
+    assert isinstance(delta, float), "Delta must be a float"
+    assert isinstance(power, float), "Statistical power must be a float"
+    assert isinstance(y_lifted, np.ndarray), "The adjusted series must be a NumPy array"
+    assert len(y_lifted) == len(y_real), "The adjusted series must have the same length as the original"
+
+
+def test_run_simulation(synthetic_series):
+    y_real = synthetic_series.copy()
+    y_control = synthetic_series.copy() * 0.98
+
+    delta, power, y_lifted = run_simulation(
+        delta=0.2,
+        y_real=y_real,
+        y_control=y_control,
+        period=20,
+        n_permutations=100,
+        significance_level=0.05
+    )
+
+    assert isinstance(delta, float), "Delta must be a float"
+    assert isinstance(power, float), "Statistical power must be a float"
+    assert isinstance(y_lifted, np.ndarray), "The adjusted series must be a NumPy array"
+
+
+def test_evaluate_sensitivity():
+    """evaluate_sensitivity must return a dict of dicts and a dict of NumPy arrays for seeded inputs."""
+    np.random.seed(42)  # same seed as the synthetic_series fixture, so the random inputs are reproducible
+    results_by_size = {
+        50: {"Actual Target Metric (y)": np.random.rand(100) * 100, "Predictions": np.random.rand(100) * 100}
+    }
+    deltas = [0.05, 0.1, 0.2]
+    periods = [10, 20, 30]
+
+    sensitivity_results, lift_series = evaluate_sensitivity(
+        results_by_size=results_by_size,
+        deltas=deltas,
+        periods=periods,
+        n_permutations=50,
+        significance_level=0.05
+    )
+
+    assert isinstance(sensitivity_results, dict), "The result must be a dictionary"
+    assert isinstance(lift_series, dict), "The lift series must be a dictionary"
+    assert all(isinstance(v, dict) for v in sensitivity_results.values()), "Each value in sensitivity_results must be a dictionary"
+    assert all(isinstance(v, np.ndarray) for v in lift_series.values()), "Each value in lift_series must be a NumPy array"
diff --git a/Murray/tests/test_run_geo_analysis.py b/Murray/tests/test_run_geo_analysis.py
new file mode 100644
index 0000000..5d621cf
--- /dev/null
+++ b/Murray/tests/test_run_geo_analysis.py
@@ -0,0 +1,38 @@
+import pytest
+import numpy as np
+import pandas as pd
+from Murray.main import run_geo_analysis_streamlit_app
+from Murray.auxiliary import market_correlations, cleaned_data
+
+@pytest.fixture
+def sample_data():
+    """Return a seeded long-format frame: 10 locations x 100 daily rows with uniform Y in [0, 100)."""
+    np.random.seed(42)
+    days = pd.date_range("2023-01-01", periods=100, freq="D")
+    return pd.DataFrame({
+        "time": np.tile(days, 10),
+        "location": np.repeat([f"Location_{i}" for i in range(10)], 100),
+        "Y": np.random.rand(1000) * 100,
+    })
+
+
+def test_run_geo_analysis(sample_data):
+    """Run run_geo_analysis_streamlit_app on the sample data and check its three result sections are dicts."""
+    expected_sections = ("simulation_results", "sensitivity_results", "series_lifts")
+    results = run_geo_analysis_streamlit_app(
+        data=sample_data,
+        maximum_treatment_percentage=0.50,
+        significance_level=0.05,
+        deltas_range=(0.05, 0.2, 0.05),
+        periods_range=(10, 30, 10),
+        excluded_locations=["Location_1"],
+        n_permutations=100,
+    )
+
+    assert isinstance(results, dict), "The result must be a dictionary"
+    for section in expected_sections:
+        assert section in results, f"Missing '{section}' in the results"
+
+    # Type checks start only after every section is known to be present.
+    for section in expected_sections:
+        assert isinstance(results[section], dict), f"{section} must be a dictionary"
diff --git a/Murray/tests/test_run_geo_evaluation.py b/Murray/tests/test_run_geo_evaluation.py
new file mode 100644
index 0000000..ed6b022
--- /dev/null
+++ b/Murray/tests/test_run_geo_evaluation.py
@@ -0,0 +1,46 @@
+import pytest
+import numpy as np
+import pandas as pd
+from Murray.post_analysis import run_geo_evaluation
+from Murray.auxiliary import market_correlations, cleaned_data
+
+@pytest.fixture
+def sample_data():
+    """Return a seeded frame of 1000 rows: 100 daily timestamps repeated for each of 10 locations."""
+    np.random.seed(42)
+    columns = {
+        "time": np.tile(pd.date_range("2023-01-01", periods=100, freq="D"), 10),
+        "location": np.repeat([f"Location_{i}" for i in range(10)], 100),
+        "Y": np.random.rand(1000) * 100,
+    }
+    return pd.DataFrame(columns)
+
+
+def test_run_geo_evaluation(sample_data):
+    """Call run_geo_evaluation for a 2023-03-01..2023-03-10 treatment and validate the result dictionary."""
+    required = (
+        "MAPE", "SMAPE", "predictions", "treatment", "p_value", "power", "percenge_lift",
+        "control_group", "observed_stat", "null_stats", "weights", "period", "spend",
+        "length_treatment",
+    )
+    results = run_geo_evaluation(
+        data_input=sample_data,
+        start_treatment="2023-03-01",
+        end_treatment="2023-03-10",
+        treatment_group=["Location_0", "Location_1"],
+        spend=50000,
+        n_permutations=100,
+        inference_type="iid",
+        significance_level=0.05,
+    )
+
+    assert isinstance(results, dict), "The result must be a dictionary"
+    for name in required:
+        assert name in results, f"Missing the key '{name}' in the results"
+
+    assert isinstance(results["MAPE"], float), "MAPE must be a float"
+    assert isinstance(results["p_value"], float), "p_value must be a float"
+    assert isinstance(results["power"], float), "Power must be a float"
+    assert isinstance(results["control_group"], list), "Control group must be a list"
+    assert 0 <= results["power"] <= 1, "Power must be between 0 and 1"
+    assert 0 <= results["p_value"] <= 1, "p_value must be between 0 and 1"
diff --git a/Murray/tests/test_select_markets.py b/Murray/tests/test_select_markets.py
new file mode 100644
index 0000000..25ca78b
--- /dev/null
+++ b/Murray/tests/test_select_markets.py
@@ -0,0 +1,86 @@
+import pytest
+import numpy as np
+import pandas as pd
+from Murray.main import select_treatments, select_controls
+from Murray.auxiliary import market_correlations, cleaned_data
+
+@pytest.fixture(scope="module")
+def cleaned_dataframe():
+    """Build five seeded regional series of 100 daily values and return them passed through cleaned_data."""
+    np.random.seed(42)
+
+    days = pd.date_range(start='2023-01-01', periods=100)
+    region_names = ('Region_A', 'Region_B', 'Region_C', 'Region_D', 'Region_E')
+
+    rows = []
+    for region in region_names:
+        # One random baseline per region, drawn before that region's daily noise.
+        level = np.random.randint(50, 100)
+        for day in days:
+            # Day-of-month sinusoid plus Gaussian noise around the baseline.
+            noisy = level + np.sin(day.day / 15) * 10 + np.random.normal(0, 2)
+            row = {'date': day, 'region': region, 'add_to_carts': max(0, int(noisy))}
+            rows.append(row)
+
+    frame = pd.DataFrame(rows)
+    # Positional arguments after the frame: target column, location column, date column.
+    return cleaned_data(frame, "add_to_carts", "region", "date")
+
+@pytest.fixture(scope="module")
+def correlation_matrix(cleaned_dataframe):
+    """Module-scoped result of market_correlations applied to the cleaned_dataframe fixture."""
+    return market_correlations(cleaned_dataframe)
+
+def test_select_treatments_valid(cleaned_dataframe, correlation_matrix):
+    """Groups from select_treatments must be lists of two locations that never include the excluded one."""
+    excluded = np.random.choice(cleaned_dataframe["location"].unique())
+    groups = select_treatments(correlation_matrix, treatment_size=2, excluded_locations=[excluded])
+
+    assert isinstance(groups, list), "The result must be a list"
+    assert all(isinstance(members, list) for members in groups), "Each combination must be a list"
+    assert all(len(members) == 2 for members in groups), "Each combination must have 2 treatments"
+    assert not any(loc == excluded for members in groups for loc in members), "The excluded location must not appear in the treatments"
+
+def test_select_treatments_invalid_location(correlation_matrix):
+    """Excluding 'X' and 'Y' is expected to raise a KeyError whose message matches the pattern below."""
+    with pytest.raises(KeyError, match="not present in the similarity matrix"):
+        select_treatments(correlation_matrix, treatment_size=2, excluded_locations=["X", "Y"])
+
+def test_select_treatments_treatment_size_too_large(correlation_matrix):
+    """Requesting treatment_size=100 is expected to raise a ValueError whose message matches the regex below."""
+    with pytest.raises(ValueError, match="The treatment size .* exceeds the available number of columns"):
+        select_treatments(correlation_matrix, treatment_size=100, excluded_locations=[])
+
+def test_select_treatments_treatment_size_equals_columns(correlation_matrix):
+    """A treatment_size equal to the matrix's column count must yield one group holding every column."""
+    treatments = select_treatments(
+        correlation_matrix, treatment_size=correlation_matrix.shape[1], excluded_locations=[]
+    )
+    assert len(treatments) == 1, "There must be only one possible combination"
+    assert set(treatments[0]) == set(correlation_matrix.columns), "It must contain all possible locations"
+
+
+def test_select_controls_valid(cleaned_dataframe, correlation_matrix):
+    """For every treatment pair, select_controls must return a non-empty list disjoint from that pair."""
+    excluded = np.random.choice(cleaned_dataframe["location"].unique())
+    candidate_groups = select_treatments(correlation_matrix, treatment_size=2, excluded_locations=[excluded])
+
+    for group in candidate_groups:
+        controls = select_controls(correlation_matrix, group)
+        assert isinstance(controls, list), "The result must be a list"
+        assert len(controls) > 0, "There must be at least one control available"
+        assert not any(loc in group for loc in controls), "Controls must not be in the treatment group"
+
+def test_select_controls_invalid_treatments(correlation_matrix):
+    """select_controls is expected to return an empty list when given unknown treatment names."""
+    # "X", "Y" and "Z" are not among the Region_* names built by the cleaned_dataframe fixture.
+    controls = select_controls(correlation_matrix, ["X", "Y", "Z"])
+    assert controls == [], "If the treatment does not exist, the output must be an empty list"
+
+def test_select_controls_fallback(correlation_matrix, cleaned_dataframe):
+    """With min_correlation=0.99 and fallback_n=3, exactly three controls are expected for one random treatment."""
+    locations = cleaned_dataframe["location"].unique()
+    treatment_group = [np.random.choice(locations)]
+    controls = select_controls(correlation_matrix, treatment_group, min_correlation=0.99, fallback_n=3)
+
+    assert len(controls) == 3, "It should select 3 fallback controls"
diff --git a/Murray/tests/test_synthetic_control.py b/Murray/tests/test_synthetic_control.py
new file mode 100644
index 0000000..a25e6f7
--- /dev/null
+++ b/Murray/tests/test_synthetic_control.py
@@ -0,0 +1,51 @@
+import pytest
+import numpy as np
+import pandas as pd
+from Murray.main import SyntheticControl
+from Murray.auxiliary import cleaned_data, market_correlations
+
+@pytest.fixture(scope="module")
+def synthetic_data():
+    """Return seeded (X, y): 100x3 uniform features and a noisy fixed-weight linear combination of them."""
+    np.random.seed(42)
+    features = np.random.rand(100, 3)
+    true_weights = np.array([0.3, 0.5, 0.2])
+    noise = np.random.normal(0, 0.1, 100)
+    return features, features @ true_weights + noise
+
+@pytest.fixture(scope="module")
+def correlation_matrix(synthetic_data):
+    """Pass the synthetic_data fixture's return value to market_correlations."""
+    return market_correlations(synthetic_data)  # NOTE(review): synthetic_data is an (X, y) tuple and no test in this module requests this fixture; confirm the input is valid or remove it
+
+@pytest.fixture(scope="module")
+def synthetic_control():
+    """Return a SyntheticControl built with fixed L1/L2 strengths, no seasonality and delta=1.0."""
+    # Module scope: both tests in this file receive, and refit, this same instance.
+    settings = {
+        "regularization_strength_l1": 0.1, "regularization_strength_l2": 0.1,
+        "seasonality": None, "delta": 1.0,
+    }
+    return SyntheticControl(**settings)
+
+def test_synthetic_control_fit(synthetic_control, synthetic_data):
+    """After fit, the model must expose is_fitted_ and a w_ array with one entry per feature column."""
+    features, target = synthetic_data
+    synthetic_control.fit(features, target)
+    for attribute in ('is_fitted_', 'w_'):
+        assert hasattr(synthetic_control, attribute)
+    learned = synthetic_control.w_
+    assert isinstance(learned, np.ndarray)
+    assert len(learned) == features.shape[1]
+
+def test_synthetic_control_predict(synthetic_control, synthetic_data):
+    """predict must return NaN-free predictions (one per row) and a weight array (one per column)."""
+    features, target = synthetic_data
+    synthetic_control.fit(features, target)
+    predictions, weights = synthetic_control.predict(features)
+
+    assert isinstance(predictions, np.ndarray)
+    assert len(predictions) == len(target)
+    assert not np.any(np.isnan(predictions))
+    assert isinstance(weights, np.ndarray)
+    assert len(weights) == features.shape[1]
diff --git a/Murray/tests/test_upload_data.py b/Murray/tests/test_upload_data.py
new file mode 100644
index 0000000..08f044d
--- /dev/null
+++ b/Murray/tests/test_upload_data.py
@@ -0,0 +1,27 @@
+import os
+import pandas as pd
+import pytest
+import Murray as mp
+
+
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data"))  # "data" directory beside this file's parent directory
+
+# Parametrization cases: (CSV path, target column, location column, date column).
+tests = [
+    (os.path.join(DATA_DIR, "data1.csv"), "add_to_carts", "region", "date"),
+    (os.path.join(DATA_DIR, "data2.csv"), "sessions", "location", "day"),
+]
+
+
+
+@pytest.mark.parametrize("dataset_path, col_target, col_locations, col_dates", tests)
+def test_cleaned_data(dataset_path, col_target, col_locations, col_dates):
+    """Load each CSV case and check that mp.cleaned_data returns a DataFrame without missing values."""
+    assert os.path.exists(dataset_path), f"File {dataset_path} not found"
+    raw = pd.read_csv(dataset_path)
+    cleaned = mp.cleaned_data(raw, col_target, col_locations, col_dates)
+
+    assert isinstance(cleaned, pd.DataFrame), "Output is not a DataFrame"
+    assert not cleaned.isnull().any().any(), "Cleaned data contains NaN values"
+
+