# Correlation vs Causation

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import matplotlib.pyplot as plt

# Seed NumPy's global random state so the simulated data are reproducible.
np.random.seed(42)

# Twelve calendar months; Season is a 0/1 indicator equal to 1 for months 4-8.
months = np.arange(1, 13)
Season = np.array([0] * 3 + [1] * 5 + [0] * 4)

# Season enters BOTH equations below, which is what makes it a confounder:
# it shifts AdSpend by 30 and shifts Sales directly by 80, while AdSpend
# itself contributes only 0.3 per unit to Sales.
# The AdSpend noise is drawn before the Sales noise; this order fixes the
# simulated values for a given seed.
ad_noise = np.random.normal(loc=0, scale=5, size=months.size)
AdSpend = 50 + 30 * Season + ad_noise

sales_noise = np.random.normal(loc=0, scale=10, size=months.size)
Sales = 200 + 80 * Season + 0.3 * AdSpend + sales_noise

# Collect everything in one table, one row per month.
df = pd.DataFrame(
    dict(Month=months, Season=Season, AdSpend=AdSpend, Sales=Sales)
)

# Last expression of the cell: the notebook renders the table.
df

## Scatter Plot

In [None]:
# Raw relationship between advertising spend and sales, one point per month.
# scatterplot returns the Axes it drew on, so the title is set on it directly.
ax = sns.scatterplot(x="AdSpend", y="Sales", data=df, s=100)
ax.set_title("Sales vs AdSpend")
plt.show()

## Regression Analysis

- Dependent Variable: Sales
- Independent Variable: Advertising Spend

In [None]:
# Simple regression: Sales on AdSpend only.
# add_constant adds the intercept column that OLS does not include by itself.
ad_spend = df["AdSpend"]
X1 = sm.add_constant(ad_spend)

naive_ols = sm.OLS(endog=df["Sales"], exog=X1)
model1 = naive_ols.fit()

# Last expression of the cell: the notebook renders the summary table.
model1.summary()

## Controlling for Confounding Variables
- Season

In [None]:
# Multiple regression: Sales on AdSpend with Season included as a control,
# so the AdSpend coefficient is estimated holding Season fixed.
predictors = df[["AdSpend", "Season"]]
X2 = sm.add_constant(predictors)

adjusted_ols = sm.OLS(endog=df["Sales"], exog=X2)
model2 = adjusted_ols.fit()

# Last expression of the cell: the notebook renders the summary table.
model2.summary()

## Visualization

In [None]:
# Same scatter as before, but with points colored by the Season indicator.
ax = sns.scatterplot(x="AdSpend", y="Sales", hue="Season", data=df, s=100)
ax.set_title("Sales vs AdSpend (Confounded by Season)")
plt.show()

## Within Season

In [None]:
# One fitted regression line per Season value, drawn on a single facet.
# lmplot is figure-level and returns a FacetGrid; with no row/col faceting
# the grid has exactly one Axes, which is where the title goes.
grid = sns.lmplot(x="AdSpend", y="Sales", hue="Season", data=df)
grid.ax.set_title("Advertising Effects After Removing Confounding")
plt.show()

## Discussion
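- Advertising spend shows a strong __correlation__ with sales figures.
- However, correlation does not imply __causation__.
- Other factors, such as __seasonal trends__, may also influence sales. In this simulated data, `Season` raises both `AdSpend` and `Sales`, so it confounds the simple regression of `Sales` on `AdSpend`.
- Further analysis, such as controlling for `Season` as in the second regression, is needed to establish a causal relationship.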