In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score


file_path = "usaSmall.csv"

# Load the CSV, keep only the two columns of interest, drop rows missing
# either value, then append log(1 + x) versions of both columns.
df = (
    pd.read_csv(file_path)[['Visits (000s)', 'Spend']]
    .dropna()
    .assign(
        Log_Visits=lambda frame: np.log1p(frame['Visits (000s)']),
        Log_Spend=lambda frame: np.log1p(frame['Spend']),
    )
)

# Explanatory variable as an (n, 1) column matrix, which is the layout
# scikit-learn estimators expect for features
X = df['Log_Visits'].to_numpy().reshape(-1, 1)
# Response variable as a flat array
y = df['Log_Spend'].to_numpy()

# Fit a linear regression of log-spend on log-visits.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X, y)

# In-sample fitted values, and residuals defined as observed minus fitted
y_pred = model.predict(X)
residuals = y - y_pred


# Scatter of the log1p-transformed columns with seaborn's own fitted
# regression line and a 95% confidence band around it.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    x='Log_Visits',
    y='Log_Spend',
    data=df,
    scatter_kws={'color': 'blue', 'alpha': 0.6},
    line_kws={'color': 'red'},
    ci=95,
    ax=ax,
)
ax.set_title("Log Transformed Linear Regression (Spending vs Visits)", fontsize=16, fontweight='bold')
# Both axes show np.log1p-transformed values, so the labels name the transform
# instead of presenting the tick values as raw thousands / million pounds.
# The underlying units are carried over from the previous labels.
ax.set_xlabel("log(1 + Visits in thousands)", fontsize=12, fontweight='bold')
ax.set_ylabel("log(1 + Spending in million pounds)", fontsize=12, fontweight='bold')
fig.tight_layout()


In [3]:
# Pull the fitted parameters off the single-feature model: coef_ holds one
# entry per feature, so index 0 is the slope on Log_Visits.
intercept, slope = model.intercept_, model.coef_[0]

# The model was fitted on np.log1p-transformed columns, so "Log" in the
# printed equation stands for log(1 + x).
print(f"Regression Equation: Log(Spend) = {intercept:.4f} + {slope:.4f} * Log(Visits)")

Regression Equation: Log(Spend) = 0.0589 + 0.8594 * Log(Visits)
