In [1]:
import logging
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression

# Locate the root directory two levels above the current working directory,
# then derive the folder where the notebook's figures are saved.
working_dir = Path().absolute()
root_path = working_dir.parents[1]
figures_path = root_path.joinpath("common", "resources", "images", "intro")

In [2]:
# The first CSV column appears to be a row index that was written out with the
# data (it shows up as "Unnamed: 0" holding 0, 1, 2, ... when read naively).
# Read it back as the index so only `dataset`, `x` and `y` remain as columns.
data = pd.read_csv("anscombe.csv", index_col=0)

# Preview the first rows instead of dumping the whole frame.
data.head(10)

Unnamed: 0,dataset,x,y
0,I,10.0,8.04
1,I,8.0,6.95
2,I,13.0,7.58
3,I,9.0,8.81
4,I,11.0,8.33
5,I,14.0,9.96
6,I,6.0,7.24
7,I,4.0,4.26
8,I,12.0,10.84
9,I,7.0,4.82


In [1]:
# Fit a least-squares line to each group of `data` (grouped by the `dataset`
# column), print its coefficient and intercept, and draw the points plus the
# fitted line in a 2x2 grid of panels that share the same axis limits.
#
# This cell depends on names created by earlier cells (`sns`, `plt`, `np`,
# `LinearRegression`, `data`, `figures_path`). On a fresh kernel those cells
# must be run first, otherwise this cell stops with a NameError.
sns.set_style("whitegrid")

fig, axs = plt.subplots(2, 2, figsize=(10, 6))

# The grid on which each fitted line is evaluated is identical for every
# panel, so build it once instead of once per dataset.
x = np.linspace(2, 20, 100).reshape(-1, 1)

# zip() pairs each group with one panel; groups beyond the four available
# panels would be silently skipped.
for (dataset_number, dataset), ax in zip(data.groupby('dataset'), axs.flat):
    print(f"Dataset {dataset_number}")

    # scikit-learn expects a 2-D feature matrix, hence the reshape to (n, 1).
    model = LinearRegression()
    model.fit(dataset['x'].to_numpy().reshape(-1, 1), dataset['y'].to_numpy())

    print(f"Coefficient: {model.coef_[0]:.4f}")
    print(f"Intercept: {model.intercept_:.3f}")

    y_pred = model.predict(x)

    # The higher zorder on the scatter keeps the points on top of the line.
    sns.lineplot(x=x.flatten(), y=y_pred, ax=ax, color="C1", zorder=5)

    sns.scatterplot(x='x', y='y', data=dataset, ax=ax, zorder=10)

    ax.set_title(f"Dataset {dataset_number}")

    # Identical limits and ticks on every panel make the panels comparable.
    ax.set_xlim(2, 20)
    ax.set_xticks(range(4, 19, 2))

    ax.set_ylim(2, 14)
    ax.set_yticks(range(4, 13, 2))

    print()

fig.tight_layout()

# Create the output folder if needed so saving does not fail at the very end.
figures_path.mkdir(parents=True, exist_ok=True)
fig.savefig(figures_path / "anscombe.svg")

NameError: name 'sns' is not defined