# Coursework Assignment: Building a Regression Model

```
University of London
BSc in Computer Science
CM3005, Data Science
Hudson Leonardo MENDES
hlm12@student.london.ac.uk
```


# I. Introduction


## Domain-specific area


## Dataset


## Objectives


# II. Implementation


## Preprocessing


In [None]:
import pathlib

data_folderpath = pathlib.Path("./data")

ppd_folderpath = data_folderpath / "uk-ppd"
inflation_filepath = data_folderpath / "uk-ons/ons-inflation-1989-2022.csv"
interest_filepath = data_folderpath / "uk-boe/boe-interest-1975-2022.csv"


In [None]:
import pandas as pd

pd.set_option("display.float_format", lambda x: "{:,.3f}".format(x))


In [None]:
# https://www.gov.uk/guidance/about-the-price-paid-data
ppd_property_type = {
    "D": "detached",
    "S": "semi-detached",
    "T": "terraced",
    "F": "flat/maisonettes",
    # "O": "other" # => intentionally ommitted
}

ppd_duration = {"F": "freehold", "L": "leasehold"}

ppd_old_or_new = {"Y": "new", "N": "old"}

ppd_df = pd.concat(
    [
        pd.read_csv(
            ppd_filepath,
            compression="zip",
            names=[
                "id",
                "price",
                "date",
                "postcode",
                "property_type",
                "old_or_new",
                "duration",
                "paon",
                "saon",
                "street",
                "locality",
                "town_city",
                "district",
                "county",
                "ppd_category_type",
                "record_status",
            ],
        )
        for ppd_filepath in ppd_folderpath.glob("*.zip")
    ]
)
ppd_df["postgroup"] = ppd_df["postcode"].map(lambda x: str(x).split(" ")[0])
ppd_df["date"] = pd.to_datetime(ppd_df["date"])
ppd_df["property_type"] = ppd_df["property_type"].map(ppd_property_type.get)
ppd_df["duration"] = ppd_df["duration"].map(ppd_duration.get)
ppd_df["old_or_new"] = ppd_df["old_or_new"].map(ppd_old_or_new.get)
ppd_df = ppd_df[
    [
        "date",
        "postgroup",
        "property_type",
        "old_or_new",
        "duration",
        "price",
    ]
]
ppd_df = ppd_df.astype(
    {
        "postgroup": "category",
        "property_type": "category",
        "old_or_new": "category",
        "duration": "category",
        "price": "double",
    }
)
ppd_df = ppd_df.dropna()
ppd_df.sample(n=5)


In [None]:
import re
import string
from datetime import date

inflation_date_pattern = re.compile(r"([\d]{4})(?:\s+([\w]{3}))?")
inflation_month_names = [
    "JAN",
    "FEB",
    "MAR",
    "APR",
    "MAY",
    "JUN",
    "JUL",
    "AUG",
    "SEP",
    "OCT",
    "NOV",
    "DEC",
]
inflation_month_index = {mn: ix + 1 for (ix, mn) in enumerate(inflation_month_names)}
inflation_month_index["Q1"] = 1
inflation_month_index["Q2"] = 4
inflation_month_index["Q3"] = 7
inflation_month_index["Q3"] = 10

inflation_acceptable_numeric_chars = string.digits + ".,"


def extract_inflation_date(x: str) -> date:
    match = next(inflation_date_pattern.finditer(x), None)
    if match:
        group_count = len(match.groups())
        if group_count >= 1:
            year = int(match.group(1))
            month = 1
            month_name = match.group(2)
            if group_count > 1 and month_name:
                month_name = month_name.strip().upper()
                month = inflation_month_index.get(month_name)
            return date(year, month, 1)


def extract_inflation_rate(x: str) -> float:
    x = str(x)
    if all([c in inflation_acceptable_numeric_chars for c in x]):
        return float(x)
    return None


inflation_df = pd.read_csv(inflation_filepath)
inflation_df["date"] = inflation_df["Title"].map(extract_inflation_date)
inflation_df["date"] = pd.to_datetime(inflation_df["date"])
inflation_df["rate"] = inflation_df["CPIH ANNUAL RATE 00: ALL ITEMS 2015=100"].map(
    extract_inflation_rate
)
inflation_df["rate"] = inflation_df["rate"].astype("float", errors="ignore")
inflation_df = inflation_df[["date", "rate"]]
inflation_df = inflation_df.dropna()
inflation_df = inflation_df.set_index("date").sort_index()
inflation_df.sample(n=5)


In [None]:
interest_df = pd.read_csv(interest_filepath)
interest_df["date"] = pd.to_datetime(interest_df["Date Changed"])
interest_df["rate"] = interest_df["Rate"].astype("float")
interest_df = interest_df[["date", "rate"]]
interest_df = interest_df.set_index("date").sort_index()
interest_df.sample(n=5)


In [None]:
from tqdm import tqdm, trange
from typing import Callable
from datetime import date, timedelta

tqdm.pandas()


def build_rate_extractor(df: pd.DataFrame) -> Callable[[date], float]:
    min_date = df.index.min()
    max_date = df.index.max()
    cur_date = min_date
    rate_index = {}
    first_rate = df.rate[0]
    prev_rate = first_rate
    last_rate = df.rate[-1]
    with trange((max_date - min_date).days, desc="rate_index") as pbar:
        while cur_date <= max_date:
            rates = df[df.index == cur_date].rate
            if rates.any():
                new_rate = rates[0] / 100.0
                rate_index[cur_date] = new_rate
                prev_rate = new_rate
            else:
                rate_index[cur_date] = prev_rate
            cur_date += timedelta(days=1)
            pbar.update()

    def get_rate_for_date(d: date) -> float:
        if d < min_date:
            return first_rate
        elif d > max_date:
            return last_rate
        else:
            return rate_index[d]

    return get_rate_for_date


df = ppd_df.copy()
df["inflation_rate"] = df.date.progress_map(build_rate_extractor(df=inflation_df))
df["interest_rate"] = df.date.progress_map(build_rate_extractor(df=interest_df))
df["date_year"] = df.date.progress_map(lambda d: d.year)
df["date_month"] = df.date.progress_map(lambda d: d.month)
df["date_day"] = df.date.progress_map(lambda d: d.day)
df["date_day_of_week"] = df.date.progress_map(lambda d: d.weekday())
df = df.sort_values(by="date").reset_index()
df = df[
    ["date_year", "date_month", "date_day", "date_day_of_week"]
    + list(ppd_df.columns[1:-1])
    + ["inflation_rate", "interest_rate", "price"]
]
df.sample(n=5)


In [None]:
df.to_csv(data_folderpath / "snapshot-Xy-1NF.zip", index=False)


## Statistical Summary


In [None]:
try:
    assert df is not None
except NameError:
    import pathlib
    import pandas as pd
    import numpy as np

    print("[SNAPSHOT] Reloading...")
    pd.set_option("display.float_format", lambda x: "{:,.3f}".format(x))
    data_folderpath = pathlib.Path("./data")
    df = pd.read_csv(data_folderpath / "snapshot-Xy-1NF.zip").astype(
        {
            "postgroup": "category",
            "property_type": "category",
            "old_or_new": "category",
            "duration": "category",
            "price": "double",
        }
    )
    print(f" - reloaded from snapshot, {df.shape[0]}")
df.head()


In [None]:
df.info()


In [None]:
df.describe()


In [None]:
# TODO: Central Tendency


In [None]:
# TODO: Measures of Spread


In [None]:
# TODO: Type of distribution


## Data visualisation


In [None]:
try:
    assert df is not None
except NameError:
    import pathlib
    import pandas as pd

    print("[SNAPSHOT] Reloading...")
    pd.set_option("display.float_format", lambda x: "{:,.3f}".format(x))
    data_folderpath = pathlib.Path("./data")
    df = pd.read_csv(data_folderpath / "snapshot-Xy-1NF.zip").astype(
        {
            "postgroup": "category",
            "property_type": "category",
            "old_or_new": "category",
            "duration": "category",
            "price": "double",
        }
    )
    print(f" - reloaded from snapshot, {df.shape[0]}")
df.head()


In [None]:
%matplotlib inline

In [None]:
import scipy
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter, StrMethodFormatter


In [None]:
from datetime import date

max_price = float(df.price.max())
max_rate = max(df.interest_rate.max(), df.inflation_rate.max())
min_intersecting_date = date(df.date_year.min(), 1, 1)
max_intersecting_date = date(df.date_year.max(), 12, 30)


In [None]:
_, axes = plt.subplots(ncols=2, nrows=1, figsize=(15, 5))


def plot_rate_distributions(ax, df: pd.DataFrame, label: str, color: str):
    df = df.copy()
    x = np.linspace(0.0, df["rate"].max(), 100)
    df["bin"] = pd.cut(df["rate"], bins=x)
    y = list(df.groupby("bin").count()["rate"])
    ax.fill_between(x[:-1], 0.0, y, color=color, alpha=0.5)
    ax.xaxis.set_major_formatter(FormatStrFormatter("%2.2f%%"))
    intervals = [0.05, 0.95]
    for interval, quantile in zip(intervals, df.rate.quantile(intervals)):
        percentile = f"P{int(interval*100.)}={round(quantile, 2)}"
        bbox = dict(boxstyle="round, pad=0.3", fc="lightgray", lw=2)
        ax.axvline(x=quantile, color="blue")
        ax.annotate(
            percentile,
            xy=(quantile, max(y)),
            bbox=bbox,
            ha="center",
            va="center",
        )
    ax.axvline(x=quantile, color="blue")
    ax.legend([label], loc="lower center", bbox_to_anchor=(0.5, -0.2))


plot_rate_distributions(
    ax=axes[0],
    df=interest_df,
    label="interest",
    color="green",
)

plot_rate_distributions(
    ax=axes[1],
    df=inflation_df,
    label="inflation",
    color="red",
)


In [None]:
from datetime import date
from tqdm import tqdm

_, axes = plt.subplots(nrows=2, figsize=(15, 10), sharex=True)

series = df.copy()
series["date"] = df.apply(lambda r: date(r.date_year, r.date_month, r.date_day), axis=1)
series = series.groupby("date").mean(numeric_only=True).dropna()

x = series.index

axes[0].grid(visible=True)
axes[0].plot(x, series.interest_rate * 100.0, "g.-", alpha=0.7)
axes[0].plot(x, series.inflation_rate * 100.0, "r.-", alpha=0.7)
axes[0].set_xlim(left=min_intersecting_date, right=max_intersecting_date)
axes[0].set_ylabel("rates (%)")
axes[0].yaxis.set_major_formatter(FormatStrFormatter("%2.2f%%"))
axes[0].legend(["interest", "inflation"])

axes[1].grid(visible=True)
axes[1].yaxis.set_major_formatter(StrMethodFormatter("{x:,}"))
axes[1].set_ylim(0.0, df.price.quantile(0.95) * 1.2)
axes[1].set_ylabel("property price (£)")
for ix, property_type in tqdm(list(enumerate(ppd_property_type.values()))):
    sub_series = df[df.property_type == property_type].copy()
    sub_series["date_ym"] = sub_series.apply(
        lambda r: date(r.date_year, r.date_month, 1), axis=1
    )
    sub_series = sub_series[["date_ym", "price"]]
    sub_series = sub_series.groupby("date_ym").mean(numeric_only=True)
    sub_series = sub_series.fillna(method="ffill")
    axes[1].plot(sub_series.index, sub_series.price, "s", alpha=0.7)
    axes[1].legend(ppd_property_type.values())


## Machine learning model


In [1]:
try:
    assert df is not None
except NameError:
    import pathlib
    import pandas as pd
    import numpy as np

    print("[SNAPSHOT] Reloading...")
    pd.set_option("display.float_format", lambda x: "{:,.3f}".format(x))
    data_folderpath = pathlib.Path("./data")
    df = pd.read_csv(data_folderpath / "snapshot-Xy-1NF.zip").astype(
        {
            "postgroup": "category",
            "property_type": "category",
            "old_or_new": "category",
            "duration": "category",
            "price": "double",
        }
    )
    print(f" - reloaded from snapshot, {df.shape[0]}")
df.head()


[SNAPSHOT] Reloading...
 - reloaded from snapshot, 4336841


Unnamed: 0,date_year,date_month,date_day,date_day_of_week,postgroup,property_type,old_or_new,duration,inflation_rate,interest_rate,price
0,2018,1,1,0,OL12,semi-detached,old,freehold,0.027,0.005,123500.0
1,2018,1,1,0,CF40,terraced,old,freehold,0.027,0.005,45000.0
2,2018,1,1,0,B63,flat/maisonettes,old,leasehold,0.027,0.005,75000.0
3,2018,1,1,0,SO15,terraced,old,freehold,0.027,0.005,193900.0
4,2018,1,1,0,CO6,semi-detached,old,freehold,0.027,0.005,215000.0


In [2]:
X, y = df[df.columns[:-1]], df[df.columns[-1]]


In [3]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler, FunctionTransformer
from sklearn.compose import make_column_transformer, make_column_selector


def make_sine_cycle_encoder(period: int = 1) -> float:
    assert period != 0
    return FunctionTransformer(lambda x: np.sin(x / period * 2 * np.pi))


# https://scikit-learn.org/stable/modules/sgd.html#tips-on-practical-use
def make_df_column_transformer():
    categorical_selector = make_column_selector(dtype_include="category")
    float_seletor = make_column_selector(dtype_include="float64")
    one_hot = OneHotEncoder(sparse_output=True, handle_unknown="ignore")
    numerical_scaler = StandardScaler(with_mean=True, with_std=True)
    cycle_sine_12 = make_sine_cycle_encoder(period=12)
    cycle_sine_31 = make_sine_cycle_encoder(period=31)
    cycle_sine_6 = make_sine_cycle_encoder(period=6)
    return make_column_transformer(
        (one_hot, categorical_selector),
        (numerical_scaler, float_seletor),
        (numerical_scaler, ["date_year"]),
        (cycle_sine_12, ["date_month"]),
        (cycle_sine_31, ["date_day"]),
        (cycle_sine_6, ["date_day_of_week"]),
        remainder="drop",
        verbose=True,
    )


preprocessing_df_column_transformer = make_df_column_transformer()
X_encoded = preprocessing_df_column_transformer.fit_transform(X)
pd.DataFrame.sparse.from_spmatrix(X_encoded)


[ColumnTransformer] . (1 of 6) Processing onehotencoder, total=   4.4s
[ColumnTransformer]  (2 of 6) Processing standardscaler-1, total=   0.1s
[ColumnTransformer]  (3 of 6) Processing standardscaler-2, total=   0.1s
[ColumnTransformer]  (4 of 6) Processing functiontransformer-1, total=   0.1s
[ColumnTransformer]  (5 of 6) Processing functiontransformer-2, total=   0.1s
[ColumnTransformer]  (6 of 6) Processing functiontransformer-3, total=   0.2s


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2306,2307,2308,2309,2310,2311,2312,2313,2314,2315
0,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,1.000,1.000,0.000,0.089,0.039,-1.361,0.500,0.201,0.000
1,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,1.000,1.000,0.000,0.089,0.039,-1.361,0.500,0.201,0.000
2,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,1.000,0.000,1.000,0.089,0.039,-1.361,0.500,0.201,0.000
3,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,1.000,1.000,0.000,0.089,0.039,-1.361,0.500,0.201,0.000
4,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,1.000,1.000,0.000,0.089,0.039,-1.361,0.500,0.201,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4336836,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,1.000,0.000,1.000,3.504,4.581,1.617,-0.866,-0.938,0.866
4336837,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,1.000,1.000,0.000,3.504,4.581,1.617,-0.866,-0.849,0.866
4336838,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,1.000,1.000,0.000,3.504,4.581,1.617,-0.866,-0.849,0.866
4336839,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.000,1.000,1.000,0.000,3.504,4.581,1.617,-0.866,-0.571,-0.866


In [4]:
from typing import Tuple
from sklearn.model_selection import train_test_split


def produce_split_summary(
    X_split: pd.DataFrame, y_split: pd.DataFrame, name: str, total: int
) -> Tuple[str, int, int, str]:
    return (
        name,
        X_split.shape[0],
        y_split.shape[0],
        "{:.1f}%".format(100.0 * X_split.shape[0] / total),
    )


r = 42
train_size = 0.99
X1, X2, y1, y2 = train_test_split(X_encoded, y, train_size=train_size, random_state=r)
pd.DataFrame(
    [
        produce_split_summary(X, y, "full", total=X.shape[0]),
        produce_split_summary(X1, y1, "train", total=X.shape[0]),
        produce_split_summary(X2, y2, "test", total=X.shape[0]),
    ],
    columns=["split", "|X|", "|y|", "%"],
)


Unnamed: 0,split,|X|,|y|,%
0,full,4336841,4336841,100.0%
1,train,4293472,4293472,99.0%
2,test,43369,43369,1.0%


In [5]:
from typing import Tuple
from sklearn.neural_network import MLPRegressor


def make_model(hidden_layer_sizes: Tuple[int, ...], max_iter: int):
    r = 42
    return MLPRegressor(
        hidden_layer_sizes=hidden_layer_sizes,
        validation_fraction=0.01,
        random_state=r,
        verbose=True,
        max_iter=max_iter,
    )


model = make_model(hidden_layer_sizes=(2,), max_iter=1)
model.fit(X1, y1)
pd.DataFrame(
    [
        ("train", model.score(X1, y1)),
        ("test", model.score(X2, y2)),
    ],
    columns=["spit", "score"],
)


Iteration 1, loss = 129946195645.34506226




Unnamed: 0,spit,score
0,train,-0.624
1,test,-0.502


In [6]:
import itertools
import numpy as np
from typing import Set, List, Tuple
from sklearn.model_selection import GridSearchCV


def make_grid_hidden_layer_tuples(
    max_l1_power: int,
    max_l2_power: int,
) -> List[Tuple[int, ...]]:
    assert max_l1_power > 1, "l1 must have no less than 2^1 perceptrons"
    assert max_l2_power > 0, "when present, l2 can have no less than 2^0 perceptrons"
    configs: Set[Tuple[int, ...]] = set()
    l1_power_range = list(np.arange(1, max_l1_power+1))
    l2_power_range = list(np.arange(1, max_l2_power+1))
    l1_l2_cartesian_product = itertools.product(l1_power_range, l2_power_range)
    for l1_power in l1_power_range:
        configs.add((2**l1_power,))
    for l1_power, l2_power in l1_l2_cartesian_product:
        configs.add((2**l1_power, 2**l2_power))
    return sorted(configs)


grid = GridSearchCV(
    model,
    param_grid={
        "hidden_layer_sizes": make_grid_hidden_layer_tuples(4, 2),
        "max_iter": [10],
    },
    cv=3,
    verbose=True,
)
grid


In [8]:
grid.fit(X1, y1)
grid.best_params_

Fitting 3 folds for each of 12 candidates, totalling 36 fits
Iteration 1, loss = 124830286482.92813110
Iteration 2, loss = 124087534972.05619812
Iteration 3, loss = 122761248351.79388428
Iteration 4, loss = 120888345748.02589417
Iteration 5, loss = 118513819716.17665100
Iteration 6, loss = 115685989347.30360413
Iteration 7, loss = 112462400795.03923035
Iteration 8, loss = 108914152303.35237122
Iteration 9, loss = 105128001658.32881165
Iteration 10, loss = 101177786704.86152649




Iteration 1, loss = 131775148953.54466248
Iteration 2, loss = 131033854928.49105835
Iteration 3, loss = 129708937105.20205688
Iteration 4, loss = 127838367793.78129578
Iteration 5, loss = 125465631138.13539124
Iteration 6, loss = 122640413669.09478760
Iteration 7, loss = 119417746733.78684998
Iteration 8, loss = 115873877365.35295105
Iteration 9, loss = 112086200570.77911377
Iteration 10, loss = 108138210173.43983459




Iteration 1, loss = 133715803753.06814575
Iteration 2, loss = 132974469025.20831299
Iteration 3, loss = 131650738986.65284729
Iteration 4, loss = 129779905862.49253845
Iteration 5, loss = 127407771444.72731018
Iteration 6, loss = 124580133819.76199341
Iteration 7, loss = 121356157116.68882751
Iteration 8, loss = 117814722123.86485291
Iteration 9, loss = 114025783633.95823669
Iteration 10, loss = 110081880737.39007568




Iteration 1, loss = 124236364648.22491455
Iteration 2, loss = 96387125078.08433533
Iteration 3, loss = 72372211098.74945068
Iteration 4, loss = 68806071295.74662781
Iteration 5, loss = 66622187641.44075012
Iteration 6, loss = 64554512590.80952454
Iteration 7, loss = 62242126486.75620270
Iteration 8, loss = 59932856331.74103546
Iteration 9, loss = 58041468435.37225342
Iteration 10, loss = 56674259205.56139374




Iteration 1, loss = 131864918486.72239685
Iteration 2, loss = 131536617439.64321899
Iteration 3, loss = 130940957334.30990601
Iteration 4, loss = 130089590747.71334839
Iteration 5, loss = 128989918017.32218933
Iteration 6, loss = 127652804147.93243408
Iteration 7, loss = 126088114956.12570190
Iteration 8, loss = 124310483572.82542419
Iteration 9, loss = 122336858137.08219910
Iteration 10, loss = 120183305693.39410400




Iteration 1, loss = 132568844180.65184021
Iteration 2, loss = 109272648024.83769226
Iteration 3, loss = 83409675967.99601746
Iteration 4, loss = 79425539997.12681580
Iteration 5, loss = 77198974048.47286987
Iteration 6, loss = 75637551699.11759949
Iteration 7, loss = 74151792356.92430115
Iteration 8, loss = 72517998751.08335876
Iteration 9, loss = 70765704342.11015320
Iteration 10, loss = 69022176331.06436157




Iteration 1, loss = 117515597578.22384644
Iteration 2, loss = 82418196713.72044373
Iteration 3, loss = 71708892141.94647217
Iteration 4, loss = 68997591930.53086853
Iteration 5, loss = 67186709352.89463043
Iteration 6, loss = 65556932666.09533691
Iteration 7, loss = 63758852818.74778748
Iteration 8, loss = 61756876418.53464508
Iteration 9, loss = 59806486374.22587585
Iteration 10, loss = 58207954778.05508423




Iteration 1, loss = 126666553612.24702454
Iteration 2, loss = 95722033534.76936340
Iteration 3, loss = 79772003782.25621033
Iteration 4, loss = 76972844119.23652649
Iteration 5, loss = 75056059001.75180054
Iteration 6, loss = 73598739852.36347961
Iteration 7, loss = 72177947418.51100159
Iteration 8, loss = 70623334468.84890747
Iteration 9, loss = 68905986665.15412903
Iteration 10, loss = 67233013404.39627838




Iteration 1, loss = 128604927040.08221436
Iteration 2, loss = 97645205440.13941956
Iteration 3, loss = 81666472261.60209656
Iteration 4, loss = 78860234805.94161987
Iteration 5, loss = 76932085399.59199524
Iteration 6, loss = 75485570973.22329712
Iteration 7, loss = 74046358966.75100708
Iteration 8, loss = 72486649487.46809387
Iteration 9, loss = 70781048398.56573486
Iteration 10, loss = 69092425364.74711609




Iteration 1, loss = 124836320705.43164062
Iteration 2, loss = 124105448663.08364868
Iteration 3, loss = 122790128581.85160828
Iteration 4, loss = 120928972430.00918579
Iteration 5, loss = 118562626542.75910950
Iteration 6, loss = 115741566162.64810181
Iteration 7, loss = 112524643375.37693787
Iteration 8, loss = 108986771092.30848694
Iteration 9, loss = 105201037360.61917114
Iteration 10, loss = 101254159203.30688477




Iteration 1, loss = 131781255651.32051086
Iteration 2, loss = 131051069816.98878479
Iteration 3, loss = 129737186333.31292725
Iteration 4, loss = 127877611912.85206604
Iteration 5, loss = 125514370673.58100891
Iteration 6, loss = 122695353545.32258606
Iteration 7, loss = 119483091709.34849548
Iteration 8, loss = 115942982057.05834961
Iteration 9, loss = 112157358135.11621094
Iteration 10, loss = 108215239542.25213623




Iteration 1, loss = 133721922119.90147400
Iteration 2, loss = 132990924821.28218079
Iteration 3, loss = 131677655401.94203186
Iteration 4, loss = 129817645882.70385742
Iteration 5, loss = 127455261084.86761475
Iteration 6, loss = 124637110368.29272461
Iteration 7, loss = 121425054621.42764282
Iteration 8, loss = 117884837561.03421021
Iteration 9, loss = 114098576124.00296021
Iteration 10, loss = 110153753679.55886841




Iteration 1, loss = 124985922503.38796997
Iteration 2, loss = 124981388450.43879700
Iteration 3, loss = 124976855434.72967529
Iteration 4, loss = 124972322280.45765686
Iteration 5, loss = 124967789535.44671631
Iteration 6, loss = 124963254462.95983887
Iteration 7, loss = 124958725727.79652405
Iteration 8, loss = 124954194228.65174866
Iteration 9, loss = 124949661294.28694153
Iteration 10, loss = 124945129706.64427185




Iteration 1, loss = 131930727719.73588562
Iteration 2, loss = 131926192888.13061523
Iteration 3, loss = 131921657183.51791382
Iteration 4, loss = 131917121334.78271484
Iteration 5, loss = 131912591623.20144653
Iteration 6, loss = 131908057675.11274719
Iteration 7, loss = 131903521573.53535461
Iteration 8, loss = 131898989402.75111389
Iteration 9, loss = 131894455942.58581543
Iteration 10, loss = 131889925359.82275391




Iteration 1, loss = 133871414590.00161743
Iteration 2, loss = 133866878212.20703125
Iteration 3, loss = 133862343617.79426575
Iteration 4, loss = 133857806257.41773987
Iteration 5, loss = 133736157975.98997498
Iteration 6, loss = 120913047223.60853577
Iteration 7, loss = 88742820946.81266785
Iteration 8, loss = 80694228438.45892334
Iteration 9, loss = 78214743758.56144714
Iteration 10, loss = 76500792788.29118347




Iteration 1, loss = 115463315004.39649963
Iteration 2, loss = 78879302452.98794556
Iteration 3, loss = 70920191490.87364197
Iteration 4, loss = 68306268222.21558380
Iteration 5, loss = 66486744985.71060181
Iteration 6, loss = 64647858745.08663177
Iteration 7, loss = 62545391248.78893280
Iteration 8, loss = 60365456861.81955719
Iteration 9, loss = 58495318085.62915802
Iteration 10, loss = 57121109649.52794647




Iteration 1, loss = 122422973376.31385803
Iteration 2, loss = 85839333414.08764648
Iteration 3, loss = 77803318833.69296265
Iteration 4, loss = 75202747801.35908508
Iteration 5, loss = 73380625395.07757568
Iteration 6, loss = 71576967997.87904358
Iteration 7, loss = 69503870877.69923401
Iteration 8, loss = 67404610508.89488220
Iteration 9, loss = 65526781755.32890320
Iteration 10, loss = 64136275330.99365997




Iteration 1, loss = 124363546856.28405762
Iteration 2, loss = 87755818943.02082825
Iteration 3, loss = 79707635557.95428467
Iteration 4, loss = 77091939629.93315125
Iteration 5, loss = 75264407034.13099670
Iteration 6, loss = 73468215699.39068604
Iteration 7, loss = 71379865127.23489380
Iteration 8, loss = 69272962897.19908142
Iteration 9, loss = 67384620248.55540466
Iteration 10, loss = 65971632583.23898315




Iteration 1, loss = 124090212531.98536682
Iteration 2, loss = 119867370523.59092712
Iteration 3, loss = 112720087841.77659607
Iteration 4, loss = 103651837641.64413452
Iteration 5, loss = 93922156297.39439392
Iteration 6, loss = 84975705929.08099365
Iteration 7, loss = 78262269559.21670532
Iteration 8, loss = 74581152625.09765625
Iteration 9, loss = 73102340410.36605835
Iteration 10, loss = 72155340339.73506165




Iteration 1, loss = 131035450577.82682800
Iteration 2, loss = 126816325842.24220276
Iteration 3, loss = 119691454211.66090393
Iteration 4, loss = 110617834843.53369141
Iteration 5, loss = 100882056821.89360046
Iteration 6, loss = 91933964589.48237610
Iteration 7, loss = 85216033253.75907898
Iteration 8, loss = 81521511628.64721680
Iteration 9, loss = 80013884420.76190186
Iteration 10, loss = 79064871740.05729675




Iteration 1, loss = 132977148172.04339600
Iteration 2, loss = 128758978698.39556885
Iteration 3, loss = 121633520626.31825256
Iteration 4, loss = 112557384858.54319763
Iteration 5, loss = 102824020518.43115234
Iteration 6, loss = 93870155937.65893555
Iteration 7, loss = 87133317660.18243408
Iteration 8, loss = 83426035865.29217529
Iteration 9, loss = 81913264632.62884521
Iteration 10, loss = 80961107317.31297302




Iteration 1, loss = 111365905905.11849976
Iteration 2, loss = 74974350127.36369324
Iteration 3, loss = 69539820376.29696655
Iteration 4, loss = 67066641024.62052155
Iteration 5, loss = 64926007344.76058197
Iteration 6, loss = 62471973405.19480896
Iteration 7, loss = 59957060125.02518463
Iteration 8, loss = 57957743364.23222351
Iteration 9, loss = 56552743294.05723572
Iteration 10, loss = 55526440278.45391846




Iteration 1, loss = 118328376226.66360474
Iteration 2, loss = 81888684255.04776001
Iteration 3, loss = 76429927645.05487061
Iteration 4, loss = 73940747432.09065247
Iteration 5, loss = 71818970007.22769165
Iteration 6, loss = 69453656865.21847534
Iteration 7, loss = 66992725444.18756104
Iteration 8, loss = 64972236152.21999359
Iteration 9, loss = 63532346657.67842865
Iteration 10, loss = 62491091916.18412018




Iteration 1, loss = 120273823578.01194763
Iteration 2, loss = 83791372605.22406006
Iteration 3, loss = 78318933657.55349731
Iteration 4, loss = 75820814312.96865845
Iteration 5, loss = 73690020931.99322510
Iteration 6, loss = 71310368390.40191650
Iteration 7, loss = 68836229946.21699524
Iteration 8, loss = 66805736930.86334991
Iteration 9, loss = 65365832256.56623077
Iteration 10, loss = 64300082125.20735931




Iteration 1, loss = 101190606679.78709412
Iteration 2, loss = 70947711015.21347046
Iteration 3, loss = 67102726500.97877502
Iteration 4, loss = 64064298322.94711304
Iteration 5, loss = 60612198620.36365509
Iteration 6, loss = 57734844256.12177277
Iteration 7, loss = 55840789080.02976990
Iteration 8, loss = 54494958809.76595306
Iteration 9, loss = 53390667402.73776245
Iteration 10, loss = 52394775753.83022308




Iteration 1, loss = 108011011530.48199463
Iteration 2, loss = 77850561911.73858643
Iteration 3, loss = 74038138613.42051697
Iteration 4, loss = 71056979888.42411804
Iteration 5, loss = 67683601496.44116974
Iteration 6, loss = 64846844310.94158936
Iteration 7, loss = 62897833704.43992615
Iteration 8, loss = 61523487555.37033844
Iteration 9, loss = 60404028512.75385284
Iteration 10, loss = 59398280467.32635498




Iteration 1, loss = 110050034681.88601685
Iteration 2, loss = 79745014553.81008911
Iteration 3, loss = 75915411953.35942078
Iteration 4, loss = 72932477807.13632202
Iteration 5, loss = 69479966394.61991882
Iteration 6, loss = 66630287303.24523163
Iteration 7, loss = 64664517369.15148163
Iteration 8, loss = 63308136979.53083038
Iteration 9, loss = 62196034594.87266541
Iteration 10, loss = 61210262191.74716187




Iteration 1, loss = 123665745003.57063293
Iteration 2, loss = 117489319982.15780640
Iteration 3, loss = 107442155885.70742798
Iteration 4, loss = 95566694612.85227966
Iteration 5, loss = 84365071547.17100525
Iteration 6, loss = 76724035723.58331299
Iteration 7, loss = 73549723906.76524353
Iteration 8, loss = 72218258372.98153687
Iteration 9, loss = 71137202709.24365234
Iteration 10, loss = 70190691182.19238281




Iteration 1, loss = 130613595924.97334290
Iteration 2, loss = 124452381228.81834412
Iteration 3, loss = 114412090603.67022705
Iteration 4, loss = 102521610746.98164368
Iteration 5, loss = 91321125067.93893433
Iteration 6, loss = 83641346472.02090454
Iteration 7, loss = 80460117693.09550476
Iteration 8, loss = 79136035126.27342224
Iteration 9, loss = 78049467793.96929932
Iteration 10, loss = 77108304067.31054688




Iteration 1, loss = 132557013574.33990479
Iteration 2, loss = 126398038493.33647156
Iteration 3, loss = 116356250068.26155090
Iteration 4, loss = 104478575011.74166870
Iteration 5, loss = 93283549867.68176270
Iteration 6, loss = 85569642613.69943237
Iteration 7, loss = 82371394787.86305237
Iteration 8, loss = 81035909600.70603943
Iteration 9, loss = 79946238898.75186157
Iteration 10, loss = 79001831001.06178284




Iteration 1, loss = 124985478519.33375549
Iteration 2, loss = 124980944963.32568359
Iteration 3, loss = 124976410538.40550232
Iteration 4, loss = 124971878399.78311157
Iteration 5, loss = 124967344605.16751099
Iteration 6, loss = 124962812330.70935059
Iteration 7, loss = 124958279741.34210205
Iteration 8, loss = 124953746825.48844910
Iteration 9, loss = 124949217094.94790649
Iteration 10, loss = 124944683874.63838196




Iteration 1, loss = 131930281381.04046631
Iteration 2, loss = 131925747733.03018188
Iteration 3, loss = 131921212104.41706848
Iteration 4, loss = 131916677026.07279968
Iteration 5, loss = 131912143776.62605286
Iteration 6, loss = 131907609779.15498352
Iteration 7, loss = 131903075416.63151550
Iteration 8, loss = 131898541271.85794067
Iteration 9, loss = 131894009088.35922241
Iteration 10, loss = 131889476045.07356262




Iteration 1, loss = 133870967441.10452271
Iteration 2, loss = 133866430908.36830139
Iteration 3, loss = 133861896982.62982178
Iteration 4, loss = 133857361300.37638855
Iteration 5, loss = 133852825329.46957397
Iteration 6, loss = 133848289966.92251587
Iteration 7, loss = 133843753745.35826111
Iteration 8, loss = 133839219246.58795166
Iteration 9, loss = 133834685013.78202820
Iteration 10, loss = 133830150731.00544739




In [None]:
model = make_model(
    hidden_layer_sizes=grid.best_params_["hidden_layer_sizes"],
    max_iter=200,
)
model


In [None]:
model.fit(X1, y1)


In [None]:
model.score(X2, y2)


# III. Conclusions


## Performance of results


## Closing remarks/statements
