In [1]:
import random

import numpy as np
import pandas as pd
import seaborn as sns

from algorithm_gd import forward, loss_fn, r2_score, gradient_m, gradient_c, get_iteration_vs_accuracy_data

In [2]:
# Load the height/weight CSV and shorten the two measurement column names
# to "X" (height) and "y" (weight) in a single chained expression.
df = pd.read_csv("SOCR-HeightWeight.csv").rename(
    columns={"Height(Inches)": "X", "Weight(Pounds)": "y"}
)
df

Unnamed: 0,Index,X,y
0,1,65.78331,112.9925
1,2,71.51521,136.4873
2,3,69.39874,153.0269
3,4,68.21660,142.3354
4,5,67.78781,144.2971
...,...,...,...
24995,24996,69.50215,118.0312
24996,24997,64.54826,120.1932
24997,24998,64.69855,118.2655
24998,24999,67.52918,132.2682


In [49]:
# Number of partitions ("nodes") the dataset is split into by np.array_split.
no_of_nodes = 4

In [50]:
# Split the frame into `no_of_nodes` contiguous row blocks, one per node.
# The row *positions* are split rather than the DataFrame itself: handing a
# DataFrame directly to np.array_split goes through the deprecated
# DataFrame.swapaxes and emits a FutureWarning on recent pandas versions.
# Block sizes are the same as np.array_split would produce for the frame.
node_data_partitions = [
    df.iloc[positions]
    for positions in np.array_split(np.arange(len(df)), no_of_nodes)
]
node_data_partitions

  return bound(*args, **kwds)


[      Index         X         y        X_sqr            Xy
 0         1  65.78331  112.9925  4327.443875   7433.020655
 1         2  71.51521  136.4873  5114.425261   9760.917922
 2         3  69.39874  153.0269  4816.185114  10619.874046
 3         4  68.21660  142.3354  4653.504516   9709.637048
 4         5  67.78781  144.2971  4595.187185   9781.584398
 ...     ...       ...       ...          ...           ...
 6245   6246  68.55917  131.6865  4700.359791   9028.317140
 6246   6247  69.82956  152.9133  4876.167450  10677.868457
 6247   6248  68.56366  135.9443  4700.975473   9320.838764
 6248   6249  69.89716  135.4161  4885.612976   9465.200808
 6249   6250  67.22911  126.9908  4519.753231   8537.478462
 
 [6250 rows x 5 columns],
        Index         X          y        X_sqr            Xy
 6250    6251  68.27784  151.49860  4661.863435  10343.997171
 6251    6252  67.70956  120.92840  4584.584515   8188.008756
 6252    6253  68.63497  143.90900  4710.759107   9877.189898
 625

In [53]:
# Tag every row of `df` with the position (0-based) of the partition that
# contains it, matching rows through the partition's index labels.
for node_id, partition in enumerate(node_data_partitions):
    partition_rows = partition.index
    df.loc[partition_rows, "node"] = node_id

# Cast the freshly filled column to an integer dtype before displaying.
node_as_int = df["node"].astype("int")
df["node"] = node_as_int
df

Unnamed: 0,Index,X,y,X_sqr,Xy,node
0,1,65.78331,112.9925,4327.443875,7433.020655,0
1,2,71.51521,136.4873,5114.425261,9760.917922,0
2,3,69.39874,153.0269,4816.185114,10619.874046,0
3,4,68.21660,142.3354,4653.504516,9709.637048,0
4,5,67.78781,144.2971,4595.187185,9781.584398,0
...,...,...,...,...,...,...
24995,24996,69.50215,118.0312,4830.548855,8203.422167,3
24996,24997,64.54826,120.1932,4166.477869,7758.261924,3
24997,24998,64.69855,118.2655,4185.902372,7651.606365,3
24998,24999,67.52918,132.2682,4560.190151,8931.963086,3


In [3]:
# Derived columns read by get_q, get_r and get_back.
# Computed with vectorised column arithmetic: this yields the same values as
# the previous element-wise / row-wise `.apply` calls without running a
# Python lambda once per row.
df['X_sqr'] = np.square(df['X'])
df['Xy'] = df['X'] * df['y']

In [60]:
# Display the frame to inspect its current columns (pandas elides the middle rows).
df

Unnamed: 0,Index,X,y,X_sqr,Xy,node
0,1,65.78331,112.9925,4327.443875,7433.020655,0
1,2,71.51521,136.4873,5114.425261,9760.917922,0
2,3,69.39874,153.0269,4816.185114,10619.874046,0
3,4,68.21660,142.3354,4653.504516,9709.637048,0
4,5,67.78781,144.2971,4595.187185,9781.584398,0
...,...,...,...,...,...,...
24995,24996,69.50215,118.0312,4830.548855,8203.422167,3
24996,24997,64.54826,120.1932,4166.477869,7758.261924,3
24997,24998,64.69855,118.2655,4185.902372,7651.606365,3
24998,24999,67.52918,132.2682,4560.190151,8931.963086,3


Unnamed: 0,Index,X,y,X_sqr,Xy,node
18750,18751,65.29563,113.4145,4263.519297,7405.471229,3
18751,18752,63.64036,120.7483,4050.095421,7684.465281,3
18752,18753,67.44444,121.6074,4548.752487,8201.742993,3
18753,18754,67.26357,155.4769,4524.387849,10457.931347,3
18754,18755,69.67211,150.7134,4854.202912,10500.520583,3
...,...,...,...,...,...,...
24995,24996,69.50215,118.0312,4830.548855,8203.422167,3
24996,24997,64.54826,120.1932,4166.477869,7758.261924,3
24997,24998,64.69855,118.2655,4185.902372,7651.606365,3
24998,24999,67.52918,132.2682,4560.190151,8931.963086,3


In [5]:
# Step size: scales the correction term in the update `s[pos] = frnt - L * p * back`.
# NOTE(review): with this value the iterates printed by the update loops
# oscillate with growing magnitude — confirm whether that is intended or
# whether a smaller step size is needed.
L = 0.001

In [6]:
# Mixing weights between the four nodes: get_front multiplies row `pos`
# element-wise with the state vector and sums the products.
# The matrix is symmetric and every row (hence every column) sums to 1.
doubly_stochastic_matrix_config = [
    [0.5, 0.25, 0.125, 0.125],  # 1/2, 1/4, 1/8, 1/8
    [0.25, 0.75, 0, 0],         # 1/4, 3/4
    [0.125, 0, 0.875, 0],       # 1/8, 7/8
    [0.125, 0, 0, 0.875],       # 1/8, 7/8
]

In [19]:
# Two starting vectors (one value per node) for the update loops;
# they differ only in the last entry (1.8 vs 1.9).
s1 = [1.7, 1.9, 1.1, 1.8]
s2 = [1.7, 1.9, 1.1, 1.9]

In [38]:
def get_front(s, pos):
    """Return the weighted combination of `s` using row `pos` of the mixing matrix.

    Args:
        s: Sequence of current per-node values.
        pos: Row index into `doubly_stochastic_matrix_config`.

    Returns:
        Sum over j of `doubly_stochastic_matrix_config[pos][j] * s[j]`. Pairs
        are formed with `zip`, so surplus entries on the longer side are ignored.
    """
    weights = doubly_stochastic_matrix_config[pos]
    total = 0
    for weight, value in zip(weights, s):
        total += weight * value
    return total

In [39]:
def get_p():
    """Return -2 divided by the number of non-null entries in `df`'s `Index` column."""
    row_count = df["Index"].count()
    return -2 / row_count

In [40]:
def get_q():
    """Return the sum of the `Xy` column of `df`."""
    xy_column = df["Xy"]
    return np.sum(xy_column)

In [41]:
def get_r():
    """Return the sum of the `X_sqr` column of `df`."""
    x_squared_column = df["X_sqr"]
    return np.sum(x_squared_column)

In [42]:
# Evaluate the three dataset-wide quantities once (in p, q, r order) and
# display them together as a tuple.
p, q, r = get_p(), get_q(), get_r()
p, q, r

(np.float64(-8e-05),
 np.float64(216291902.5483566),
 np.float64(115666993.35202293))

In [66]:
def get_back(s, pos):
    """Return the sum of `Xy - s[pos] * X_sqr` over the rows of `df` on node `pos`.

    Args:
        s: Sequence of current per-node values; only `s[pos]` is read.
        pos: Node id. Selects the rows where `df["node"] == pos` and the
            entry of `s` used as the multiplier.

    Returns:
        The scalar sum of the per-row terms for that node.
    """
    coef = s[pos]
    on_node = df["node"] == pos
    node_rows = df[on_node]
    per_row_terms = node_rows['Xy'].array - coef * node_rows['X_sqr']
    return np.sum(per_row_terms)

In [78]:
# Work on a copy so the s2 starting vector itself is never mutated.
s = list(s2)
print(s)

# Only node 3 is updated in this experiment; a full sweep would iterate
# over range(len(s)) instead.
active_nodes = [3]
for _ in range(15):
    for pos in active_nodes:
        mixed = get_front(s, pos)   # weighted combination of the node values
        summed = get_back(s, pos)   # sum of Xy - s[pos] * X_sqr on this node
        # Variant using the dataset-wide sums: mixed - L * p * (q - s[pos] * r)
        s[pos] = mixed - L * p * summed

    # One line of output per outer iteration.
    print(s)

[1.7, 1.9, 1.1, 1.9]
[1.7, 1.9, 1.1, np.float64(1.802372016636912)]
[1.7, 1.9, 1.1, np.float64(1.9430036149641206)]
[1.7, 1.9, 1.1, np.float64(1.7404259751647626)]
[1.7, 1.9, 1.1, np.float64(2.0322359285096407)]
[1.7, 1.9, 1.1, np.float64(1.6118882092081452)]
[1.7, 1.9, 1.1, np.float64(2.2173925900444758)]
[1.7, 1.9, 1.1, np.float64(1.3451729053005277)]
[1.7, 1.9, 1.1, np.float64(2.6015918554063067)]
[1.7, 1.9, 1.1, np.float64(0.7917398126284325)]
[1.7, 1.9, 1.1, np.float64(3.398803655840501)]
[1.7, 1.9, 1.1, np.float64(-0.35663144157859916)]
[1.7, 1.9, 1.1, np.float64(5.053014665726181)]
[1.7, 1.9, 1.1, np.float64(-2.7394967869120084)]
[1.7, 1.9, 1.1, np.float64(8.485495298522423)]
[1.7, 1.9, 1.1, np.float64(-7.68393198867234)]


In [79]:
# Work on a copy so the s1 starting vector itself is never mutated.
s = list(s1)
print(s)

# Only node 3 is updated in this experiment; a full sweep would iterate
# over range(len(s)) instead.
active_nodes = [3]
for _ in range(15):
    for pos in active_nodes:
        mixed = get_front(s, pos)   # weighted combination of the node values
        summed = get_back(s, pos)   # sum of Xy - s[pos] * X_sqr on this node
        # Variant using the dataset-wide sums: mixed - L * p * (q - s[pos] * r)
        s[pos] = mixed - L * p * summed

    # One line of output per outer iteration.
    print(s)

[1.7, 1.9, 1.1, 1.8]
[1.7, 1.9, 1.1, np.float64(1.946420468200421)]
[1.7, 1.9, 1.1, np.float64(1.7355040509856743)]
[1.7, 1.9, 1.1, np.float64(2.0393258840767476)]
[1.7, 1.9, 1.1, np.float64(1.601675237997187)]
[1.7, 1.9, 1.1, np.float64(2.232104216932488)]
[1.7, 1.9, 1.1, np.float64(1.3239810345685452)]
[1.7, 1.9, 1.1, np.float64(2.632118417053068)]
[1.7, 1.9, 1.1, np.float64(0.747766773260693)]
[1.7, 1.9, 1.1, np.float64(3.462146138155142)]
[1.7, 1.9, 1.1, np.float64(-0.4478753065347285)]
[1.7, 1.9, 1.1, np.float64(5.184450040342185)]
[1.7, 1.9, 1.1, np.float64(-2.928827408853059)]
[1.7, 1.9, 1.1, np.float64(8.75822312776407)]
[1.7, 1.9, 1.1, np.float64(-8.076792203677703)]
[1.7, 1.9, 1.1, np.float64(16.17378670174356)]
