In [None]:
# !pip install wandb

# !wandb login

In [None]:
import pandas as pd
import wandb

# Toy training set: one feature (area) and one target (price) per sample.
areas = [6.7, 4.6, 3.5, 5.5]
prices = [9.1, 5.9, 4.6, 6.7]

# Column order follows keyword order: 'areas' first, then 'prices'.
dataset = pd.DataFrame(dict(areas=areas, prices=prices))

In [None]:
# Forward pass
def predict(x, w, b):
  """Return the linear model output ``x*w + b`` for input x, weight w and bias b."""
  weighted_input = x*w
  return weighted_input + b

# Gradient of the squared error (y_hat - y)**2 with respect to w and b
def gradient(y_hat, y, x):
  """Return the pair ``(dw, db)`` for one sample.

  dw = 2*x*(y_hat - y) and db = 2*(y_hat - y), where y_hat is the
  prediction, y the target and x the input feature.
  """
  error = y_hat - y
  return (2*x*error, 2*error)

# One gradient-descent step
def update_weights(w, b, lr, dw, db):
  """Move w and b against their gradients by step size lr; return ``(w_new, b_new)``."""
  return (w - lr*dw, b - lr*db)

In [None]:
# Stochastic gradient descent on the toy (area -> price) data: the weights are
# updated after every single sample, and each sample's loss is logged to W&B.

#init weights
b = 0.04
w = -0.34
lr = 0.01
epochs = 10

X_train = dataset['areas']
Y_train = dataset['prices']

N = len(X_train)
#parameter
losses = [] # per-sample loss history, kept for debugging

#init project wandb
# The run is used as a context manager so that it is always closed (and its
# data uploaded) when the block exits, even if the training loop raises.
with wandb.init(
    #Set the project where this run will be logged
    project = 'demo-linear-regression',
    config={
        'learning-rate':lr,
        'epochs':epochs,
    },
) as run:
  run.log({'Dataset': wandb.Table(dataframe=dataset)})

  for epoch in range(epochs):
    #for an epoch
    for i in range(N):
      #get a sample (positional access, independent of the frame's index labels)
      x = X_train.iloc[i]
      y = Y_train.iloc[i]

      #predict y_hat
      y_hat = predict(x, w, b)

      #compute loss
      # Squared error without the 1/2 factor: gradient() returns
      # 2*x*(y_hat-y) and 2*(y_hat-y), i.e. the derivative of (y_hat-y)**2,
      # so this is the quantity the updates actually minimize.
      loss = (y_hat-y)**2
      losses.append(loss)

      #tracking loss with WandB
      run.log({'loss': loss})

      #compute gradient
      (dw, db) = gradient(y_hat, y, x)

      #update weights
      (w, b) = update_weights(w, b, lr, dw, db)

# Leaving the `with` block marks the run as finished and finishes uploading all data.

Bài tập (Exercise): multiple linear regression on the advertising dataset


import pandas as pd
import wandb

In [None]:
# Read the advertising CSV into a DataFrame and preview its first rows.
advertising_csv = '/content/advertising.csv'
dataset = pd.read_csv(advertising_csv)
dataset.head()

In [None]:
# Pull each column out of the loaded frame as a plain Python list.
# The frame is bound to `dataset` when the CSV is read; the name `df` used
# here before is never defined, which raised NameError on a fresh kernel.
tv_data = dataset['TV'].values.tolist()
radio_data = dataset['Radio'].values.tolist()
newspaper_data = dataset['Newspaper'].values.tolist()
sales_data = dataset['Sales'].values.tolist()

In [None]:
def scaling(data1, data2, data3):
  """Mean-normalize three feature sequences with statistics pooled over all of them.

  Every value x is mapped to ``(x - mean) / (max - min)``, where mean, max
  and min are computed over the concatenation of the three inputs, so all
  features share one common scale.

  Args:
    data1, data2, data3: sequences of numbers (may have different lengths).

  Returns:
    ``((scaled1, scaled2, scaled3), (mean_value, max_value, min_value))``
    where each ``scaledN`` is a new list; the inputs are not modified.

  Raises:
    ValueError: if the three inputs contain no values at all.
  """
  # list() makes `+` a concatenation for any sequence type, not only lists.
  pooled = list(data1) + list(data2) + list(data3)
  if not pooled:
    raise ValueError('scaling() needs at least one value')

  max_value = max(pooled)
  min_value = min(pooled)
  mean_value = sum(pooled)/len(pooled)
  value_range = max_value - min_value

  def _normalize(values):
    # All pooled values equal: every deviation from the mean is zero, so
    # return zeros instead of dividing by a zero range.
    if value_range == 0:
      return [0.0 for _ in values]
    return [(x - mean_value)/value_range for x in values]

  scaled = (_normalize(data1), _normalize(data2), _normalize(data3))
  return scaled, (mean_value, max_value, min_value)

In [None]:
# Replace the raw feature lists with their scaled versions and keep the
# pooled statistics that scaling() returns alongside them.
scaled_features, scaling_stats = scaling(tv_data, radio_data, newspaper_data)
tv_data, radio_data, newspaper_data = scaled_features
mean_v, max_v, min_v = scaling_stats

In [None]:
# Show the first five scaled values of each feature, then the pooled statistics.
for feature_values in (tv_data, radio_data, newspaper_data):
  print(feature_values[:5])
print(mean_v, max_v, min_v)

In [None]:
# Rebuild the frame from the current feature lists and the sales targets.
# NOTE: this rebinds `dataset`, replacing the frame that was read from the CSV.
dataset = pd.DataFrame({
    'TV': tv_data,
    'Radio': radio_data,
    'Newspaper': newspaper_data,  # was misspelled 'Newpaper'; now matches the CSV column
    'Sales': sales_data
})

In [None]:
def predict(x1, x2, x3, w1, w2, w3, b):
  """Return the three-feature linear model output ``x1*w1 + x2*w2 + x3*w3 + b``."""
  weighted_sum = x1*w1 + x2*w2 + x3*w3
  return weighted_sum + b

def compute_loss(y_hat, y):
  """Return the squared error between prediction y_hat and target y."""
  error = y_hat - y
  return error**2

# Gradient of the squared error with respect to one weight
def compute_gradient_wi(xi, y, y_hat):
  """Return ``2*xi*(y_hat - y)``: the squared-error derivative for the weight paired with feature xi."""
  error = y_hat - y
  return 2*xi*error

def compute_gradient_b(y, y_hat):
  """Return ``2*(y_hat - y)``: the squared-error derivative with respect to the bias."""
  error = y_hat - y
  return 2*error

# Gradient-descent step for a single weight
def update_weights_wi(wi, dl_dwi, lr):
  """Return wi moved against its gradient dl_dwi by step size lr."""
  return wi - lr*dl_dwi

def update_weights_b(b, dl_db, lr):
  """Return the bias b moved against its gradient dl_db by step size lr."""
  return b - lr*dl_db


In [None]:
# Batch gradient descent on the three advertising features: gradients are
# accumulated over all N samples and the weights are updated once per epoch
# with their mean. The mean loss of each epoch is logged to W&B.

#init weights
w1 = w2 = w3 = 0
b = 1
epochs = 1000
lr = 0.01

N = len(tv_data)
#parameter
losses = [] # mean loss of every epoch, kept for debugging

#init project wandb
# The run is used as a context manager so that it is always closed (and its
# data uploaded) when the block exits, even if the training loop raises.
with wandb.init(
    #Set the project where this run will be logged
    # NOTE(review): 'abvertising' looks like a typo for 'advertising'; left
    # unchanged so new runs keep landing in the same W&B project.
    project='abvertising-Linear-Regression',
    config={
        'learning_rate':lr,
        'epochs': epochs,
    },
) as run:
  run.log({'Dataset': wandb.Table(dataframe=dataset)})

  for epoch in range(epochs):
    #per-epoch accumulators
    loss_total = 0.0

    dw1_total = 0.0
    dw2_total = 0.0
    dw3_total = 0.0
    db_total = 0.0

    for i in range(N):
      x1 = tv_data[i]
      x2 = radio_data[i]
      x3 = newspaper_data[i]
      y = sales_data[i]

      #compute output
      y_hat = predict(x1, x2, x3, w1, w2, w3, b)

      #compute loss
      loss = compute_loss(y=y, y_hat=y_hat)
      loss_total = loss_total + loss

      #compute gradient w1, w2, w3, b
      dl_dw1 = compute_gradient_wi(x1, y, y_hat)
      dl_dw2 = compute_gradient_wi(x2, y, y_hat)
      dl_dw3 = compute_gradient_wi(x3, y, y_hat)
      dl_db = compute_gradient_b(y=y, y_hat=y_hat)

      #accumulate
      dw1_total = dw1_total + dl_dw1
      dw2_total = dw2_total + dl_dw2
      dw3_total = dw3_total + dl_dw3
      db_total = db_total + dl_db

    # Step with the MEAN gradient over the epoch. Using dl_dw*/dl_db here
    # would apply only the last sample's gradient (scaled by 1/N) and leave
    # the accumulated totals unused.
    w1 = update_weights_wi(w1, dw1_total/N, lr)
    w2 = update_weights_wi(w2, dw2_total/N, lr)
    w3 = update_weights_wi(w3, dw3_total/N, lr)
    b = update_weights_b(b, db_total/N, lr)

    #logging
    epoch_loss = loss_total/N
    losses.append(epoch_loss)
    run.log({'loss': epoch_loss})

# Leaving the `with` block marks the run as finished and finishes uploading all data.
