## 파일 목적  
실제 데이터(concrete 데이터셋)를 불러와 Surrogate Model에 gradient descent, GA를 구현하고 end-to-end로 문제없이 작동하는지 확인하기 위함

## 데이터 불러오기

In [None]:
import kagglehub

In [None]:
path = kagglehub.dataset_download("vinayakshanawad/cement-manufacturing-concrete-dataset")

In [None]:
print("Path to dataset files:", path)

In [None]:
# Fall back to the known local kagglehub cache location only when the download cell
# above was not executed (e.g. no network access). Otherwise keep the path returned by
# kagglehub so the notebook also works on other machines.
if "path" not in globals():
    path = "/data/ephemeral/home/.cache/kagglehub/datasets/vinayakshanawad/cement-manufacturing-concrete-dataset/versions/1"

In [None]:
import os

import fireducks.pandas as pd

# Build the file path with os.path.join so it is correct regardless of trailing
# separators in `path` or of the host OS.
df = pd.read_csv(os.path.join(path, "concrete.csv"))

df.head()

## 간단한 데이터 EDA

In [None]:
df.describe()

In [None]:
df.corr()


In [None]:

import torch
import sklearn

In [None]:
# `import sklearn` alone does not reliably make the `model_selection` submodule available
# as an attribute, so import the function explicitly.
from sklearn.model_selection import train_test_split

# 80/20 split with a fixed seed so the train/test partition is reproducible.
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [None]:
train.shape

In [None]:
test.shape

In [None]:
train.describe()

In [None]:
test.describe()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Quartiles and interquartile range of the 'slag' column in the training split.
train_slag = train['slag']
Q1, Q3 = train_slag.quantile(0.25), train_slag.quantile(0.75)
IQR = Q3 - Q1

print(f'Q1: {Q1}, Q3: {Q3}, IQR: {IQR}')

# Horizontal box plot with the mean drawn as a line.
plt.figure(figsize=(8, 6))
plt.boxplot(
    train_slag,
    vert=False,
    patch_artist=True,
    showmeans=True,
    meanline=True,
)
plt.xlabel('slag', fontsize=12)
plt.show()

In [None]:
# Quartiles and interquartile range of the 'slag' column in the test split.
test_slag = test['slag']
Q1, Q3 = test_slag.quantile(0.25), test_slag.quantile(0.75)
IQR = Q3 - Q1

print(f'Q1: {Q1}, Q3: {Q3}, IQR: {IQR}')

# Horizontal box plot with the mean drawn as a line.
plt.figure(figsize=(8, 6))
plt.boxplot(
    test_slag,
    vert=False,
    patch_artist=True,
    showmeans=True,
    meanline=True,
)
plt.xlabel('slag', fontsize=12)
plt.show()

## Search Model 학습 준비 및 학습

In [None]:
class ConcreteDataset(torch.utils.data.Dataset):
    """Torch dataset over the concrete table.

    Each item is a pair ``(x, y)``:

    * ``x`` - the 8 feature columns, standardised with ``mean[:-1]`` / ``std[:-1]``
    * ``y`` - ``log(strength)`` as a 1-element tensor

    Args:
        df: DataFrame containing the feature columns and a ``strength`` column.
        mean: 1-D float tensor of column means; the last entry (the target's mean) is
            not used for the features.
        std: 1-D float tensor of column standard deviations, laid out like ``mean``.
    """

    FEATURE_COLUMNS = ['cement', 'slag', 'ash', 'water', 'superplastic',
                       'coarseagg', 'fineagg', 'age']
    TARGET_COLUMN = 'strength'

    def __init__(self, df, mean,std):
        self.df = df
        self.mean = mean
        self.std = std

        # Convert and normalise the whole table once instead of doing two pandas row
        # lookups plus two tensor conversions for every single __getitem__ call.
        features = torch.tensor(
            df[self.FEATURE_COLUMNS].to_numpy(), dtype=torch.float32
        )
        target = torch.tensor(
            df[[self.TARGET_COLUMN]].to_numpy(), dtype=torch.float32
        )
        self._x = (features - mean[:-1]) / std[:-1]
        # The target is learned on a log scale; predictions are mapped back with exp().
        self._y = torch.log(target)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        return self._x[idx], self._y[idx]

In [None]:
# dataset = ConcreteDataset(df)

# dataset[0]

In [None]:
train

In [None]:
# Per-column mean and (population, ddof=0) standard deviation of the training split.
# The target 'strength' is kept as the last entry.
stat_columns = ['cement','slag','ash','water','superplastic','coarseagg','fineagg','age','strength']
train_values = train[stat_columns].to_numpy()
mean = torch.tensor(train_values.mean(axis=0), dtype=torch.float32)
std = torch.tensor(train_values.std(axis=0), dtype=torch.float32)

In [None]:
print(mean)
print(std)

In [None]:
# import torchvision.transforms as transforms
# transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,))])
dataset_train = ConcreteDataset(train,mean,std)
dataset_test = ConcreteDataset(test,mean,std)

In [None]:
from torch.utils.data import DataLoader
train_loader = DataLoader(dataset_train, batch_size=100, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=100, shuffle=False)

In [None]:
class simple_nn(torch.nn.Module):
    """Three-layer MLP (8 -> 16 -> 32 -> 1) with ReLU activations.

    ``forward`` returns a pair: the scalar prediction and the 32-dimensional
    activation of the last hidden layer.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(8, 16)
        self.fc2 = torch.nn.Linear(16, 32)
        self.fc3 = torch.nn.Linear(32, 1)

    def forward(self, x):
        hidden = torch.relu(self.fc1(x))
        features = torch.relu(self.fc2(hidden))
        prediction = self.fc3(features)
        return prediction, features

In [None]:
model = simple_nn()
model.cuda()

In [None]:
import torch.optim as optim
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
import numpy as np

# Per-epoch history: mean training loss, mean validation loss (both MSE on log-strength)
# and mean absolute error on the original strength scale.
losses = []
val_losses = []
errors = []

criterion = torch.nn.MSELoss()  # built once instead of once per batch

for epoch in range(200):
    # ---- training pass ----
    batch_losses = []
    for inputs, outputs in train_loader:
        inputs = inputs.cuda()
        outputs = outputs.cuda()
        optimizer.zero_grad()
        pred, _ = model(inputs)
        loss = criterion(pred, outputs)

        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    losses.append(np.mean(batch_losses))

    # ---- validation pass ----
    # No gradients are needed here; no_grad avoids building the autograd graph.
    abs_errors = []
    val_batch_losses = []
    with torch.no_grad():
        for inputs, outputs in test_loader:
            inputs = inputs.cuda()
            outputs = outputs.cuda()
            pred, _ = model(inputs)
            val_batch_losses.append(criterion(pred, outputs).item())
            # The model works on log(strength); exp() maps back to the original scale.
            abs_errors.append(
                abs(torch.exp(pred.cpu()) - torch.exp(outputs.cpu())).numpy()
            )

    errors.append(np.concatenate(abs_errors, axis=0).mean())
    val_losses.append(np.mean(val_batch_losses))

    # Report the values of THIS epoch (previously the running average over all epochs
    # so far was printed for the two losses, which hid the actual progress).
    print(f'losses: {losses[-1]}')
    print(f'val_losses: {val_losses[-1]}')
    print(f'errors: {errors[-1]}')

In [None]:
len(val_losses)

In [None]:
mean

In [None]:

# Collect, per test batch and on the original strength scale:
#   err      - squared error of the model prediction
#   mean_err - squared error of the constant "training mean" baseline
#   pred_y   - the predictions themselves
err = []
mean_err = []
pred_y = []

train_mean_strength = mean[-1]  # last entry of `mean` is the 'strength' column

# Evaluation only: no autograd graph needed. The per-batch losses are intentionally no
# longer appended to `val_losses`, which holds one value per training epoch.
with torch.no_grad():
    for inputs, outputs in test_loader:
        pred, _ = model(inputs.cuda())
        # The model predicts log(strength); exp() maps back to the original scale.
        pred_strength = torch.exp(pred.cpu())
        true_strength = torch.exp(outputs)
        pred_y.append(pred_strength.numpy())
        err.append(((pred_strength - true_strength) ** 2).numpy())
        # Scalar baseline broadcasts against the (batch, 1) targets.
        mean_err.append(((train_mean_strength - true_strength) ** 2).numpy())


In [None]:
test[['cement','slag','ash','water','superplastic','coarseagg','fineagg','age']].to_numpy().shape

In [None]:
err = np.concatenate(err,axis=0)
mean_err = np.concatenate(mean_err,axis=0)


In [None]:
pred_y = np.concatenate(pred_y,axis=0)

In [None]:
print(err.shape)
print(mean_err.shape)
print(pred_y.shape)

In [None]:
print(1- np.mean(err)/np.mean(mean_err))

In [None]:
# Adjusted R^2 needs the number of samples the errors were actually computed on, i.e.
# the test split - not the size of the full dataset (1030).
n = len(test)
# Number of predictors (feature columns fed to the model).
p = 8

In [None]:
print(1- (np.mean(err)/(n-p-1))/(np.mean(mean_err)/(n-1)))

In [None]:
max(err)

In [None]:
# np.where on the 2-D `err` returns (row_indices, col_indices); rows must be selected with
# the ROW indices. Using [1] (all zeros) only repeated err[0].
err[np.where(err < 100)[0]].shape

In [None]:
# np.where on the 2-D `err` returns (row_indices, col_indices); rows must be selected with
# the ROW indices. Using [1] (all zeros) only repeated err[0].
err[np.where(err < 300)[0]].shape

In [None]:
mean[:-1]

In [None]:
std[:-1]

## Surrogate Model - 시도 1 : Gradient Descent

In [None]:
from tqdm import tqdm

In [None]:
mean

In [None]:
# Input search by gradient descent: for every test batch, start from random normalised
# inputs and optimise them so that the frozen model's prediction matches the true
# log-strength. The recovered inputs are then compared with the true inputs.
save_SSE = []   # squared error (recovered input vs. true input), raw feature units
save_SST = []   # squared error (training mean vs. true input), raw feature units
patience = 10
lr_decay_factor = 0.5

# The network is fixed during the search; only the inputs receive gradients.
for param in model.parameters():
    param.requires_grad = False

criterion = torch.nn.MSELoss()
feature_std = std[:-1]  # last entry of `std` belongs to the target column

for x, y in tqdm(test_loader):
    x = x.cuda()
    y_opt = y.cuda().detach()

    init_x = torch.randn(x.shape[0], x.shape[1], device='cuda', requires_grad=True)
    # Separate name so the training optimizer of the model is not overwritten.
    x_optimizer = optim.Adam([init_x], lr=0.1)

    min_val = float('inf')
    best_x = None
    no_improve_epochs = 0

    for _ in range(10000):
        x_optimizer.zero_grad()
        pred, _ = model(init_x)
        loss = criterion(pred, y_opt)

        if loss.item() < min_val:
            min_val = loss.item()
            # Snapshot BEFORE the optimizer step: these are the inputs that produced
            # `loss`. After step() init_x already holds different values.
            best_x = init_x.detach().clone()
            no_improve_epochs = 0
        else:
            no_improve_epochs += 1

        loss.backward()
        x_optimizer.step()

        # Halve the learning rate after `patience` consecutive steps without improvement.
        if no_improve_epochs > patience:
            x_optimizer.param_groups[0]['lr'] *= lr_decay_factor
            no_improve_epochs = 0

    true_x = x.detach().cpu()
    # x is standardised, so multiplying a difference by std converts it to raw units:
    #   (x_a - x_b) * std == raw_a - raw_b
    save_SSE.append((((best_x.cpu() - true_x) * feature_std) ** 2).numpy())
    #   x * std == raw - mean, i.e. the deviation of the true input from the training
    # mean. (The previous `mean - x*std` mixed a raw-scale mean with a centred value.)
    save_SST.append(((true_x * feature_std) ** 2).numpy())
    


In [None]:
save_SSE = np.concatenate(save_SSE,axis=0)
save_SST = np.concatenate(save_SST,axis=0)
print(save_SSE.shape)
print(save_SST.shape)
print(1- np.mean(save_SSE)/np.mean(save_SST))


In [None]:
std# np.concatenate(save1,axis=0).shape

In [None]:
save_SSE.shape

In [None]:
save_SSE.mean(axis=1).shape

In [None]:
save_SST.mean(axis=1).shape

In [None]:
df.columns

In [None]:
1 - (save_SSE.mean(axis=0) / save_SST.mean(axis=0))

In [None]:
df.columns


In [None]:
std

In [None]:
mean

## Surrogate Model - 시도 2 : GA(deap 라이브러리)

In [None]:
import numpy as np
import torch

model.eval()

In [None]:
train.describe()

In [None]:
test.describe()

In [None]:
y_target = test.iloc[:,-1]
print(y_target)

In [None]:
for gt_y in y_target:
    print(type(gt_y)) # str
    break

In [None]:
y_target.dtype

In [None]:
# Fitness function
def fitness(individual):
    """
    Score one individual by how close the model's prediction is to the target.

    individual : one member of the population

    Returns the negated absolute difference between the model prediction and the
    module-level `y_target` (larger is better; the result is a tensor).

    NOTE(review): `y_target` is assigned in an earlier cell as `test.iloc[:,-1]`, i.e. the
    whole strength column on its original scale, while the model is trained on
    log(strength). A single log-scale target value was presumably intended - confirm.
    """
    x_tensor = torch.tensor(individual, dtype=torch.float32).unsqueeze(0).to('cuda') # add batch dimension
    with torch.no_grad():
        y_pred, _ = model(x_tensor)
    fit_fun = -abs(y_pred - y_target)
    return fit_fun

In [None]:
from deap import base, creator, tools, algorithms
import random

# Single-objective fitness; the positive weight means larger fitness values are better.
creator.create('FitnessMax', base.Fitness, weights=(1.0,))
# Individuals are plain lists carrying a FitnessMax instance in their `.fitness` attribute.
creator.create('Individual', list, fitness=creator.FitnessMax)


toolbox = base.Toolbox()
# NOTE(review): x_min / x_max are only assigned in a later cell of this notebook, so that
# cell has to be executed before this one.
toolbox.register('attr_float', random.uniform, x_min, x_max)
# n=1: every individual is a list holding the result of ONE attr_float() call.
# NOTE(review): with array-valued x_min / x_max that single element is presumably an
# 8-element array rather than 8 separate genes - confirm this is intended.
toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.attr_float, n=1)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)

# Genetic operators: model-based fitness, tournament selection (size 3), blend crossover
# and Gaussian mutation.
toolbox.register('evaluate', fitness)
toolbox.register('select', tools.selTournament, tournsize=3)
toolbox.register('mate', tools.cxBlend, alpha=0.5)
toolbox.register('mutate', tools.mutGaussian, mu=0, sigma=1, indpb=0.2)


In [None]:
pop_size = 50
population = toolbox.population(n=pop_size)

In [None]:
print(len(population))

In [None]:
print(population[0])

In [None]:
print(len(population[0]))

In [None]:
alist = []
for idx, ind in enumerate(population):
    if idx < 2:
        print(len(ind))
        # print(toolbox.evaluate(ind))
        # print(toolbox.evaluate(ind)[0])
        alist.append(toolbox.evaluate(ind)[0])
        
print(alist)
print(alist[0])
print(alist[0].item())

In [None]:
for ind in population:
    if hasattr(ind, 'fitness'):
        print('true')
    else:
        print('false')
    break

In [None]:
fitness_scores = [toolbox.evaluate(ind)[0] for ind in population]
for ind, fit in zip(population, fitness_scores):
    # ind.fitness.values = (fit,)
    # print(fit.item())
    print((fit,))
    print(ind.fitness)
    # print(ind.fitness.values)
    # ind.fitness = fit.item()
    break

In [None]:
print(len(population))

In [None]:
parents = toolbox.select(population, len(population) // 2)
print(len(parents))

In [None]:
offspring = tools.selBest(parents, k=len(population))
# print(offspring)
print(len(offspring))

In [None]:
offspring = list(map(toolbox.clone, offspring))
# print(offspring)
print(len(offspring))

In [None]:
for i in range(1, len(offspring), 2):
    if random.random() < 0.7:
        print(toolbox.mate(offspring[i-1], offspring[i]))
        print(len(toolbox.mate(offspring[i-1], offspring[i])))
    break

In [None]:
# print(offspring)
print(len(offspring))

In [None]:
for child in offspring:
    if random.random() < 0.2:
        print(toolbox.mutate(child))
        print(len(toolbox.mutate(child)))
    break

In [None]:
for ind in offspring:
    print(ind)
    break

In [None]:
train

In [None]:
for x_,y_ in dataset_train:
    print(x_)
    print(y_)
    break

In [None]:
train_except_strength = train.drop(columns='strength')
train_except_strength

In [None]:
# y target 뺀 train array
train_array = np.array(train_except_strength)

In [None]:
# train array 열 별 sum
x_sum = np.sum(train_array, axis=0)
x_sum

In [None]:
train_len = train_array.shape[0]

In [None]:
x_mean = x_sum / train_len
x_mean

In [None]:
x_std = np.std(train_array, axis=0)

In [None]:
normalize_train_array = (train_array - x_mean) / x_std
normalize_train_array

In [None]:
x_min = np.min(normalize_train_array, axis=0)  # 열별 최소값
x_max = np.max(normalize_train_array, axis=0)  # 열별 최대값

In [None]:
print(x_min)
print(x_max)

In [None]:
train_x_normalize_mean = normalize_train_array.mean(axis=0)

In [None]:
import random
from deap import base, creator, tools

# GA-based input search with deap: for every test row, search for normalised inputs
# whose predicted log-strength matches the true log-strength, then compare the best
# candidate with the true inputs.
SSE_element = []   # per row: squared error (true input - recovered input), shape (8,)
SST_element = []   # per row: squared error (true input - training mean),   shape (8,)

pop_size = 50          # individuals per generation
n_parents = 5          # survivors kept by rank selection
n_generations = 100
cx_prob = 0.7          # probability that a pair of children is crossed over
mut_prob = 0.2         # probability that a child is mutated

# deap's `creator` registers classes globally. Creating them again for every test row
# (or on every re-run of this cell) only triggers overwrite warnings.
if not hasattr(creator, 'FitnessMax'):
    creator.create('FitnessMax', base.Fitness, weights=(1.0,))
if not hasattr(creator, 'Individual'):
    creator.create('Individual', list, fitness=creator.FitnessMax)

lower_bounds = np.asarray(x_min, dtype=float)
upper_bounds = np.asarray(x_max, dtype=float)
search_device = next(model.parameters()).device


def random_genes():
    """Draw one candidate: every feature is sampled independently inside its own range."""
    return [float(random.uniform(lo, hi)) for lo, hi in zip(lower_bounds, upper_bounds)]


def evaluate_population(individuals, target_log_y):
    """Give every individual without a valid fitness one, using a single batched forward pass.

    Fitness is the negated squared error between predicted and target log-strength, stored
    as a plain Python float so deap never has to compare GPU tensors.
    """
    pending = [ind for ind in individuals if not ind.fitness.valid]
    if not pending:
        return
    batch = torch.tensor(np.array(pending, dtype=np.float32)).to(search_device)
    with torch.no_grad():
        y_pred, _ = model(batch)
    scores = -(y_pred.reshape(-1) - target_log_y) ** 2
    for ind, score in zip(pending, scores.tolist()):
        ind.fitness.values = (score,)


toolbox = base.Toolbox()
# One individual = list of 8 floats, so crossover and mutation act on each gene separately.
toolbox.register('individual', tools.initIterate, creator.Individual, random_genes)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
toolbox.register('select', tools.selBest, k=n_parents)  # Rank Selection
toolbox.register('mate', tools.cxBlend, alpha=0.5)
toolbox.register('mutate', tools.mutGaussian, mu=0, sigma=1, indpb=0.2)

for idx, row in tqdm(test.iterrows(), total=len(test)):
    row = np.array(row)
    # Last column is the target; everything before it are the features.
    gt_x = (row[:-1] - x_mean) / x_std
    gt_y = float(np.log(row[-1]))   # the model is trained on log(strength)

    population = toolbox.population(n=pop_size)
    evaluate_population(population, gt_y)

    # Remember the best evaluated individual ever seen. Picking the "best" from the final
    # population after its fitness values were deleted returned an arbitrary individual.
    best_so_far = tools.HallOfFame(1)
    best_so_far.update(population)

    for gen in range(n_generations):
        parents = toolbox.select(population)
        # Refill the population to full size by cycling through clones of the parents;
        # otherwise it collapses to `n_parents` individuals after the first generation.
        offspring = [toolbox.clone(parents[i % len(parents)]) for i in range(pop_size)]

        # crossover on adjacent pairs
        for child_a, child_b in zip(offspring[::2], offspring[1::2]):
            if random.random() < cx_prob:
                toolbox.mate(child_a, child_b)
                del child_a.fitness.values, child_b.fitness.values

        # mutation
        for child in offspring:
            if random.random() < mut_prob:
                toolbox.mutate(child)
                del child.fitness.values

        # Blend crossover and Gaussian mutation can leave the range observed in the
        # training data; clip modified children back into it.
        for child in offspring:
            if not child.fitness.valid:
                child[:] = np.clip(child, lower_bounds, upper_bounds).tolist()

        # Only modified children are re-evaluated; untouched clones keep their fitness.
        evaluate_population(offspring, gt_y)

        # replace the population with the new generation
        population[:] = offspring
        best_so_far.update(population)

    x_pred = np.array(best_so_far[0])

    SSE_element.append((gt_x - x_pred) ** 2)
    SST_element.append((gt_x - train_x_normalize_mean) ** 2)

In [None]:
print(len(SSE_element))
print(len(SST_element))

In [None]:
SSE = np.sum(SSE_element, axis=0)
SST = np.sum(SST_element, axis=0)
print(SSE, SST)

In [None]:
print(SSE_element[0].shape)
print(SST_element[0].shape)

In [None]:
r_squared = 1 - (SSE/SST)
print(r_squared)

## Surrogate Model - 시도 3 : GA(pygmo 라이브러리)

In [None]:
import pygmo as pg

In [None]:
class Sphere:
    """pygmo user-defined problem: find model inputs whose prediction equals `target`.

    Args:
        model: torch module; its output (or the first element of a tuple output) must be
            a single-element tensor for a batch of one input.
        target: value the prediction should match (same scale as the model output).
        x_min: array of per-feature lower bounds (must provide ``.tolist()``).
        x_max: array of per-feature upper bounds (must provide ``.tolist()``).
    """

    def __init__(self, model, target, x_min, x_max):
        self.model = model
        self.target = target
        self.x_min = x_min
        self.x_max = x_max

    def _device(self):
        """Return the device the model lives on (CPU for a parameter-free model)."""
        first_param = next(self.model.parameters(), None)
        return first_param.device if first_param is not None else torch.device('cpu')

    def fitness(self, x):
        """Return ``[squared error]`` between the model prediction for `x` and the target.

        pygmo MINIMISES the fitness, so the squared error is returned with a positive
        sign; negating it would drive the search towards the worst candidates.
        """
        # Add a batch dimension and move to wherever the model is.
        x_tensor = torch.tensor(x, dtype=torch.float32).unsqueeze(0).to(self._device())

        with torch.no_grad():
            y_pred = self.model(x_tensor)

        # Models returning (prediction, features) are supported as well.
        if isinstance(y_pred, tuple):
            y_pred = y_pred[0]

        return [(y_pred.item() - self.target) ** 2]

    def get_bounds(self):
        """Return the box bounds ``(lower, upper)`` of the search space as lists."""
        return (self.x_min.tolist(), self.x_max.tolist())



In [None]:
from tqdm import tqdm

# For every test target, run ant-colony optimisation over the model inputs and keep the
# champion (best input vector and its fitness).
results = []
for gt_y in tqdm(y_target):
    # `y_target` holds strength on its original scale, while the model is trained on
    # log(strength); the target must be put on the same scale as the prediction.
    prob = pg.problem(Sphere(model, np.log(gt_y), x_min, x_max))
    algo = pg.algorithm(pg.gaco(gen=100, ker=50, q=1.0, oracle=0.0, acc=0.01, threshold=1, memory=False))
    pop = pg.population(prob, size=50)
    pop = algo.evolve(pop)
    results.append((pop.champion_x, pop.champion_f))

In [None]:
best_solution = min(results, key=lambda x: x[1])
print("Best solution (x):", best_solution[0])

In [None]:
import numpy as np
import torch
import pygmo as pg
from tqdm import tqdm

SSE_element = []
SST_element = []

class SphereProblem:
    """pygmo user-defined problem: find model inputs whose prediction equals `gt_y`.

    Args:
        model: torch module returning a ``(prediction, features)`` pair; the prediction
            must be a single-element tensor for a batch of one input.
        gt_y: target value on the same scale as the model output.
        x_min: array of per-feature lower bounds (must provide ``.tolist()``).
        x_max: array of per-feature upper bounds (must provide ``.tolist()``).
    """

    def __init__(self, model, gt_y, x_min, x_max):
        self.model = model
        self.gt_y = gt_y
        self.x_min = x_min
        self.x_max = x_max

    def _device(self):
        """Return the device the model lives on (CPU for a parameter-free model)."""
        first_param = next(self.model.parameters(), None)
        return first_param.device if first_param is not None else torch.device('cpu')

    def fitness(self, x):
        """Return ``[squared error]`` between the model prediction for `x` and `gt_y`.

        pygmo MINIMISES the fitness, so the squared error is returned with a positive
        sign; negating it would drive the search towards the worst candidates.
        """
        # Add a batch dimension and move to wherever the model is.
        x_tensor = torch.tensor(x, dtype=torch.float32).unsqueeze(0).to(self._device())
        with torch.no_grad():
            y_pred, _ = self.model(x_tensor)

        return [(y_pred.item() - self.gt_y) ** 2]

    def get_bounds(self):
        """Return the box bounds ``(lower, upper)`` of the search space as lists."""
        return (self.x_min.tolist(), self.x_max.tolist())

# Search bounds: per-feature range of the normalised training data. They do not depend on
# the test row, so compute them once instead of once per row.
x_min = np.min(normalize_train_array, axis=0)
x_max = np.max(normalize_train_array, axis=0)

for idx, row in tqdm(test.iterrows(), total=len(test)):
    row = np.array(row)
    # Last column is the target; everything before it are the features.
    gt_x = (row[:-1] - x_mean) / x_std
    gt_y = np.log(row[-1])   # the model is trained on log(strength)

    prob = pg.problem(SphereProblem(model, gt_y, x_min, x_max))

    algo = pg.algorithm(pg.sga(gen=100, cr=0.7, eta_c=1.0, m=0.2, param_m=1.0))

    pop = pg.population(prob, size=50)
    pop = algo.evolve(pop)

    # Best input vector found for this row, shape (8,).
    x_pred = np.array(pop.champion_x)

    # One (8,) array of per-feature squared errors per test row.
    SSE_element.append((gt_x - x_pred) ** 2)
    SST_element.append((gt_x - train_x_normalize_mean) ** 2)

print('SSE:', np.sum(SSE_element))
print('SST:', np.sum(SST_element))

In [None]:
print(len(SSE_element))
print(len(SST_element))

In [None]:
print(SSE_element[0].shape)
print(SST_element[0].shape)

In [None]:
SSE = np.sum(SSE_element, axis=0)
SST = np.sum(SST_element, axis=0)

In [None]:
r_squared = 1 - (SSE/SST)
print(r_squared)

## Surrogate Model - 시도 4 : GA(pygad 라이브러리)

In [None]:
import numpy as np
import torch
from tqdm import tqdm
import pygad

SSE_element = []
SST_element = []


def fitness_function(ga_instance, solution, solution_idx):
    """PyGAD fitness callback.

    Feeds `solution` through the module-level `model` and returns the negated squared
    difference between the scalar prediction and `ga_instance.gt_y`.
    """
    candidate = torch.tensor(solution, dtype=torch.float32)
    candidate = candidate.unsqueeze(0).to('cuda')  # add batch dimension
    with torch.no_grad():
        prediction, _ = model(candidate)

    squared_error = (prediction.item() - ga_instance.gt_y) ** 2
    return -squared_error


# Search bounds and GA settings do not depend on the test row, so set them up once.
x_min = np.min(normalize_train_array, axis=0)
x_max = np.max(normalize_train_array, axis=0)

num_generations = 100
num_parents_mating = 10
sol_per_pop = 50

for idx, row in tqdm(test.iterrows(), total=len(test)):
    row = np.array(row)
    # Last column is the target; everything before it are the features.
    # NOTE: `x_mean` must stay the per-feature training mean. It used to be overwritten
    # with a scalar inside this loop, which corrupted the normalisation of every
    # following row. Re-run the cell defining `x_mean` if that version was executed.
    gt_x = (row[:-1] - x_mean) / x_std
    gt_y = np.log(row[-1])   # the model is trained on log(strength)
    num_genes = len(gt_x)

    # Start from candidates drawn uniformly inside the per-feature training range.
    initial_population = np.random.uniform(low=x_min, high=x_max, size=(sol_per_pop, num_genes))

    ga_instance = pygad.GA(
        num_generations=num_generations,
        num_parents_mating=num_parents_mating,
        fitness_func=fitness_function,
        # The explicit population fixes both the population size and the gene count and
        # replaces the init_range_* arguments.
        initial_population=initial_population,
        mutation_percent_genes=20,
        parent_selection_type="rank",
        crossover_type="single_point",
        mutation_type="random"
    )

    # The fitness callback reads the current target from the GA instance.
    ga_instance.gt_y = gt_y

    ga_instance.run()

    best_solution, best_solution_fitness, _ = ga_instance.best_solution()

    x_pred = np.array(best_solution).reshape(-1)

    # One (8,) array of per-feature squared errors per test row.
    SSE_element.append((gt_x - x_pred) ** 2)
    SST_element.append((gt_x - train_x_normalize_mean) ** 2)


print('SSE:', np.sum(SSE_element))
print('SST:', np.sum(SST_element))

In [None]:
print(len(SSE_element))
print(len(SST_element))

In [None]:
print(SSE_element[0].shape)
print(SST_element[0].shape)

In [None]:
SSE = np.sum(SSE_element, axis=0)
SST = np.sum(SST_element, axis=0)

In [None]:
r_squared = 1 - (SSE/SST)
print(r_squared)