In [75]:
import numpy as np
import math
import numpy.linalg as linalg
import pandas as pd
from matplotlib import pyplot
import sklearn as skl
from sklearn.preprocessing import PolynomialFeatures
import heapq


In [76]:
def tricubic(x):
    y = np.zeros_like(x)
    idx = (x >= 0) & (x <= 1)
    y[idx] = np.power(1.0 - np.power(np.abs(x[idx]), 3), 3)
    return y

In [136]:
def normalize(array):
    stds = array.std(axis=0)
    
    return (array- array.mean(axis=0))/stds

In [193]:
def get_indexes(distances, q):
    mins = []
    for i in range(0, len(distances)):
        if len(mins) >= q:
            if mins[0][0] < -1*distances[i]:
                heapq.heapreplace(mins, (-1*distances[i], i))
        else:
            heapq.heappush(mins, (-1*distances[i], i))

    indexes = []
    max_dist = 0.0
    for (dist, index) in mins:
        indexes.append(index)
        if -1*dist > max_dist:
            max_dist = -1*dist
    return indexes, max_dist

In [194]:
def get_weights(distances, max_dist):
    weights = tricubic(distances/max_dist)
    return np.diag(weights)

In [216]:
def loess(data, Y, f=0.1, fit='quadratic'):
    n = len(Y)
    q = int(math.floor(f*n))
    data_norm = normalize(data)
    if fit == 'quadratic':
        poly = PolynomialFeatures(2)
    
    y_ests = np.zeros_like(Y)
    
    for i in range(0, len(data_norm)):
        distances = linalg.norm(data_norm - data_norm[i], ord=2, axis=1)
        indexes, max_dist = get_indexes(distances, q)
        W = get_weights(distances[indexes], max_dist)
        b = Y[indexes]
        if fit=='quadratic':
            A = poly.fit_transform(data_norm[indexes])
        else:
            A = np.append(np.ones((len(indexes),1)), data_norm[indexes], axis=1)
            
        At = np.transpose(A)
        try:
            coeffs = linalg.solve(np.dot(At, np.dot(W, A)), np.dot(At, np.dot(W, b)))
        except:
            print('matriz singular')
            print(data_norm[i])
            print(data_norm[indexes])
            print(W)
            coeffs = linalg.solve(np.dot(At, A), np.dot(At, b))
        
        
        if fit=='quadratic':
            
            y_est = np.dot(poly.fit_transform(data_norm[i].reshape(1, -1)),coeffs)
        else:
            y_est = np.dot(np.append([1.0], data_norm[i]), coeffs)
        
        y_ests[i] = y_est
    
    return y_ests

In [214]:
data = pd.read_csv('../data/dataset.csv',usecols=['ozone', 'radiation', 'temperature', 'wind'])

In [215]:
x=data[['radiation', 'temperature', 'wind']].to_numpy()
y = data['ozone'].to_numpy()
estimates = loess(x, y, 0.05, 'quadratic')
print(estimates)
print(y)

[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 1.31484485e-04 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 6.66706909e-04 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  8.78064509e-01]]
[[0.         0.         0.         0.         0.        ]
 [0.         0.02997821 0.         0.         0.        ]
 [0.         0.         0.68598803 0.         0.        ]
 [0.         0.         0.         1.         0.        ]
 [0.         0.         0.         0.         0.66668475]]
[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 6.12102518e-04 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 4.28937968e-02
  0.00

In [171]:
normalized = normalize(x)
distances = linalg.norm(normalized - normalized[0], ord=2, axis=1)
print(distances)

[0.         0.967492   1.70367235 1.85854871 1.2657361  2.23148963
 4.09866869 0.99742638 1.2180685  1.35788406 2.3403565  1.98932047
 1.83280159 3.50328139 1.86179189 1.81327531 2.27054263 3.0322614
 2.03170633 1.80330836 2.34316376 2.66931741 1.40086689 1.36454344
 1.84481882 3.22168809 2.81632429 1.65615648 2.3647165  3.92954628
 1.7735662  1.52778199 1.38240409 2.21129598 2.06556483 1.64097546
 1.8698157  2.38486356 2.49389394 2.78581027 2.82685584 2.32486081
 2.20397199 2.58567795 2.84940045 1.67115002 2.0066885  2.09845183
 2.20608615 2.24683012 2.15210626 2.33223456 1.93929116 2.01434502
 2.66704575 2.22803317 2.21099259 1.82384484 1.71267921 1.89423173
 2.96430657 2.01264191 2.60990498 2.59466288 2.43750183 2.31312094
 2.16232079 1.55928666 1.87105309 2.01068862 1.25822542 1.63534433
 1.41926883 2.62974667 2.63638168 1.44226446 1.93185138 2.02869397
 3.23135781 3.21254585 3.11590937 2.8629845  2.54637208 2.71507191
 3.03354864 2.85229089 2.35380971 3.09898326 1.82219013 1.45726

In [90]:
indexes, max_dist = get_indexes(distances, 0, 15)

In [91]:
distances[indexes]

array([1.37157039, 1.36454344, 1.27773571, 1.35788406, 1.2657361 ,
       1.25822542, 0.99742638, 0.967492  , 1.2180685 , 1.21753635,
       1.1830442 , 1.21954442, 1.16250602, 0.34785747, 0.98022504])

In [92]:
print(distances[indexes[0]]/max_dist)

1.0


In [93]:
get_weights(distances[indexes], max_dist)

array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 3.57544157e-06, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 7.02503571e-03, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.60344227e-05,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+0

In [62]:
poly = PolynomialFeatures(2)
print(poly.fit_transform(x[indexes]))

[[1.0000e+00 2.3000e+02 7.5000e+01 1.0900e+01 5.2900e+04 1.7250e+04
  2.5070e+03 5.6250e+03 8.1750e+02 1.1881e+02]
 [1.0000e+00 2.7900e+02 7.6000e+01 7.4000e+00 7.7841e+04 2.1204e+04
  2.0646e+03 5.7760e+03 5.6240e+02 5.4760e+01]
 [1.0000e+00 2.3700e+02 7.8000e+01 6.9000e+00 5.6169e+04 1.8486e+04
  1.6353e+03 6.0840e+03 5.3820e+02 4.7610e+01]
 [1.0000e+00 2.7400e+02 6.8000e+01 1.0900e+01 7.5076e+04 1.8632e+04
  2.9866e+03 4.6240e+03 7.4120e+02 1.1881e+02]
 [1.0000e+00 2.9900e+02 6.5000e+01 8.6000e+00 8.9401e+04 1.9435e+04
  2.5714e+03 4.2250e+03 5.5900e+02 7.3960e+01]
 [1.0000e+00 1.1500e+02 7.6000e+01 7.4000e+00 1.3225e+04 8.7400e+03
  8.5100e+02 5.7760e+03 5.6240e+02 5.4760e+01]
 [1.0000e+00 2.5600e+02 6.9000e+01 9.7000e+00 6.5536e+04 1.7664e+04
  2.4832e+03 4.7610e+03 6.6930e+02 9.4090e+01]
 [1.0000e+00 1.1800e+02 7.2000e+01 8.0000e+00 1.3924e+04 8.4960e+03
  9.4400e+02 5.1840e+03 5.7600e+02 6.4000e+01]
 [1.0000e+00 2.9000e+02 6.6000e+01 9.2000e+00 8.4100e+04 1.9140e+04
  2.6680e+03