In [49]:
import numpy as np
import math
import numpy.linalg as linalg
import pandas as pd
from matplotlib import pyplot as plt
import sklearn as skl
from sklearn.preprocessing import PolynomialFeatures
import heapq


In [27]:
def tricubic(x):
    y = np.zeros_like(x)
    idx = (x >= 0) & (x <= 1)
    y[idx] = np.power(1.0 - np.power(np.abs(x[idx]), 3), 3)
    return y

In [28]:
def normalize(array):
    stds = array.std(axis=0)
    
    return (array- array.mean(axis=0))/stds

In [29]:
def get_indexes(distances, q):
    mins = []
    for i in range(0, len(distances)):
        if len(mins) >= q:
            if mins[0][0] < -1*distances[i]:
                heapq.heapreplace(mins, (-1*distances[i], i))
        else:
            heapq.heappush(mins, (-1*distances[i], i))

    indexes = []
    max_dist = 0.0
    for (dist, index) in mins:
        indexes.append(index)
        if -1*dist > max_dist:
            max_dist = -1*dist
    return indexes, max_dist

In [30]:
def get_weights(distances, max_dist):
    weights = tricubic(distances/max_dist)
    return np.diag(weights)

In [40]:
def loess(data, Y, f=0.1, fit='quadratic', ord=2):
    cant = len(Y)
    q = int(math.ceil(f*cant))
    data_norm = normalize(data)
    if fit == 'quadratic':
        poly = PolynomialFeatures(2)
    
    y_ests = np.zeros_like(Y,dtype="float64")
    
    for i in range(0, cant):
        distances = linalg.norm(data_norm - data_norm[i], ord=ord, axis=1)
        indexes, max_dist = get_indexes(distances, q)
        W = get_weights(distances[indexes], max_dist)
        b = Y[indexes]
        if fit=='quadratic':
            A = poly.fit_transform(data_norm[indexes])
        else:
            A = np.append(np.ones((len(indexes),1)), data_norm[indexes], axis=1)
            
        At = np.transpose(A)
        try:
            coeffs = linalg.solve(np.dot(At, np.dot(W, A)), np.dot(At, np.dot(W, b)))
        except:

            U, E, Vt = linalg.svd(np.dot(W, A))
            V = np.transpose(Vt)
            Ut = np.transpose(U)
            
            Et = np.zeros((np.shape(V)[1], np.shape(Ut)[0]))
            
            for j in range(0, len(E)):
                if E[j] != 0:
                    Et[j,j] = 1/E[j]
            
            MPInv = np.dot(V, np.dot(Et, Ut))
            coeffs = np.dot(MPInv, np.dot(W, b))
        
        if fit=='quadratic':
            
            y_est = np.dot(poly.fit_transform(data_norm[i].reshape(1, -1)),coeffs)
        else:
            y_est = np.dot(np.append([1.0], data_norm[i]), coeffs)
        
        y_ests[i] = y_est
    
    return y_ests

In [41]:
data = pd.read_csv('../data/dataset.csv',usecols=['ozone', 'radiation', 'temperature', 'wind'])

In [47]:
x=data[['radiation', 'temperature', 'wind']].to_numpy()
y = data['ozone'].to_numpy()
estimates = loess(x, y, 0.1, 'quadratic', 2)
print(estimates)
print(y)

[ 41.00109371  35.99706359  12.03397047  18.0020976   23.00166639
  19.00123742   8.00167605  16.00085048  10.63323158  14.00293491
  18.01129974  13.9992086   34.00105272   5.99686999  30.01421633
  11.01033671   0.93346062  11.00009637   3.95322519  31.99988955
  22.98091718  44.98769639 111.13648785  37.00087849  29.04711432
  70.99689494  38.9470335   23.00041929  19.55893437  37.00024769
  19.9999979   12.00893828  13.25282132 135.02088474  49.02427132
  31.70896754  63.99960992  39.98904218  77.00226053  96.88681747
  97.00000652  83.44505799  10.03237185  26.94891485   7.00001172
  48.02200124  35.15649593  60.97444605  78.99997521  62.97671412
  16.00633749  79.51860331 106.72556462  19.93064615  52.
  83.7551075   50.0567536   66.3895108   58.95006554  38.13212472
   9.00004024  16.16137744 121.0910266   88.76814633 110.0004877
  43.99990361  27.49282791  64.99751232  21.97099924  59.00687306
  22.98456853  32.38100487  42.20110407  20.75871909   8.98719219
  44.9862452  167.9

In [56]:
cols = list(data.columns)
fig, axs = plt.subplots(4,4,sharex='all', sharey='all')
for i in range(0,4):
    for j in range(0,4)
        data.plot(kind='scatter', subplots=True, sharey=True,layout=(4,4))

radiation


ValueError: scatter requires an x and y column