# multivariate lowess

Uses the `loess` implementation from scikit-misc (`skmisc.loess`).

In [1]:
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from skmisc.loess import loess
import json
from pyecharts import Scatter3D


## 方法

In [16]:
def get_model(x, y, span=0.35):
    """Create a loess model of ``y`` on ``x``, fit it, and return it.

    Args:
        x: Predictor values, handed to ``loess`` unchanged.
        y: Response values, handed to ``loess`` unchanged.
        span: Smoothing span forwarded to ``loess`` (default 0.35).

    Returns:
        The fitted ``loess`` object, ready for ``predict`` calls.
    """
    model = loess(x, y, span=span)
    model.fit()
    return model

def loess_fit(x, y, span=0.75):
    """Fit loess to ``(x, y)`` and evaluate it back on ``x`` with an interval.

    Args:
        x: Predictor values; also used as the prediction points.
        y: Response values.
        span: Smoothing span forwarded to ``loess`` (default 0.75).

    Returns:
        A tuple ``(yfit, ymin, ymax)`` of NumPy arrays: the fitted values and
        the lower / upper bounds returned by ``confidence(0.05)``.
    """
    model = loess(x, y, span=span)
    model.fit()

    # Standard errors are requested because the interval below needs them.
    predicted = model.predict(x, stderror=True)
    # NOTE(review): 0.05 is presumably alpha (a 95% interval) - confirm
    # against the scikit-misc documentation.
    interval = predicted.confidence(0.05)

    # Copy everything into fresh arrays so the caller's results are not
    # tied to the loess objects created inside this function.
    fitted = np.array(predicted.values)
    lower = np.array(interval.lower)
    upper = np.array(interval.upper)
    return fitted, lower, upper

def echart3D_tsne(pddata, label, x_title, y_title, z_title):
    """Build a 3-D scatter chart with one series per distinct label value.

    Args:
        pddata: Table indexed by column name (used like a pandas DataFrame).
        label: Name of the column whose distinct values define the series.
        x_title: Name of the column plotted on the first axis.
        y_title: Name of the column plotted on the second axis.
        z_title: Name of the column plotted on the third axis.

    Returns:
        The populated ``Scatter3D`` chart. The chart title and the axis names
        are fixed ("T-SNE", "TSNE1".."TSNE3") regardless of the column names.
    """
    chart = Scatter3D("T-SNE", width=1200, height=600)
    columns = [x_title, y_title, z_title]
    # set() gives the distinct labels; its iteration order is not defined,
    # so the series order may vary between runs.
    for group in set(pddata[label]):
        rows = pddata[pddata[label] == group]
        # Round-trip through JSON to get plain nested lists of values.
        points = json.loads(rows[columns].to_json(orient='values'))
        chart.add(
            str(group),
            points,
            xaxis3d_name='TSNE1',
            yaxis3d_name='TSNE2',
            zaxis3d_name='TSNE3',
        )
    return chart

def merge(x, y, z=None):
    """Zip coordinates into a list of ``[a, b, c]`` triples for 3-D plotting.

    Two call forms are supported, so both existing usages keep working
    (previously the second ``merge`` definition silently replaced the first,
    making every two-argument call raise ``TypeError``):

    * ``merge(x, y)``: ``x`` is a 2-D array with at least two columns and
      ``y`` is 1-D; row ``i`` becomes ``[x[i, 0], x[i, 1], y[i]]``.
    * ``merge(x, y, z)``: all three are 1-D; row ``i`` becomes
      ``[x[i], y[i], z[i]]``.

    Args:
        x: Array exposing ``shape``; its first dimension sets the row count.
        y: Sequence indexable by row number.
        z: Optional third 1-D sequence; selects the three-argument form.

    Returns:
        A list with ``x.shape[0]`` three-element lists.
    """
    rows = range(x.shape[0])
    if z is None:
        return [[x[i, 0], x[i, 1], y[i]] for i in rows]
    return [[x[i], y[i], z[i]] for i in rows]

def merge_2(x, y):
    """Pair up two 1-D sequences element-wise.

    Args:
        x: Array exposing ``shape``; its first dimension sets the row count.
        y: Sequence indexable by row number.

    Returns:
        A list of ``[x[i], y[i]]`` pairs, one per element of ``x``.
    """
    return [[x[i], y[i]] for i in range(x.shape[0])]

## 测试数据

In [3]:
# Synthetic surface: f = -sin(x1) + 0.5*cos(x2) + Gaussian noise (sd 0.2).
# The seed is fixed so every run of this cell yields the same sample.
np.random.seed(123)
n = 1000
x = np.random.randn(n, 2)
f = (-1 * np.sin(x[:, 0]) + 0.5 * np.cos(x[:, 1])) + 0.2 * np.random.randn(n)
# Ten extra points at which the fitted model is queried later.
x0 = np.random.randn(10, 2)

In [5]:
# Fit a loess surface (span 0.1) on the synthetic data and query it at x0.
print(x.shape, f.shape, x0.shape)
model = get_model(x, f, 0.1)
y_predict = model.predict(x0, stderror=True).values

# Build [x1, x2, value] triples for plotting. The two feature columns are
# passed separately because the merge() left in effect by this file takes
# three 1-D sequences; the former two-argument calls raised TypeError.
test = merge(x[:, 0], x[:, 1], f)
test2 = merge(x0[:, 0], x0[:, 1], y_predict)
test3 = merge(x[:, 0], x[:, 1], model.predict(x, stderror=True).values)

# NOTE(review): the recorded output of this cell contains values such as
# -3.1e+231 and 7.0e-309, which look like uninitialised memory rather than
# real predictions - verify predict() on out-of-sample points.
for point, value in zip(x0, y_predict):
    print(point, value)

(1000, 2) (1000,) (10, 2)
[-0.45059869  0.60959032] -3.105036184601418e+231
[1.17374438 0.87181459] -3.105036184601418e+231
[1.90472342 0.13349125] 0.17650665434121052
[ 1.28184384 -1.15918677] 0.40232183989718623
[ 0.87050189 -0.20963534] 0.9051561157828177
[-0.14667131  1.38059123] 0.22776243055709292
[ 0.96863888 -0.16812921] 0.3040623673000397
[2.55223016 1.2588297 ] 0.4529981942646589
[-0.44176929 -2.42284423] 0.38031250463695804
[ 0.27781194 -0.39105517] 6.99907175959738e-309


In [6]:
# Plot the raw training points next to the loess fit evaluated on the same x.
# The x0 predictions (test2) are deliberately not plotted here.
scatter3D = Scatter3D("T-SNE", width=1200, height=600)

for series_name, series_points in (('train', test), ('validation', test3)):
    scatter3D.add(
        series_name,
        series_points,
        xaxis3d_name='TSNE1',
        yaxis3d_name='TSNE2',
        zaxis3d_name='TSNE3',
    )
# A bare expression as the last statement makes the notebook render the chart.
scatter3D

In [7]:
# Refit with span=0.03 and keep the fitted values plus interval bounds.
yfit, ymin, ymax = loess_fit(x, f, span=0.03)

In [8]:
# Build [x1, x2, value] triples for the raw data, the fit and the lower
# interval bound. The feature columns are passed separately because the
# merge() left in effect by this file takes three 1-D sequences; the former
# two-argument calls raised TypeError.
test = merge(x[:, 0], x[:, 1], f)
test3 = merge(x[:, 0], x[:, 1], yfit)
test2 = merge(x[:, 0], x[:, 1], ymin)
scatter3D = Scatter3D("T-SNE", width=1200, height=600)

# Series: 'train' = raw data, 'test' = lower bound (ymin), 'validation' = fit.
for series_name, series_points in (
    ('train', test),
    ('test', test2),
    ('validation', test3),
):
    scatter3D.add(
        series_name,
        series_points,
        xaxis3d_name='TSNE1',
        yaxis3d_name='TSNE2',
        zaxis3d_name='TSNE3',
    )
# A bare expression as the last statement makes the notebook render the chart.
scatter3D

## GC LENGTH lowess

In [20]:
# Simulated data: integer GC in [20, 90), integer length in [150, 600),
# and depth = GC + length + uniform noise in [0, 1).
n = 1000
gc = np.random.randint(20, 90, size=n)
length = np.random.randint(150, 600, size=n)
depth = gc + length + np.random.rand(n)
print(gc.shape, length.shape, depth.shape)

(1000,) (1000,) (1000,)


In [31]:
# Fit depth on (GC, length) pairs with span=0.05; keep the fit and its bounds.
yfit, ymin, ymax = loess_fit(merge_2(gc, length), depth, span=0.05)

In [32]:
# Plot observed depth, the lower interval bound and the loess fit over the
# (GC, length) plane.
scatter3D = Scatter3D("gc-length", width=1200, height=600)

for series_name, z_values in (('train', depth), ('min', ymin), ('validation', yfit)):
    scatter3D.add(
        series_name,
        merge(gc, length, z_values),
        xaxis3d_name='GC',
        yaxis3d_name='LENGTH',
        zaxis3d_name='DEPTH',
    )
# A bare expression as the last statement makes the notebook render the chart.
scatter3D