# Removing noise
This exercise uses PDAL to remove unwanted noise from an airborne laser scanning (ALS) collection.

## Exercise
PDAL provides the outlier filter (`filters.outlier`), which applies a statistical filter to the data.

Because this operation is somewhat complex, we are going to use a pipeline to define it.

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import pdal
import numpy as np
from sklearn.neighbors.kde import KernelDensity

## Pipeline breakdown

### 1. Reader

### 2. filters.outlier
The PDAL outlier filter does most of the work for this operation.

```json
{
  "type": "filters.outlier",
  "method": "statistical",
  "multiplier": 3,
  "mean_k": 8
}
```

In [2]:
# Input point cloud used by every pipeline in this notebook.
pc_filename='./data/workshop/S1C1_csd_004.csd'
# Pipeline definition holding only the input filename (no filter or writer
# stages); the path is substituted into the JSON template.
# NOTE(review): the name `json` would shadow the standard-library module if
# that module were ever imported in this notebook.
json = u'''
{
  "pipeline":[
    "%s"
  ]
}''' % pc_filename

p = pdal.Pipeline(json)
p.validate()
# NOTE(review): 8 is presumably PDAL's most verbose log level — confirm.
p.loglevel = 8
# Run the pipeline; the returned value is kept and printed in a later cell.
count = p.execute()
# Show whatever the pipeline logged during execution.
log = p.log
print(log)




In [3]:
# Value returned by p.execute() for the read-only pipeline.
print(count)

7418613


In [4]:
# Keep the first array produced by the executed pipeline for inspection.
data = p.arrays[0]

In [5]:
# List the field names of the array, i.e. the dimensions that were read.
data.dtype.names

(u'X',
 u'Y',
 u'Z',
 u'GpsTime',
 u'ReturnNumber',
 u'NumberOfReturns',
 u'EchoRange',
 u'Intensity',
 u'ScanAngleRank')

In [6]:
def dim_kde(arr, lb, ub, n=500, bw=0.25, k='epanechnikov'):
    """Plot a kernel density estimate of ``arr`` and report its peak.

    Parameters
    ----------
    arr : array-like
        Sample values. Any array-like is accepted; it is flattened to a
        single column before fitting.
    lb, ub : float
        Ends of the interval on which the density is evaluated.
    n : int
        Number of evenly spaced evaluation points between ``lb`` and ``ub``.
    bw : float
        Bandwidth passed to ``KernelDensity``.
    k : str
        Kernel name passed to ``KernelDensity``.

    Prints the maximum density on the evaluation grid together with the
    grid location where it occurs, then draws the density curve on a new
    figure. Returns None.
    """
    # Coerce to an (n_samples, 1) column so lists and non-1-D arrays work too.
    samples = np.asarray(arr).reshape(-1, 1)
    xp = np.linspace(lb, ub, n)[:, np.newaxis]
    kde = KernelDensity(kernel=k, bandwidth=bw).fit(samples)
    # score_samples returns log-density; exponentiate once and reuse it.
    pdf = np.exp(kde.score_samples(xp))
    # Identify the peak of the KDE and report its magnitude and location.
    ind = np.argmax(pdf)
    print(pdf[ind],xp[ind])
    # Create the figure only after the fit succeeded, so a failure does not
    # leave an empty figure behind.
    f = plt.figure()
    ax = f.add_subplot(111)
    ax.plot(xp, pdf)

In [7]:
# Quick look at the Intensity dimension: minimum, maximum and mean.
data['Intensity'].min(), data['Intensity'].max(), data['Intensity'].mean()

(0, 15794, 162.79404775528795)

In [8]:
# dim_kde(data['Intensity'], 11, 333, n=100, bw=1.0)

In [9]:
# Intensity value below which 2% of the points fall.
np.percentile(data['Intensity'],2)

11.0

In [10]:
# Pipeline stages, in order: read the input file, reproject to EPSG:32614,
# apply a range filter on Intensity with an upper bound of 333, and write the
# result to ./S1C1_csd_004.laz.
# NOTE(review): 333 matches the upper bound in the commented-out dim_kde call
# above; the rationale for the threshold is not recorded — confirm.
json = u'''
{
  "pipeline":[
    "%s",
    {
      "type":"filters.reprojection",
      "out_srs":"EPSG:32614"
    },
    {
      "type":"filters.range",
      "limits":"Intensity[:333]"
    },
    "./S1C1_csd_004.laz"
  ]
}''' % pc_filename

p = pdal.Pipeline(json)
p.validate()
# NOTE(review): 8 is presumably PDAL's most verbose log level — confirm.
p.loglevel = 8
count = p.execute()
log = p.log
# Print the two values separately: print(count, '\n', log) is rendered as a
# tuple repr (with a literal '\n') by the Python 2 print statement.
print(count)
print(log)

(7272482, '\n', u'(pypipeline writers.las Debug) Wrote 7272482 points to the LAS file\n')


In [11]:
# First array produced by the reproject/filter/write pipeline run.
data2 = p.arrays[0]

In [12]:
# Minimum, maximum and mean X of the filtered output, as a sanity check on
# the coordinates produced by the pipeline's reprojection stage.
data2['X'].min(), data2['X'].max(), data2['X'].mean()

(596817.36393247196, 597271.79786267015, 597049.09491513984)