# Data Science in Python
## am Beispiel *California Housing*


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
% pylab inline

#### Laden der Daten (Trainings- und Validierungsdatensatz)

In [None]:
# `load_data` comes from the project-local `utilities` module; its result is
# unpacked into training features/targets and test features/targets.
from utilities import load_data
Xtrain, ytrain, Xtest, ytest = load_data()
# Last expression of the cell, so the notebook renders its value.
# NOTE(review): presumably Xtrain is a pandas DataFrame (it exposes .head()) —
# confirm in utilities.
Xtrain.head()

#### Plotten der Zielverteilung

In [None]:
# Histogram of the training target, split into 50 bins.
plt.hist(ytrain, bins=50)
plt.show()

### Trivialmodell: Mittelwert

In [None]:
from utilities import evaluate
# Baseline "model": a single constant, the mean of the training target.
trivialprognose = np.mean(ytrain)
# NOTE(review): a scalar is passed here, whereas the other cells pass the
# output of predict(Xtest) — confirm that `evaluate` accepts a scalar prediction.
evaluate(trivialprognose, ytest)

## Erstes Modell: Lineare Regression


In [None]:
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and training are chained.
est = LinearRegression().fit(Xtrain, ytrain)

# Predict on the test features and hand predictions plus true targets to the
# project's `evaluate` helper.
prediction = est.predict(Xtest)
evaluate(prediction, ytest)

## Zweites Modell: Support-Vektor-Regression


In [None]:
from sklearn.svm import SVR

# Support vector regression on the unscaled features; max_iter=5000 is passed
# to the estimator as its iteration limit. fit() returns the estimator, so the
# fitted model is bound to `est` directly.
est = SVR(max_iter=5000).fit(Xtrain, ytrain)

# Predict on the test features and pass the result to `evaluate`.
prediction = est.predict(Xtest)
evaluate(prediction, ytest)

## Erweiterung mit Skalierungsverfahren


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Two-step pipeline: standardize the features, then fit the same SVR
# configuration as in the previous cell.
pipe = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("regressor", SVR(max_iter=5000)),
])
pipe.fit(Xtrain, ytrain)

# Predict on the test features and pass the result to `evaluate`.
prediction = pipe.predict(Xtest)
evaluate(prediction, ytest)

## Weitere Betrachtung: Zweidimensionale Korrelationen


In [None]:
# Reload all four splits; only ytest is used in this cell.
# NOTE(review): presumably the reload is meant to provide fresh, unmodified
# data — confirm against utilities.load_data.
Xtrain, ytrain, Xtest, ytest = load_data()

# `visualize` is a project-local helper; here it receives the test targets.
from utilities import visualize
visualize(ytest)

## Erweiterung: Separate Regression der Geokoordinaten


In [None]:
from utilities import RegressionOnSubset
from sklearn.neighbors import KNeighborsRegressor

# Start from freshly loaded splits.
Xtrain, ytrain, Xtest, ytest = load_data()

# Columns handed to the subset regressor (the geo coordinates).
columns = ["Longitude", "Latitude"]

# First pipeline step: the project-local RegressionOnSubset wrapper, built
# from a k-nearest-neighbours regressor and the coordinate columns.
geo_step = RegressionOnSubset(KNeighborsRegressor(), columns)

# Geo step first, followed by the scaler + SVR combination used before.
pipe = Pipeline(steps=[
    ("geo_regressor", geo_step),
    ("scaler", StandardScaler()),
    ("regressor", SVR(max_iter=5000)),
])

# Last expression of the cell: the fitted pipeline is rendered as cell output.
pipe.fit(Xtrain, ytrain)


### Visualisieren der Ergebnisse

In [None]:
# Reload the splits, then predict on the test features with the pipeline
# fitted in the previous cell.
# NOTE(review): reloading before predict suggests earlier steps may modify the
# frames in place — confirm in utilities.
Xtrain, ytrain, Xtest, ytest = load_data()
prediction = pipe.predict(Xtest)


In [None]:
from IPython.display import display

# A notebook only renders the value of a cell's LAST expression, so a bare
# `Xtest.head()` in front of another statement is silently discarded.
# Display the preview explicitly instead.
display(Xtest.head())

# NOTE(review): `knearest` is not created anywhere in this notebook; presumably
# the RegressionOnSubset step adds it to Xtest during pipe.predict — confirm
# in utilities.
visualize(Xtest.knearest)

In [None]:
# Pass the predictions of the geo-extended pipeline and the true test targets
# to the project's `evaluate` helper.
evaluate(prediction, ytest)