# SVM

## Einleitung



In [21]:
import os

import pandas as pd
import numpy as np
#data visualization
import matplotlib.pyplot as plt
import librosa
from sklearn.model_selection import train_test_split

from scipy.stats import kurtosis
from scipy.stats import skew

# Show floats with 10 decimal places when pandas objects are displayed.
pd.set_option('display.precision', 10)

## Umgebungsvariablen

In [22]:
# Feature files generated on 24.03.2019.
# Data directory: set the EARTHQUAKE_DATA_DIR environment variable to point the
# notebook at another location; the fallback is the original local folder.
earthquake_daten = os.environ.get('EARTHQUAKE_DATA_DIR') or (
    'C:/studium/studium/CAS_PML/Projekt_Arbeit/earthquake/Daten/earthquake_data/')
# File names are appended by plain string concatenation, so make sure the
# directory ends with a path separator.
if not earthquake_daten.endswith(('/', '\\')):
    earthquake_daten += '/'

# Available feature files (names as stored on disk).
feature_62900_94 = 'Features_62900-94.csv'
feature_41934_94 = 'Features_41934_94.csv'
Features_4194_94 = 'Features_4194_94.csv'

# Feature laden

In [23]:
# Read the selected feature CSV; the path is the data directory followed by the file name.
train_data = pd.read_csv(
    filepath_or_buffer=earthquake_daten + feature_41934_94,
)

In [24]:
# Positional split of the loaded table: columns 1..94 become the feature frame,
# the last column becomes the target array.
# NOTE(review): column 0 is skipped - presumably a saved index column; confirm against the CSV.
feature_data = train_data.iloc[:, slice(1, 95)]
target_column = train_data.iloc[:, -1]
time_to_failure = np.array(target_column)
del target_column

## Feature Filtern

In [22]:
# Names of the feature columns to keep; handed to DataFrame.filter(items=...) below.
items_to_filter = [""]

In [23]:
# Keep only the requested feature columns. Blank entries are ignored, and an
# empty selection means "keep every column": DataFrame.filter(items=[''])
# would otherwise return a frame with zero columns and break all later cells.
selected_items = [name for name in items_to_filter if name]
if selected_items:
    filtered_feature_data = feature_data.filter(items=selected_items)
else:
    filtered_feature_data = feature_data.copy()

In [24]:
# Continue with the filtered frame. This rebinds feature_data, so re-running the
# filter cells afterwards operates on the already filtered data.
feature_data = filtered_feature_data

## Training / Testdaten vorbereiten

In [41]:
# Hold out 25 % of the rows as a test set; random_state=0 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    feature_data, time_to_failure, test_size=0.25, random_state=0
)

# Model Workbench

## Imports

In [25]:
# Scaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Model selection
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Model
from sklearn.svm import SVR


In [26]:
def modelfit(alg, dtrain, time_to_failure, performCV=True, printFeatureImportance=True, cv_folds=5):
    """Fit ``alg`` on the given data and print a short model report.

    The report contains the mean absolute error on the data the model was
    fitted on and, optionally, summary statistics of a cross-validation run
    scored with ``'neg_mean_absolute_error'`` (negated MAE, so the printed
    values are <= 0).

    Parameters
    ----------
    alg : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.
    dtrain : DataFrame or array
        Feature matrix used for fitting, prediction and cross-validation.
    time_to_failure : array-like
        Target values, one per row of ``dtrain``.
    performCV : bool, default True
        Run ``cross_val_score`` and print mean/std/min/max of the fold scores.
    printFeatureImportance : bool, default True
        Plot a bar chart of ``alg.feature_importances_``. Estimators that do
        not expose this attribute (e.g. SVR) are skipped with a notice instead
        of raising ``AttributeError``.
    cv_folds : int, default 5
        Passed as ``cv`` to ``cross_val_score``.

    Returns
    -------
    None
        All results are printed / plotted.
    """
    #Fit the algorithm on the data
    alg.fit(dtrain, time_to_failure)

    #Predict training set:
    dtrain_predictions = alg.predict(dtrain)

    #Perform cross-validation:
    if performCV:
        cv_score = cross_val_score(alg, dtrain, time_to_failure, cv=cv_folds, scoring='neg_mean_absolute_error',n_jobs=-1)

    #Print model report:
    print("\nModel Report")
    print("MAE : %.4g" % mean_absolute_error(time_to_failure, dtrain_predictions))

    if performCV:
        print("CV Score : Mean - %.7g | Std - %.7g | Min - %.7g | Max - %.7g" % (np.mean(cv_score),np.std(cv_score),np.min(cv_score),np.max(cv_score)))

    #Print Feature Importance:
    if printFeatureImportance:
        importances = getattr(alg, 'feature_importances_', None)
        if importances is None:
            # Not every estimator provides importances; do not crash the report.
            print("Feature importances are not available for %s" % type(alg).__name__)
        else:
            # Scaled data arrives as a plain array without column labels;
            # fall back to positional feature names in that case.
            feature_names = getattr(dtrain, 'columns', None)
            if feature_names is None:
                feature_names = ['feature_%d' % i for i in range(len(importances))]
            feat_imp = pd.Series(importances, feature_names).sort_values(ascending=False)
            feat_imp.plot(kind='bar', title='Feature Importances',figsize=(15,10))
            plt.ylabel('Feature Importance Score')

# Baseline

## Scale

In [27]:
# Fit a MinMaxScaler on the complete feature frame, then scale that same frame.
min_max_scaler = MinMaxScaler()
min_max_scaler.fit(feature_data)
feature_data_minMax = min_max_scaler.transform(feature_data)

  return self.partial_fit(X, y)


In [28]:
# Fit a StandardScaler on the complete feature frame, then scale that same frame.
standard_scaler = StandardScaler()
standard_scaler.fit(feature_data)
feature_data_standard = standard_scaler.transform(feature_data)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


## Fit SVR linear

### MinMax Scaler

In [29]:
# Baseline: linear-kernel SVR on the min-max scaled features.
gbm0 = SVR(kernel="linear")
modelfit(
    alg=gbm0,
    dtrain=feature_data_minMax,
    time_to_failure=time_to_failure,
    printFeatureImportance=False,
)


Model Report
MAE : 2.061
CV Score : Mean - -2.15157 | Std - 0.4187177 | Min - -2.456785 | Max - -1.325667


### Standard Scaler

In [30]:
# Baseline: linear-kernel SVR on the standardized features.
gbm0 = SVR(kernel="linear")
modelfit(
    alg=gbm0,
    dtrain=feature_data_standard,
    time_to_failure=time_to_failure,
    printFeatureImportance=False,
)


Model Report
MAE : 2.048
CV Score : Mean - -2.156031 | Std - 0.4135659 | Min - -2.455205 | Max - -1.34103


## Fit SVR rbf

### MinMax Scaler

In [None]:
# Baseline: RBF-kernel SVR on the min-max scaled features.
gbm1 = SVR(kernel="rbf")
modelfit(
    alg=gbm1,
    dtrain=feature_data_minMax,
    time_to_failure=time_to_failure,
    printFeatureImportance=False,
)

### Standard Scaler

In [16]:
# Baseline: RBF-kernel SVR on the standardized features.
gbm1 = SVR(kernel="rbf")
modelfit(
    alg=gbm1,
    dtrain=feature_data_standard,
    time_to_failure=time_to_failure,
    printFeatureImportance=False,
)




Model Report
MAE : 2.099
CV Score : Mean - -2.158668 | Std - 0.4004703 | Min - -2.42828 | Max - -1.36773


# Parameter Tuning