In [1]:
DATA_PATH = '../../data/CRTS2/'

In [2]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

Load Feature Dataframes

In [3]:
# Loead transient features
indir = DATA_PATH
filename = 'transient_features.pickle' 
inpath = indir + filename
df_feat_tran = pd.read_pickle(inpath)
df_feat_tran.shape

(4384, 20)

In [4]:
# Load permanent Features
indir = DATA_PATH
filename = 'permanent_features.pickle' 
inpath = indir + filename
df_feat_perm = pd.read_pickle(inpath)
df_feat_perm.shape

(4384, 20)

Create inputs and outputs

In [5]:
# Add output class '1' to transient objects
df_feat_tran['is_transient'] = 1
# Add output class '0' to permanent objects
df_feat_perm['is_transient'] = 0

In [6]:
# Merge dataframes
df = df_feat_tran.append(df_feat_perm, ignore_index=True)

In [7]:
# Remove IDs
df = df.drop(['ID'], axis=1)

In [48]:
# Obtain X and y
X = df.drop(['is_transient'], axis=1).as_matrix()
y = df['is_transient'].as_matrix()

Split in Test & Train Sets

In [49]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [50]:
X_train.shape, y_train.shape

((5874, 19), (5874,))

In [51]:
X_test.shape, y_test.shape

((2894, 19), (2894,))

In [52]:
# Create scaler
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Classify using SVC

In [53]:
tuned_parameters = {
    'kernel': ['rbf'],
    'gamma':[1e-1, 1e-2, 1e-3, 1e-4, 1e-5],
    'C': [1, 10, 100, 1000]
}

In [54]:
model = SVC()
clf = GridSearchCV(model, tuned_parameters, cv=2)
clf.fit(X_train, y_train)

GridSearchCV(cv=2, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'gamma': [0.1, 0.01, 0.001, 0.0001, 1e-05], 'kernel': ['rbf'], 'C': [1, 10, 100, 1000]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [55]:
clf.best_score_, clf.best_estimator_

(0.83724889342866871, SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
   decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
   max_iter=-1, probability=False, random_state=None, shrinking=True,
   tol=0.001, verbose=False))

In [56]:
clf.score(X_test, y_test)

0.84519695922598481

Classify using RF

In [21]:
tuned_parameters = { 
    'n_estimators': [200, 700],
    'max_features': ['auto', 'sqrt', 'log2']
}

In [22]:
model = RandomForestClassifier()
clf = GridSearchCV(model, tuned_parameters, cv=2)
clf.fit(X_train, y_train)

GridSearchCV(cv=2, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_estimators': [200, 700], 'max_features': ['auto', 'sqrt', 'log2']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [23]:
clf.best_score_, clf.best_estimator_

(0.8537623425263875,
 RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
             max_depth=None, max_features='sqrt', max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=200, n_jobs=1,
             oob_score=False, random_state=None, verbose=0,
             warm_start=False))

In [24]:
clf.score(X_test, y_test)

0.85521769177608842

Classify using NN

In [57]:
tuned_parameters = {
    'learning_rate': ['constant', "invscaling", "adaptive"],
    'hidden_layer_sizes': [(100), (100,100)],
    'alpha': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5],
    'activation': ["logistic", "relu", "tanh"]
}

In [58]:
model = MLPClassifier()
clf = GridSearchCV(model, tuned_parameters, cv=2)
clf.fit(X_train, y_train)

GridSearchCV(cv=2, error_score='raise',
       estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'hidden_layer_sizes': [100, (100, 100)], 'alpha': [0.1, 0.01, 0.001, 0.0001, 1e-05], 'learning_rate': ['constant', 'invscaling', 'adaptive'], 'activation': ['logistic', 'relu', 'tanh']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [59]:
clf.best_score_, clf.best_estimator_

(0.83844058563159685,
 MLPClassifier(activation='relu', alpha=0.1, batch_size='auto', beta_1=0.9,
        beta_2=0.999, early_stopping=False, epsilon=1e-08,
        hidden_layer_sizes=(100, 100), learning_rate='invscaling',
        learning_rate_init=0.001, max_iter=200, momentum=0.9,
        nesterovs_momentum=True, power_t=0.5, random_state=None,
        shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
        verbose=False, warm_start=False))

In [60]:
clf.score(X_test, y_test)

0.84277816171389086