 # Quasars Classification with Random Forest Supervised Learning

In [1]:
#!/usr/bin/env python3 -Wignore
from ipyparallel import Client
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from support_functions import plot_confusion_matrix, generate_features_targets

  from collections import Sequence
  from collections import Iterable
  from collections import Mapping, namedtuple, defaultdict, Sequence
  from numpy.core.umath_tests import inner1d


* The __rf_predict_actual__ function takes two arguments: the data to be analyzed and the number of estimators (n_estimators) to be used in the random forest. The function should return two NumPy arrays containing the predicted and actual (targets) classes, respectively.

In [2]:
def rf_predict_actual(data, n_estimators):
    """Return cross-validated random-forest predictions and the true classes.

    Args:
        data: Input passed unchanged to ``generate_features_targets``, which
            splits it into a feature set and the matching targets.
        n_estimators: Number of trees used to build the
            ``RandomForestClassifier``.

    Returns:
        A ``(predicted, targets)`` tuple: the out-of-fold predictions produced
        by ``cross_val_predict`` with 10 folds, and the targets exactly as
        returned by ``generate_features_targets``.
    """
    X, y = generate_features_targets(data)

    # A fresh, unfitted forest; cross_val_predict fits it per fold.
    forest = RandomForestClassifier(n_estimators=n_estimators)

    # 10-fold cross validation: each prediction comes from a fold's test split.
    return cross_val_predict(forest, X, y, cv=10), y

### Load data to be analyzed

In [3]:
# Load the array stored in the .npy file; it is the input handed to
# rf_predict_actual below.
# NOTE(review): presumably SDSS DR6 colours plus class labels, judging by the
# filename -- confirm against the data source.
data = np.load('sdssdr6_colors_class.200000.npy')

In [None]:
# Get the cross-validated predictions and the actual classes.
number_estimators = 50              # Number of trees in the forest
predicted, targets = rf_predict_actual(data, number_estimators)

# Score the model: compare the predicted classes with the actual ones.
model_score = accuracy_score(targets, predicted)
print("Our accuracy score: %s" % model_score)

# Build the confusion matrix with scikit-learn's confusion_matrix.
# The unique labels are sorted because iterating a bare set yields an
# arbitrary order (and, for string labels, one that can change between
# interpreter sessions), which would shuffle the rows/columns of the matrix
# and of the plot from run to run.
class_labels = sorted(set(targets))
model_cm = confusion_matrix(y_true=targets, y_pred=predicted, labels=class_labels)

# Plot the confusion matrix using the provided helper (normalize=False).
plt.figure()
plot_confusion_matrix(model_cm, classes=class_labels, normalize=False)
plt.show()