<a href="https://colab.research.google.com/github/danielbauer1979/CAS_PredMod/blob/main/pa_pynb_sess7_WineExample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np 
import matplotlib.pyplot as plt  
import pandas as pd 
import seaborn as sns
import graphviz
import pydot
from io import StringIO  

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, export_graphviz
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier,BaggingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error,confusion_matrix, classification_report, roc_curve, auc

## WINE EXAMPLE

In [None]:
!git clone https://github.com/danielbauer1979/CAS_PredMod.git

In [None]:
# Read the semicolon-delimited red-wine CSV from the cloned repository
# and preview the first rows.
wine_csv_path = 'CAS_PredMod/pa_data_winequality-red.csv'
wine = pd.read_csv(wine_csv_path, delimiter=';')
wine.head()

In [None]:
# Recode the quality score as a binary target: True for scores above 6.
# The column is overwritten in place, so guard on its dtype: running this
# cell a second time would otherwise compare the booleans with 6 and turn
# every label into False.
if not pd.api.types.is_bool_dtype(wine['quality']):
    wine['quality'] = wine['quality'] > 6
wine['quality'].describe()

In [5]:
# Seed NumPy's global generator, then split the data in two stages:
# half for training, and the remaining half divided equally into
# validation and test sets.
np.random.seed(42)
train, holdout = train_test_split(wine, test_size=0.5)
val, test = train_test_split(holdout, test_size=0.5)


def _features_and_target(frame):
    """Return the non-'quality' columns and the 'quality' column of `frame` as arrays."""
    return frame.drop(columns=['quality']).values, frame['quality'].values


X_train, y_train = _features_and_target(train)
X_val, y_val = _features_and_target(val)
X_test, y_test = _features_and_target(test)

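### Tree Classifier

A single regression tree is fit to the binary quality label; its predicted values are used as scores for the ROC curve.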

In [6]:
import pydot
from IPython.display import Image
def print_tree(estimator, features, class_names=None, filled=True):
  """Export a fitted tree to Graphviz DOT text and parse it with pydot.

  Despite the name, nothing is printed: the tree is written by
  `export_graphviz` into an in-memory buffer, and that text is handed to
  `pydot.graph_from_dot_data`.

  Parameters
  ----------
  estimator : fitted tree estimator, passed to `export_graphviz`.
  features : iterable of feature names, or None.
      Materialised as a plain list before being forwarded, so a pandas
      Index (as passed by the caller in this notebook) is handled the same
      way as a list. NOTE(review): some scikit-learn releases appear to
      validate `feature_names` strictly as a list -- confirm against the
      installed version.
  class_names : forwarded unchanged to `export_graphviz`.
  filled : forwarded unchanged to `export_graphviz`.

  Returns
  -------
  The result of `pydot.graph_from_dot_data`. The caller in this notebook
  unpacks it as a one-element sequence: `graph, = print_tree(...)`.
  """
  feature_names = None if features is None else list(features)
  dot_data = StringIO()
  export_graphviz(estimator, out_file=dot_data, feature_names=feature_names,
                  class_names=class_names, filled=filled)
  return pydot.graph_from_dot_data(dot_data.getvalue())

In [None]:
# Fit one regression tree on the training data and render it as a PNG.
tree_feature_names = train.drop(columns=['quality']).columns
wine_tree = DecisionTreeRegressor(
    min_samples_split=5,
    min_impurity_decrease=0.002,
).fit(X_train, y_train)

# print_tree returns a sequence; exactly one graph is expected here.
(graph,) = print_tree(wine_tree, features=tree_feature_names)
Image(graph.create_png())

In [8]:
# Score the held-out test features with the fitted tree.
wine_tree_preds = wine_tree.predict(X=X_test)

In [None]:
# ROC curve and AUC for the single-tree scores on the test set.
fpr, tpr, threshold = roc_curve(test['quality'], wine_tree_preds)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots()
ax.set_title('Receiver Operating Characteristic')
ax.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
ax.legend(loc='lower right')
ax.plot([0, 1], [0, 1], 'r--')  # diagonal reference line
ax.set(
    xlim=[0, 1],
    ylim=[0, 1],
    ylabel='True Positive Rate',
    xlabel='False Positive Rate',
)
plt.show()

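### Random Forest

A random forest of 250 regression trees, evaluated on the test set with a ROC curve and a confusion matrix at a 0.5 cutoff.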

In [None]:
# Random forest settings are collected in one place before the model is built.
rf_settings = dict(n_estimators=250, max_features='sqrt', random_state=1)
wine_rf = RandomForestRegressor(**rf_settings)
wine_rf.fit(X_train, y_train)

In [11]:
# Score the held-out test features with the fitted random forest.
wine_rf_preds = wine_rf.predict(X=X_test)

In [None]:
# ROC curve and AUC for the random-forest scores on the test set.
fpr, tpr, threshold = roc_curve(test['quality'], wine_rf_preds)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots()
ax.set_title('Receiver Operating Characteristic')
ax.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
ax.legend(loc='lower right')
ax.plot([0, 1], [0, 1], 'r--')  # diagonal reference line
ax.set(
    xlim=[0, 1],
    ylim=[0, 1],
    ylabel='True Positive Rate',
    xlabel='False Positive Rate',
)
plt.show()

In [None]:
# Turn the forest's scores into labels (True when the score exceeds 0.5)
# and tabulate them against the true test labels.
rf_predicted_labels = wine_rf_preds > 0.5
confusion_matrix(test['quality'], rf_predicted_labels)

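### Boosting

Gradient boosting with 1,000 trees and a learning rate of 0.05, evaluated on the test set with a ROC curve.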

In [None]:
# Gradient boosting settings are collected in one place before the model is built.
boost_settings = dict(n_estimators=1000, learning_rate=0.05, random_state=1)
wine_boost = GradientBoostingRegressor(**boost_settings)
wine_boost.fit(X_train, y_train)

In [15]:
# Score the held-out test features with the fitted boosting model.
wine_boost_preds = wine_boost.predict(X=X_test)

In [None]:
# ROC curve and AUC for the gradient-boosting scores on the test set.
fpr, tpr, threshold = roc_curve(test['quality'], wine_boost_preds)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots()
ax.set_title('Receiver Operating Characteristic')
ax.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
ax.legend(loc='lower right')
ax.plot([0, 1], [0, 1], 'r--')  # diagonal reference line
ax.set(
    xlim=[0, 1],
    ylim=[0, 1],
    ylabel='True Positive Rate',
    xlabel='False Positive Rate',
)
plt.show()