# Predictive Analysis

In [29]:
import sys
import os
import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides library diagnostics (for example
# scikit-learn's undefined-metric or convergence warnings) — consider narrowing
# it to specific warning categories.
warnings.filterwarnings('ignore')

In [2]:
import IPython
# from IPython.display import Image
# from IPython.core.display import HTML
from IPython.display import clear_output
from IPython.display import display
from tqdm.notebook import tqdm # FOR FANCY GREEN BAR

In [3]:
import numpy as np
import pandas as pd
import polars as pr # new pkg similar to pandas but faster
import glob

In [4]:
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
import seaborn as sns
import plotly

# Global plotting defaults applied to every figure rendered below.
# mpl.rcParams["figure.figsize"] = [12, 8]
# figure.dpi: 100 is the default; ~300 gives a much finer plot but renders slower.
mpl.rcParams["figure.dpi"] = 100
mpl.style.use("fivethirtyeight")

In [5]:
import re
import random
import time
from datetime import datetime as dt
import scipy
import statsmodels.api as sm

In [19]:
import sklearn
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from xgboost import XGBRegressor

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, jaccard_score, f1_score, r2_score, roc_curve, auc, log_loss, classification_report

# Baseline classifiers keyed by display name.
# The tree-based models consume random numbers while fitting. Seeding Python's
# `random` module does not affect them, so their seed is pinned explicitly to
# make repeated runs fit identical models.
classifiers = {
    "LogisticRegression" : LogisticRegression(),
    "KNeighbors" : KNeighborsClassifier(),
    "SVC" : SVC(),
    "DecisionTree" : DecisionTreeClassifier(random_state=42),
    "RandomForest" : RandomForestClassifier(random_state=42),
    "XGBoost" : XGBClassifier(random_state=42)
}

In [7]:
from lazypredict.Supervised import LazyClassifier, LazyRegressor

# Data

In [8]:
# Seed every random number generator the notebook relies on.
# scikit-learn falls back to NumPy's global generator when no `random_state`
# is given, and `random.seed` alone does not touch that generator.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [9]:
# Ground-truth binary labels: 50 samples, shown ten per row.
y_true = [
    0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
    0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
    0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
    1, 1, 1, 0, 1, 0, 0, 1, 1, 0,
    0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
]

# Predicted hard 0/1 labels for the same 50 samples, in the same order.
y_pred = [
    1, 0, 1, 0, 1, 0, 0, 1, 0, 1,
    0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
    0, 1, 0, 1, 1, 0, 0, 1, 1, 1,
    0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
    0, 1, 0, 1, 1, 0, 1, 0, 0, 1,
]


In [10]:
# Score the binary predictions in `y_pred` against `y_true`.

# Confusion matrix (rows: true class, columns: predicted class).
# Stored as `conf_matrix` rather than `cm` so it does not overwrite the
# `matplotlib.cm` module imported at the top of the notebook.
conf_matrix = confusion_matrix(y_true, y_pred)
# Accuracy
accuracy = accuracy_score(y_true, y_pred)
# Precision
precision = precision_score(y_true, y_pred)
# Recall
recall = recall_score(y_true, y_pred)
# F1-Score
f1 = f1_score(y_true, y_pred)
# ROC curve and AUC.
# y_pred holds hard 0/1 labels rather than scores, so the curve has a single
# operating point between (0, 0) and (1, 1).
fpr, tpr, thresholds = roc_curve(y_true, y_pred)
roc_auc = auc(fpr, tpr)

# Jaccard index
jc = jaccard_score(y_true, y_pred)


# print(conf_matrix)
# The label spelling below is kept as-is so the saved cell output still matches.
print(f" Accuracy \t{accuracy}")
print(f" Precision \t {precision}")
print(f" Recall \t {recall}")
print(f" F1 \t {f1}")
print(f" ROC \t {roc_auc}")
print(f" Jaccurred \t {jc}")
# print(classification_report(y_true, y_pred))

 Accuracy 	0.72
 Precision 	 0.7307692307692307
 Recall 	 0.7307692307692307
 F1 	 0.7307692307692307
 ROC 	 0.7195512820512819
 Jaccurred 	 0.5757575757575758


### Multiclass

In [11]:
# Four-class toy example: one true label per class, paired with its prediction.
y_true = list("abcd")
y_pred = list("cacd")

In [12]:
# Multiclass metrics: per-class scores are averaged, weighted by class support.

# Accuracy
accuracy = accuracy_score(y_true, y_pred)
# Precision.
# Class 'b' is never predicted, so its precision is undefined. zero_division=0
# scores it as 0 explicitly instead of relying on the warn-and-zero fallback.
precision = precision_score(y_true, y_pred, average="weighted", zero_division=0)
# Recall
recall = recall_score(y_true, y_pred, average="weighted")
# F1-Score
f1 = f1_score(y_true, y_pred, average="weighted")
# Jaccard index
jc = jaccard_score(y_true, y_pred, average="weighted")

# ROC curve and AUC are skipped here: roc_curve only handles binary targets.
# fpr, tpr, thresholds = roc_curve(y_true, y_pred)
# roc_auc = auc(fpr, tpr)


# The label spelling below is kept as-is so the saved cell output still matches.
print(f" Accuracy \t{accuracy}")
print(f" Precision \t {precision}")
print(f" Recall \t {recall}")
print(f" F1 \t {f1}")
# print(f" ROC \t {roc_auc}")
print(f" Jaccurred \t {jc}")

 Accuracy 	0.5
 Precision 	 0.375
 Recall 	 0.5
 F1 	 0.41666666666666663
 Jaccurred 	 0.375


In [13]:
# Disabled ROC plot, kept for reference.
# If re-enabled, it reads `fpr`, `tpr` and `roc_auc`. Those are assigned only in
# the binary-metrics cell; the multiclass cell leaves its ROC lines commented
# out, so the figure would show the binary example.

# # Plot ROC curve
# plt.figure()

# # Plot the ROC curve with a label displaying the ROC AUC score
# plt.plot(fpr, tpr, color='darkorange', lw=2,
#          label='ROC curve (area = %0.2f)' % roc_auc)

# # Plot a dashed diagonal line for reference
# plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

# # Set the x and y-axis limits
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])

# # Label the x and y-axes
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')

# # Set the title of the plot
# plt.title('Receiver Operating Characteristic')

# # Add a legend to the plot
# plt.legend(loc='lower right')

# # Display the ROC curve plot
# plt.show()

# Reference

- https://scikit-learn.org/stable/api/sklearn.metrics.html