## Imports

In [66]:
import pandas as pd
import pickle as pk
import plotly.express as px
from sklearn.metrics import roc_curve, auc
import os

In [67]:
# Base directory for the data paths built below: the kernel's current working
# directory, so the notebook must be started from the folder containing `data/`.
fold = os.getcwd()
fold

'/Users/listonlab/MPHIL/MTR/Scripts'

# Random Forest

### ST1

In [68]:
def _load_pickle(path):
    """Return the object unpickled from ``path``.

    The file is opened in a ``with`` block so the handle is closed even when
    unpickling raises.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only use this on
    result files produced by this project, never on untrusted input.
    """
    with open(path, "rb") as handle:
        return pk.load(handle)


# Random-forest results for each preprocessing variant.
scaled = _load_pickle(fold + "/data/randomforest_scaled2.dat")
logscaled = _load_pickle(fold+"/data/randomforest_log_scaled.dat")
batch = _load_pickle(fold+"/data/randomforest_batch.dat")
logbatch = _load_pickle(fold+"/data/randomforest_log_batch.dat")


In [69]:
# Report the accuracy stored with each preprocessing variant.
# The results dicts spell the key 'acuracy'; it must match the pickled data.
accuracy_rows = (
    ("Accuracy scaled ", scaled),
    ("Accuracy log scaled ", logscaled),
    ("Accuracy batch scaled ", batch),
    ("Accuracy log batch scaled ", logbatch),
)
for accuracy_prefix, accuracy_source in accuracy_rows:
    print(accuracy_prefix + str(accuracy_source['acuracy']))



Accuracy scaled 0.6957339449541284
Accuracy log scaled 0.6955963302752294
Accuracy batch scaled 0.6809174311926606
Accuracy log batch scaled 0.6824311926605504


In [70]:
import plotly.graph_objects as go

def plot_roc_curves(fprs, tprs, titles):
    """Draw several ROC curves on one Plotly figure and display it.

    Parameters
    ----------
    fprs : sequence
        False-positive-rate values, one entry per curve (used as x).
    tprs : sequence
        True-positive-rate values, one entry per curve (used as y).
    titles : sequence of str
        Legend label for each curve.

    The colour and dash palettes hold four entries each; they are cycled, so
    any number of curves can be plotted (indexing them directly raised
    ``IndexError`` from the fifth curve on).
    """
    fig = go.Figure()

    colors = ['darkblue', 'lightblue', 'darkred', 'pink']
    line_styles = ['solid', "dash",'solid', "dash" ]

    for i in range(len(fprs)):
        fig.add_trace(
            go.Scatter(
                x=fprs[i],
                y=tprs[i],
                mode='lines',
                name=titles[i],
                # Wrap around the palettes instead of running off their end.
                line=dict(
                    color=colors[i % len(colors)],
                    dash=line_styles[i % len(line_styles)],
                ),
            )
        )

    # Diagonal reference line for a random classifier; kept out of the legend.
    fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', line=dict(dash='dot', color='slategray'), name='Random', showlegend=False))

    fig.update_layout(
        title='ROC Curves',
        xaxis=dict(title='False Positive Rate'),
        yaxis=dict(title='True Positive Rate'),
        width=700,
        height=500
    )

    fig.show()

# ROC data for every preprocessing variant, in plotting order.
roc_sources = (scaled, logscaled, batch, logbatch)
roc_labels = ('Scaled', 'Log Scaled', 'Batch Scaled', 'Log Batch Scaled')
roc_results = [roc_curve(res["y_t"], res["y_t_pred"]) for res in roc_sources]

# Keep the individually named arrays available to later cells.
(
    (fpr_scaled, tpr_scaled, thresholds_scaled),
    (fpr_logscaled, tpr_logscaled, thresholds_logscaled),
    (fpr_batch, tpr_batch, thresholds_batch),
    (fpr_logbatch, tpr_logbatch, thresholds_logbatch),
) = roc_results

fprs = [fpr for fpr, _, _ in roc_results]
tprs = [tpr for _, tpr, _ in roc_results]

# Legend entries carry the area under each curve, to four decimals.
titles = [
    f'{label} (AUC={auc(fpr, tpr):.4f})'
    for label, (fpr, tpr, _) in zip(roc_labels, roc_results)
]

plot_roc_curves(fprs, tprs, titles)


In [71]:
import plotly.graph_objects as go
import plotly.subplots as sp

def _sorted_importance(result):
    """Return ``result["importance"]`` indexed by ``mark``, largest importance first.

    A new frame is built, so the DataFrame held in the loaded results dict is
    left untouched (the earlier version re-indexed and sorted it in place).
    """
    return (
        result["importance"]
        .set_index("mark", drop=False)  # index by mark, keep the column too
        .sort_values("importance")
        .iloc[::-1]  # reverse the ascending sort -> descending importance
    )


# Prepare the data for each category
df_scaled = _sorted_importance(scaled)
df_logscaled = _sorted_importance(logscaled)
df_batch = _sorted_importance(batch)
df_logbatch = _sorted_importance(logbatch)

# Panels in row-major order of the 2x2 grid.
panel_titles = ['Scaled', 'Log Scaled', 'Batch Scaled', 'Log Batch Scaled']
panel_frames = [df_scaled, df_logscaled, df_batch, df_logbatch]

# Create subplots
fig = sp.make_subplots(rows=2, cols=2, subplot_titles=panel_titles)

# Define custom colors (same palette as the ROC curves)
colors = ['darkblue', 'lightblue', 'darkred', 'pink']

# Add one bar trace per category
for position, (panel_title, panel_frame, panel_color) in enumerate(
    zip(panel_titles, panel_frames, colors)
):
    grid_row, grid_col = divmod(position, 2)  # 0-based cell in the 2x2 grid
    fig.add_trace(
        go.Bar(
            x=panel_frame.index,
            y=panel_frame.importance,
            name=panel_title,
            marker=dict(color=panel_color),
        ),
        row=grid_row + 1,
        col=grid_col + 1,
    )

# Update layout
fig.update_layout(width=800, height=1000, showlegend=False)
fig.update_xaxes(tickangle=300)

# Show the plot
fig.show()


### ST2

### ST3

# Logistic Regression

# Support Vector Machine