In [1]:
import warnings
warnings.filterwarnings('ignore')

In [14]:
import numpy as np
from datetime import date
import pandas as pd
from pathlib import Path
from collections import Counter

In [3]:
from sklearn.model_selection import train_test_split
from imblearn.ensemble import BalancedRandomForestClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced
from sklearn.preprocessing import StandardScaler

In [76]:
file = Path('./success_df.csv')
df = pd.read_csv(file)
df.head()

Unnamed: 0,price,country,platform,End,Start,compound,negative,neutral,positive,no_of_posts,success
0,185.67,Switzerland,Ethereum,9/28/2017 3:00,9/28/2017 3:00,0.46057,0.06116,0.02739,0.90151,100,1
1,21.07,Japan,Ethereum,7/21/2017 7:00,7/1/2017 7:00,0.563773,0.09667,0.04141,0.86189,100,1
2,3.42,Cayman Islands,Own Blockchain,6/12/2018 0:00,6/26/2017 16:00,0.82847,0.0322,0.85855,0.10927,100,0
3,0.04,Switzerland,Own Blockchain,1/1/2017 0:00,9/1/2015 0:00,0.589474,0.098387,0.032946,0.868753,93,0
4,3.79,Seychelles,Ethereum,9/28/2017 3:00,9/28/2017 3:00,0.525717,0.08163,0.03348,0.88494,100,1


In [77]:
df['End']=df['End'].astype('datetime64')

In [78]:
df['Start']=df['Start'].astype('datetime64')

In [79]:
df['price']=df['price'].astype('float')

In [80]:
# Encoding the non-numerical columns
df = pd.get_dummies(df, columns=["country", "platform", "End", "Start"])

In [None]:
# Save the Preprocessed File
encoded = Path('.csv')
df.to_csv(encoded, index=False)

In [81]:
# Create our features
X = df.drop(columns="success")

# Create our target
y = df["success"].values.reshape(-1, 1)

In [82]:
# Split the X and y into X_train, X_test, y_train, y_test
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [83]:
# Scale the Features Data
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [84]:
# Balanced Random Forest Classifier
brf = BalancedRandomForestClassifier(n_estimators=100, random_state=1)

brf_model = brf.fit(X_train_scaled, y_train)

In [85]:
y_pred_brf = brf.predict(X_test_scaled)

In [86]:
# Calculated the balanced accuracy score
bac_brf = balanced_accuracy_score(y_test, y_pred_brf)

print(f"The Balanced Accuracy Score for the Balanced Random Forest Classifier is: {bac_brf}")

The Balanced Accuracy Score for the Balanced Random Forest Classifier is: 0.974820143884892


In [87]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred_brf)

array([[132,   7],
       [  0,   5]], dtype=int64)

In [88]:
# Print the imbalanced classification report
print(classification_report_imbalanced(y_test, y_pred_brf))

                   pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.95      1.00      0.97      0.97      0.94       139
          1       0.42      1.00      0.95      0.59      0.97      0.95         5

avg / total       0.98      0.95      1.00      0.96      0.97      0.95       144



In [89]:
# List the features sorted in descending order by feature importance
importances_sorted = sorted(zip(brf.feature_importances_, X.columns), reverse=True)
importances_sorted

[(0.1579653340033315, 'price'),
 (0.13249107734637341, 'positive'),
 (0.10988532724996956, 'compound'),
 (0.10286294029074394, 'neutral'),
 (0.0951373090493702, 'End_2017-09-28 03:00:00'),
 (0.0860139556161752, 'platform_Ethereum'),
 (0.06870000934711201, 'country_United States of America'),
 (0.06794094642208093, 'Start_2017-09-28 03:00:00'),
 (0.06714144086915647, 'negative'),
 (0.031746031746031744, 'country_Switzerland'),
 (0.02715210966484312, 'country_Singapore'),
 (0.022222222222222223, 'country_Canada'),
 (0.019567032388128538, 'no_of_posts'),
 (0.006302083333333331, 'country_Russia'),
 (0.004872180451127816, 'country_Gibraltar'),
 (0.0, 'platform_Waves'),
 (0.0, 'platform_Stellar'),
 (0.0, 'platform_Scrypt'),
 (0.0, 'platform_Own Blockchain'),
 (0.0, 'platform_Omni'),
 (0.0, 'platform_Nxt'),
 (0.0, 'platform_NEO'),
 (0.0, 'platform_NEM'),
 (0.0, 'platform_Litecoin'),
 (0.0, 'platform_Hyperledger'),
 (0.0, 'platform_Graphene'),
 (0.0, 'platform_EOS'),
 (0.0, 'platform_DAG'),
 (