In [83]:
from datetime import date
from prophet import Prophet
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
df = pd.read_csv("../Resources/btcjoin.csv", parse_dates=['date'])
df = df.drop(columns=['volume','change', 'low', 'high', 'open'])
df = df.rename(columns={"value": "wallets"})
df['price']=df['price'].str.replace(',','')
df['price']=df['price'].astype("float")

#https://dcresearch.medium.com/metcalfes-law-and-bitcoin-s-value-2b99c7efd1fa

In [84]:
df['Metcafe']=df['address']**2
df['value'] = df['Metcafe']/df['mined']
df["value"] = df["value"].map("{:.2f}".format)
df['value']=df['value'].astype("float")
df['networkvalue'] = df["price"] - df["value"]

In [85]:
df['200D'] = df['price'].rolling(200).mean()
df['300D'] = df['price'].rolling(300).mean()
df['50D'] = df['price'].rolling(50).mean()
df = df.dropna()

In [86]:
import plotly.express as px
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(name="Actual", x=df['date'], y=df['price'], legendrank=4))
fig.add_trace(go.Scatter(name="200 Day", x=df['date'], y=df['200D'], legendrank=2))
fig.add_trace(go.Scatter(name="300 Day", x=df['date'], y=df['300D'], legendrank=2))
fig.add_trace(go.Scatter(name="50 Day", x=df['date'], y=df['50D'], legendrank=2))

fig.update_xaxes(
    rangeslider_visible = True,
    rangeselector = dict(
        buttons = list([
            dict(count = 1, label = "1m", step = "month", stepmode = "backward"),
            dict(count = 6, label = "6m", step = "month", stepmode = "backward"),
            dict(count = 1, label = "YTD", step = "year", stepmode = "todate"),
            dict(count = 1, label = "1y", step = "year", stepmode = "backward"),
            dict(step = "all")
        ])
    )
)
fig.update_yaxes(fixedrange=False)

fig.update_layout(title_text='Bitcoin Moving Day Averages')
fig.update_yaxes(ticklabelposition="inside top", title="Bitcoin Wallets")
fig.update_xaxes(ticklabelposition="inside top", title="Date")
fig.update_yaxes(nticks=15)
fig.update_xaxes(nticks=20)
fig.update_layout(
    margin=dict(l=20, r=100, t=70, b=20),
)
fig.show()
fig.write_html('../static/movingaverages.html')



In [87]:
df['meanavge'] = (df['200D'] + df['300D'] + df['50D'] )/3
df

Unnamed: 0,date,price,wallets,address,mined,Metcafe,value,networkvalue,200D,300D,50D,meanavge
299,2011-07-21,13.6,2,25137.0,6863850.00,6.318688e+08,92.06,-78.46,5.6655,3.844333,16.604,8.704611
300,2011-07-22,13.7,2,24163.0,6883350.00,5.838506e+08,84.82,-71.12,5.7325,3.889667,16.666,8.762722
301,2011-07-23,13.7,2,21647.0,6892150.00,4.685926e+08,67.99,-54.29,5.7995,3.935000,16.654,8.796167
302,2011-07-24,14.0,2,22920.0,6885850.00,5.253264e+08,76.29,-62.29,5.8680,3.981333,16.556,8.801778
303,2011-07-25,14.1,2,26711.0,6908450.00,7.134775e+08,103.28,-89.18,5.9370,4.028000,16.504,8.823000
...,...,...,...,...,...,...,...,...,...,...,...,...
4381,2022-10-21,19162.6,84879615,924681.0,19185681.25,8.550350e+11,44566.31,-25403.71,25662.9880,31072.760000,19619.662,25451.803333
4382,2022-10-22,19204.8,84888147,830404.0,19186618.75,6.895708e+11,35940.19,-16735.39,25531.4820,30967.509667,19604.704,25367.898556
4383,2022-10-23,19571.2,84893809,804140.0,19187600.00,6.466411e+11,33700.99,-14129.79,25413.4730,30863.819667,19599.500,25292.264222
4384,2022-10-24,19331.5,84902680,919344.0,19188556.25,8.451934e+11,44046.74,-24715.24,25292.8905,30769.774000,19586.132,25216.265500


In [88]:
import plotly.express as px
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(name="Value", x=df['date'], y=df['value'], marker = {'color' : 'orange'}, legendrank=4))
fig.add_trace(go.Scatter(name="MeanAvg", x=df['date'], y=df['meanavge'], marker = {'color' : 'green'}, legendrank=2))
fig.add_trace(go.Scatter(name="Actual", x=df['date'], y=df['price'],marker = {'color' : 'purple'}, legendrank=2))


fig.update_xaxes(
    rangeslider_visible = True,
    rangeselector = dict(
        buttons = list([
            dict(count = 1, label = "1m", step = "month", stepmode = "backward"),
            dict(count = 6, label = "6m", step = "month", stepmode = "backward"),
            dict(count = 1, label = "YTD", step = "year", stepmode = "todate"),
            dict(count = 1, label = "1y", step = "year", stepmode = "backward"),
            dict(step = "all")
        ])
    )
)
fig.update_yaxes(fixedrange=False)
fig.update_layout(title_text='Bitcoin Moving Averages and Value')
fig.update_yaxes(ticklabelposition="inside top", title="Bitcoin Price")
fig.update_xaxes(ticklabelposition="inside top", title="Date")
fig.update_yaxes(nticks=15)
fig.update_xaxes(nticks=20)
fig.update_layout(
    margin=dict(l=20, r=100, t=70, b=20),
)
fig.show()
fig.write_html('../static/MAwithValue.html')

In [89]:
df = df.drop(columns=['200D','300D', '50D'])
df['meanvalue'] = df["price"] - df["meanavge"]
# df['status'] = df['networkvalue'].apply(lambda x: '1' if x > 0 else '0')
df['status'] = df['meanvalue'].apply(lambda x: '1' if x > 0 else '0')
# df['status']=df['status'].astype("float")
df['status']=df['status'].astype("float")
# df['statusfinal'] = df['status1'] + df['status']
# df = df.drop(columns=['status','status1'])
# df = df.rename(columns={"statusfinal": "status"})
df


Unnamed: 0,date,price,wallets,address,mined,Metcafe,value,networkvalue,meanavge,meanvalue,status
299,2011-07-21,13.6,2,25137.0,6863850.00,6.318688e+08,92.06,-78.46,8.704611,4.895389,1.0
300,2011-07-22,13.7,2,24163.0,6883350.00,5.838506e+08,84.82,-71.12,8.762722,4.937278,1.0
301,2011-07-23,13.7,2,21647.0,6892150.00,4.685926e+08,67.99,-54.29,8.796167,4.903833,1.0
302,2011-07-24,14.0,2,22920.0,6885850.00,5.253264e+08,76.29,-62.29,8.801778,5.198222,1.0
303,2011-07-25,14.1,2,26711.0,6908450.00,7.134775e+08,103.28,-89.18,8.823000,5.277000,1.0
...,...,...,...,...,...,...,...,...,...,...,...
4381,2022-10-21,19162.6,84879615,924681.0,19185681.25,8.550350e+11,44566.31,-25403.71,25451.803333,-6289.203333,0.0
4382,2022-10-22,19204.8,84888147,830404.0,19186618.75,6.895708e+11,35940.19,-16735.39,25367.898556,-6163.098556,0.0
4383,2022-10-23,19571.2,84893809,804140.0,19187600.00,6.466411e+11,33700.99,-14129.79,25292.264222,-5721.064222,0.0
4384,2022-10-24,19331.5,84902680,919344.0,19188556.25,8.451934e+11,44046.74,-24715.24,25216.265500,-5884.765500,0.0


In [90]:
df['status'].value_counts()

1.0    2483
0.0    1604
Name: status, dtype: int64

In [91]:

# Create our features
X = df.drop(columns="status")
X = pd.get_dummies(X)

# Create our target
X = df.drop(columns="date")
y = df['status']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=20, train_size=0.70)


In [92]:
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score

In [93]:
## Logistic Regression
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='lbfgs', random_state=1)
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)
pd.DataFrame({"Prediction": predictions, "Actual": y_test})
y_pred = classifier.predict(X_test)
print(f'Training Score: {classifier.score(X_train, y_train)}')
print(f'Testing Score: {classifier.score(X_test, y_test)}')
print(classification_report(y_test, y_pred))
print(balanced_accuracy_score(y_test, y_pred))



Training Score: 0.6139860139860139
Testing Score: 0.5925020374898126
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00       500
         1.0       0.59      1.00      0.74       727

    accuracy                           0.59      1227
   macro avg       0.30      0.50      0.37      1227
weighted avg       0.35      0.59      0.44      1227

0.5



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [94]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.preprocessing import StandardScaler
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)
# Splitting into Train and Test sets into an 80/20 split.
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, random_state=78, train_size=0.80)
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Creating the decision tree classifier instance.
model = tree.DecisionTreeClassifier()
# Fitting the model.
model = model.fit(X_train_scaled, y_train)

# Making predictions using the testing data.
predictions = model.predict(X_test_scaled)


In [95]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,416,0
Actual 1,0,606


In [96]:
# Calculating the accuracy score.
acc_score = accuracy_score(y_test, predictions)

In [97]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,416,0
Actual 1,0,606


Accuracy Score : 1.0
Classification Report
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       416
         1.0       1.00      1.00      1.00       606

    accuracy                           1.00      1022
   macro avg       1.00      1.00      1.00      1022
weighted avg       1.00      1.00      1.00      1022



In [98]:
from sklearn.ensemble import RandomForestClassifier
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=3000, random_state=78) 

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Making predictions using the testing data.
predictions = rf_model.predict(X_test_scaled)

In [99]:
# Calculating the confusion matrix.
cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,416,0
Actual 1,0,606


In [100]:
# Calculating the accuracy score.
acc_score = accuracy_score(y_test, predictions)

In [101]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,416,0
Actual 1,0,606


Accuracy Score : 1.0
Classification Report
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       416
         1.0       1.00      1.00      1.00       606

    accuracy                           1.00      1022
   macro avg       1.00      1.00      1.00      1022
weighted avg       1.00      1.00      1.00      1022



In [124]:
import plotly.express as px
fig = px.scatter(df, x="date", y="price", color="status" ,color_discrete_sequence=["orange", "green"],
                 title="price")
fig.add_trace(go.Scatter(name="value", x=df['date'], y=df['value'], marker = {'color' : 'orange'}, legendrank=4))
fig.add_trace(go.Scatter(name="meanavge", x=df['date'], y=df['meanavge'], marker = {'color' : 'blue'}, legendrank=4))

fig.update_yaxes(fixedrange=False)
fig.update_layout(title_text='Bitcoin Value Status')
# fig.update_yaxes(ticklabelposition="inside top", title="Price")
fig.update_yaxes(type="log")
fig.update_xaxes(ticklabelposition="inside top", title="Date")
fig.update_yaxes(nticks=15)
fig.update_xaxes(nticks=50)
fig.update_layout(
    margin=dict(l=20, r=100, t=70, b=20),
)

fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="right",
    x=0.5
))

fig.update_layout(
	updatemenus=[
		dict(
			active=0,
			buttons=list([
				dict(label="All",
					method="update",
					args=[{"visible": [True, True, True]},
						{"title": "Both"}]),
				dict(label="price",
					method="update",
					args=[{"visible": [True, False, False]},
						{"title": "Price",
							}]),
				dict(label="meanavge",
					method="update",
					args=[{"visible": [False, False, True]},
						{"title": "meanavge",
							}]),
								dict(label="value",
					method="update",
					args=[{"visible": [False, True, False]},
						{"title": "value",
							}]),
			]),
		)
	])

fig.show()
fig.write_html('../static/Bitcoinvaluestatus.html')

In [104]:
## Oversampling

# Resample the training data with the RandomOversampler
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
ros = RandomOverSampler(random_state=1)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

Counter(y_resampled)

Counter({1.0: 1877, 0.0: 1877})

In [105]:
# Train the Logistic Regression model using the resampled data
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(solver='lbfgs', random_state=1)
model.fit(X_resampled, y_resampled)

In [106]:
y_pred = model.predict(X_test)
confusion_matrix(y_test, y_pred)
balanced_accuracy_score(y_test, y_pred)

0.5

In [107]:
y_pred = model.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[  0, 416],
       [  0, 606]], dtype=int64)

In [108]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

        0.0       0.00      0.00      1.00      0.00      0.00      0.00       416
        1.0       0.59      1.00      0.00      0.74      0.00      0.00       606

avg / total       0.35      0.59      0.41      0.44      0.00      0.00      1022




Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [109]:
## Undersampling

# Resample the data using the ClusterCentroids resampler
from imblearn.under_sampling import ClusterCentroids
cc = ClusterCentroids(random_state=1)
X_resampled, y_resampled = cc.fit_resample(X_train, y_train)

In [110]:

# Train the Logistic Regression model using the resampled data
model = LogisticRegression(solver='lbfgs', random_state=1)
model.fit(X_resampled, y_resampled)

In [111]:
# Calculated the balanced accuracy score
y_pred = model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.5

In [112]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[  0, 416],
       [  0, 606]], dtype=int64)

In [113]:
# Print the imbalanced classification report
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

        0.0       0.00      0.00      1.00      0.00      0.00      0.00       416
        1.0       0.59      1.00      0.00      0.74      0.00      0.00       606

avg / total       0.35      0.59      0.41      0.44      0.00      0.00      1022




Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [114]:
## Over and Under sampling
# Resample the training data with SMOTEENN
from imblearn.combine import SMOTEENN
smote_enn = SMOTEENN(random_state=1)
X_resampled, y_resampled = smote_enn.fit_resample(X, y)

In [115]:
# Train the Logistic Regression model using the resampled data
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(solver='lbfgs', random_state=1)
model.fit(X_resampled, y_resampled)


In [116]:
# Display the confusion matrix
from sklearn.metrics import confusion_matrix
y_pred = model.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[  0, 416],
       [  0, 606]], dtype=int64)

In [117]:
# Calculated the balanced accuracy score
from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y_test, y_pred)

0.5

In [118]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

        0.0       0.00      0.00      1.00      0.00      0.00      0.00       416
        1.0       0.59      1.00      0.00      0.74      0.00      0.00       606

avg / total       0.35      0.59      0.41      0.44      0.00      0.00      1022




Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

