In [45]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
import yfinance as yf
import numpy as np
from sklearn.metrics import accuracy_score
import pandas as pd
import warnings
from sklearn.metrics import classification_report

In [46]:
warnings.filterwarnings('ignore')

In [47]:
xmr = yf.Ticker("XMR-USD")
df_xmr = yf.download(tickers = "XMR-USD",
                     period = "max",
                     interval = "1d")

[*********************100%%**********************]  1 of 1 completed


In [48]:
df_xmr['Close_diff'] = df_xmr['Close'].diff()

# Create a new column 'Target' with binary values
df_xmr['Target'] = df_xmr['Close_diff'].apply(lambda x: 1 if x > 0 else 0)

# Remove the first row since it doesn't have a previous day to compare with
df_xmr = df_xmr[1:]

In [49]:
# Select the features and target variable
X = df_xmr[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
y = df_xmr['Target']

In [50]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [51]:
# Create an instance of the classifier
clf = DecisionTreeClassifier()

# Train the model
clf.fit(X_train, y_train)

In [52]:
# Make predictions on the testing data
y_pred = clf.predict(X_test)

# Calculate the accuracy of the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(f1_score(y_test, y_pred))

Accuracy: 0.8202764976958525
              precision    recall  f1-score   support

           0       0.80      0.81      0.81       202
           1       0.83      0.83      0.83       232

    accuracy                           0.82       434
   macro avg       0.82      0.82      0.82       434
weighted avg       0.82      0.82      0.82       434

0.8311688311688311


In [None]:
# Prepare the new data (similar to step 1)
# Assuming you have a new DataFrame called 'new_data'
new_data = pd.DataFrame()
new_data['Close_diff'] = new_data['Close'].diff()
new_data['Target'] = new_data['Close_diff'].apply(lambda x: 1 if x > 0 else 0)
new_data = new_data[1:]

# Select the features for prediction
X_new = new_data[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]

# Make predictions on the new data
predictions = clf.predict(X_new)

In [None]:
df_xmr["will_increase"] = np.where(df_xmr["Close"] - df_xmr["Open"] > 0, 1, 0)

In [38]:

df_xmr['Close_diff'] = df_xmr['Close'].diff()

# Create a new column 'Target' with binary values
df_xmr['Target'] = df_xmr['Close_diff'].apply(lambda x: 1 if x > 0 else 0)

# Remove the first row since it doesn't have a previous day to compare with
df_xmr = df_xmr[1:]

In [39]:
# Select the features and target variable
X = df_xmr[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
y = df_xmr['Target']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [40]:
cls = LogisticRegressionCV()
params = {
    'Cs': [1000, 100, 10, 1, 0.1, 0.01, 0.001, 0.0001],
    'max_iter': [100, 1000],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear', 'lbfgs']
}
grid_search = GridSearchCV(estimator=cls, param_grid=params,cv=5)
grid_search.fit(X_train, y_train)

In [53]:
print("best solver:", grid_search.best_params_['solver'])
print("best C:", grid_search.best_params_['Cs'])
print("best max_iter:", grid_search.best_params_['max_iter'])
print("best penalty:", grid_search.best_params_['penalty'])

best solver: liblinear
best C: 1000
best max_iter: 100
best penalty: l1


In [43]:
model = LogisticRegression(solver=grid_search.best_params_['solver'],
                         C=grid_search.best_params_['Cs'], 
                         max_iter=grid_search.best_params_['max_iter'], 
                         penalty=grid_search.best_params_['penalty'], 
                         random_state=42)
model.fit(X_train, y_train)

In [44]:
y_pred=model.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(f1_score(y_test, y_pred))

0.972027972027972
              precision    recall  f1-score   support

           0       0.99      0.96      0.97       350
           1       0.96      0.99      0.97       365

    accuracy                           0.97       715
   macro avg       0.97      0.97      0.97       715
weighted avg       0.97      0.97      0.97       715

0.9729729729729729
