In [4]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score



# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/directional-forecasting-in-cryptocurrencies/sample_submission.csv
/kaggle/input/directional-forecasting-in-cryptocurrencies/train.csv
/kaggle/input/directional-forecasting-in-cryptocurrencies/test.csv


In [5]:
# Read the training CSV and prepare it in a single chain:
#   * 'timestamp' is parsed as epoch seconds and becomes the index
#   * 'number_of_trades' and 'target' are cast to int
data = (
    pd.read_csv('/kaggle/input/directional-forecasting-in-cryptocurrencies/train.csv')
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp'], unit='s'))
    .set_index('timestamp')
    .astype({'number_of_trades': int, 'target': int})
)

# Preview the first rows as the cell output.
data.head()


Unnamed: 0_level_0,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_volume,taker_buy_quote_volume,target
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-05-04 22:01:00,0.9012,0.9013,0.9012,0.9013,134.98,121.646459,4,125.08,112.723589,1
2018-05-04 22:02:00,0.90185,0.90195,0.90185,0.90195,1070.54,965.505313,12,879.94,793.612703,0
2018-05-04 22:03:00,0.9014,0.9014,0.90139,0.90139,2293.06,2066.963991,5,0.0,0.0,0
2018-05-04 22:04:00,0.90139,0.9014,0.90138,0.90139,6850.59,6175.000909,19,1786.3,1610.149485,0
2018-05-04 22:05:00,0.90139,0.90139,0.9013,0.9013,832.3,750.222624,3,784.82,707.4289,0


In [6]:
# Feature columns fed to the classifier; 'target' is the label.
predictors = [
    'open',
    'high',
    'low',
    'close',
    'volume',
    'quote_asset_volume',
    'number_of_trades',
    'taker_buy_base_volume',
    'taker_buy_quote_volume',
]

# Ordered hold-out: the first 80% of rows (in file order) are used for
# fitting, the remaining 20% for evaluation. Rows are not shuffled.
split_index = int(len(data) * 0.8)
train, test = data.iloc[:split_index], data.iloc[split_index:]

# Random Forest with a fixed seed so repeated runs build the same forest.
model = RandomForestClassifier(
    n_estimators=50,
    min_samples_split=100,
    random_state=1,
)

# Fit on the training slice; the fitted estimator is the cell output.
model.fit(train[predictors], train['target'])

In [7]:
# Score the fitted model on the held-out slice.
predictions = model.predict(test[predictors])
actual = test['target']

accuracy = accuracy_score(actual, predictions)

# The remaining metrics are support-weighted averages over the classes.
# Note: support-weighted recall is arithmetically the same number as accuracy,
# so those two printed values coincide.
precision, recall, f1 = (
    metric(actual, predictions, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

Accuracy: 0.5063723827293115
Precision: 0.5025769825846536
Recall: 0.5063723827293115
F1 Score: 0.5025882540514813


In [9]:
import pandas as pd
import joblib

# Persist the fitted forest to the working directory with joblib.
joblib_file = "random_forest_model.pkl"
joblib.dump(model, joblib_file)

# Confirm where the model was written.
print(f"Model saved as {joblib_file}")

Model saved as random_forest_model.pkl


In [10]:
# test the saved model

import pandas as pd
import joblib

# Read the competition test set.
test_data = pd.read_csv('/kaggle/input/directional-forecasting-in-cryptocurrencies/test.csv')

# Restore the forest from disk. joblib.load unpickles the file, so it should
# only be pointed at files you trust; this one is the file written by the
# save step earlier in this notebook.
joblib_file = "random_forest_model.pkl"
model = joblib.load(joblib_file)

In [11]:
# Same feature columns, in the same order, as the ones the model was fitted on.
predictors = [
    'open',
    'high',
    'low',
    'close',
    'volume',
    'quote_asset_volume',
    'number_of_trades',
    'taker_buy_base_volume',
    'taker_buy_quote_volume',
]

# Select the features from the test set and predict one label per row.
X_test = test_data[predictors]
predictions = model.predict(X_test)

In [12]:
# Build the submission frame: one row per test row with its predicted target.
#
# Use the identifier column shipped with the test file when there is one, so
# the submitted ids match the file's own keys even if they are not simply
# 0..n-1. Fall back to the positional index (the previous behaviour) when no
# such column exists.
# NOTE(review): assumes the submission key column in test.csv is named
# 'row_id', matching the output column name — confirm against the
# competition's data description.
if 'row_id' in test_data.columns:
    row_ids = test_data['row_id'].to_numpy()
else:
    row_ids = test_data.index

output = pd.DataFrame({'row_id': row_ids, 'target': predictions})

# Save the predictions to a CSV file (no index column, just row_id and target).
output.to_csv('sample_submission.csv', index=False)

print("Predictions saved to sample_submission.csv")

Predictions saved to sample_submission.csv
