In [5]:
import pandas as pd

# Creating the URL to fetch data from Yahoo Finance
symbol = "BTC-USD"
interval = "1h"
period = "3mo"
url = f"https://query1.finance.yahoo.com/v8/finance/chart/{symbol}?interval={interval}&range={period}"

# Fetching the data
response = pd.read_json(url)

# Processing the data
df = pd.DataFrame(response["chart"]["result"][0]["indicators"]["quote"][0])
df["timestamp"] = response["chart"]["result"][0]["timestamp"]

# Converting Unix timestamps to datetime
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")

# Saving the data as CSV
df.to_csv("btc_usdt_data.csv", index=False)


In [7]:
import pandas as pd

# Calculate percentage change as candlestick change
df["PctChange"] = df["close"].pct_change() * 100

# Set the first row's percentage change as NaN
df.loc[0, "PctChange"] = 0

# Save the data with percentage change as CSV
df.to_csv("btc_usdt_data_with_pct_change.csv", index=False)

    
# Display the last 10 rows
print(df.tail(10))

               low          high       volume         close          open  \
2200  30533.310547  30796.250000  273352704.0  30533.310547  30622.925781   
2201  30419.296875  30644.109375   11874304.0  30459.607422  30601.869141   
2202  30437.730469  30531.214844          0.0  30477.902344  30453.050781   
2203  30474.136719  30556.808594          0.0  30500.802734  30474.136719   
2204  30446.363281  30639.820312          0.0  30535.990234  30496.746094   
2205  30515.318359  30610.484375   24939520.0  30572.707031  30539.128906   
2206  30490.560547  30596.470703  112037888.0  30596.470703  30563.767578   
2207  30273.216797  30632.925781  588941312.0  30404.148438  30595.830078   
2208  30369.322266  30439.736328  549699584.0  30439.736328  30406.349609   
2209  30436.298828  30436.298828          0.0  30436.298828  30436.298828   

               timestamp      change  PctChange  
2200 2023-06-29 13:00:00  -89.615234  -0.274679  
2201 2023-06-29 14:00:00 -142.261719  -0.241386  
22

In [8]:
# Shift the "PctChange" feature by 1 to 5 steps
for i in range(1, 6):
    df[f"PctChange_{i}"] = df["PctChange"].shift(i)

# Save the data with shifted percentage changes as CSV
df.to_csv("btc_usdt_data_with_shifted_pct_change.csv", index=False)

print(df.tail(10))

               low          high       volume         close          open  \
2200  30533.310547  30796.250000  273352704.0  30533.310547  30622.925781   
2201  30419.296875  30644.109375   11874304.0  30459.607422  30601.869141   
2202  30437.730469  30531.214844          0.0  30477.902344  30453.050781   
2203  30474.136719  30556.808594          0.0  30500.802734  30474.136719   
2204  30446.363281  30639.820312          0.0  30535.990234  30496.746094   
2205  30515.318359  30610.484375   24939520.0  30572.707031  30539.128906   
2206  30490.560547  30596.470703  112037888.0  30596.470703  30563.767578   
2207  30273.216797  30632.925781  588941312.0  30404.148438  30595.830078   
2208  30369.322266  30439.736328  549699584.0  30439.736328  30406.349609   
2209  30436.298828  30436.298828          0.0  30436.298828  30436.298828   

               timestamp      change  PctChange  PctChange_1  PctChange_2  \
2200 2023-06-29 13:00:00  -89.615234  -0.274679    -0.162321    -0.109067  

In [10]:
# Create a new column with binary values based on the percentage change
df["TargetChange"] = df["PctChange"].apply(lambda x: 1 if x >= 0 else 0)

print(df.tail(10))

               low          high       volume         close          open  \
2200  30533.310547  30796.250000  273352704.0  30533.310547  30622.925781   
2201  30419.296875  30644.109375   11874304.0  30459.607422  30601.869141   
2202  30437.730469  30531.214844          0.0  30477.902344  30453.050781   
2203  30474.136719  30556.808594          0.0  30500.802734  30474.136719   
2204  30446.363281  30639.820312          0.0  30535.990234  30496.746094   
2205  30515.318359  30610.484375   24939520.0  30572.707031  30539.128906   
2206  30490.560547  30596.470703  112037888.0  30596.470703  30563.767578   
2207  30273.216797  30632.925781  588941312.0  30404.148438  30595.830078   
2208  30369.322266  30439.736328  549699584.0  30439.736328  30406.349609   
2209  30436.298828  30436.298828          0.0  30436.298828  30436.298828   

               timestamp      change  PctChange  PctChange_1  PctChange_2  \
2200 2023-06-29 13:00:00  -89.615234  -0.274679    -0.162321    -0.109067  

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Remove rows with missing values
df = df.dropna()

# Define the features and target variables
features = df[["PctChange", "PctChange_1", "PctChange_2", "PctChange_3", "PctChange_4", "PctChange_5"]]
target = df["TargetChange"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [15]:
# Create an SVM classifier
svm = SVC()

# Train the model
svm.fit(X_train, y_train)


In [16]:
# Make predictions on the test set
y_pred = svm.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.971764705882353
