In [1]:
import pandas as pd

# Creating the URL to fetch data from Yahoo Finance
symbol = "BTC-USD"
interval = "1h"
period = "3mo"
url = f"https://query1.finance.yahoo.com/v8/finance/chart/{symbol}?interval={interval}&range={period}"

# Fetching the data
response = pd.read_json(url)

# Processing the data
df = pd.DataFrame(response["chart"]["result"][0]["indicators"]["quote"][0])
df["timestamp"] = response["chart"]["result"][0]["timestamp"]

# Converting Unix timestamps to datetime
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")

# Saving the data as CSV
df.to_csv("btc_usdt_data.csv", index=False)


In [2]:
import pandas as pd

# Calculate percentage change as candlestick change
df["PctChange"] = df["close"].pct_change() * 100

# Set the first row's percentage change as NaN
df.loc[0, "PctChange"] = 0

# Save the data with percentage change as CSV
df.to_csv("btc_usdt_data_with_pct_change.csv", index=False)

    
# Display the last 10 rows
print(df.tail(10))

               low       volume         close          high          open  \
2200  30419.296875   11874304.0  30459.607422  30644.109375  30601.869141   
2201  30437.730469          0.0  30477.902344  30531.214844  30453.050781   
2202  30474.136719          0.0  30500.802734  30556.808594  30474.136719   
2203  30446.363281          0.0  30535.990234  30639.820312  30496.746094   
2204  30515.318359   24939520.0  30572.707031  30610.484375  30539.128906   
2205  30490.560547  112037888.0  30596.470703  30596.470703  30563.767578   
2206  30273.216797  588941312.0  30404.148438  30632.925781  30595.830078   
2207  30369.322266  553745408.0  30416.703125  30442.902344  30406.349609   
2208  30411.988281  514413568.0  30443.035156  30445.513672  30412.480469   
2209  30439.652344          0.0  30439.652344  30439.652344  30439.652344   

               timestamp  PctChange  
2200 2023-06-29 14:00:00  -0.241386  
2201 2023-06-29 15:00:00   0.060063  
2202 2023-06-29 16:00:00   0.075138  


In [3]:
# Shift the "PctChange" feature by 1 to 5 steps
for i in range(1, 6):
    df[f"PctChange_{i}"] = df["PctChange"].shift(i)

# Save the data with shifted percentage changes as CSV
df.to_csv("btc_usdt_data_with_shifted_pct_change.csv", index=False)

print(df.tail(10))

               low       volume         close          high          open  \
2200  30419.296875   11874304.0  30459.607422  30644.109375  30601.869141   
2201  30437.730469          0.0  30477.902344  30531.214844  30453.050781   
2202  30474.136719          0.0  30500.802734  30556.808594  30474.136719   
2203  30446.363281          0.0  30535.990234  30639.820312  30496.746094   
2204  30515.318359   24939520.0  30572.707031  30610.484375  30539.128906   
2205  30490.560547  112037888.0  30596.470703  30596.470703  30563.767578   
2206  30273.216797  588941312.0  30404.148438  30632.925781  30595.830078   
2207  30369.322266  553745408.0  30416.703125  30442.902344  30406.349609   
2208  30411.988281  514413568.0  30443.035156  30445.513672  30412.480469   
2209  30439.652344          0.0  30439.652344  30439.652344  30439.652344   

               timestamp  PctChange  PctChange_1  PctChange_2  PctChange_3  \
2200 2023-06-29 14:00:00  -0.241386    -0.274679    -0.162321    -0.109067

In [4]:
# Create a new column with binary values based on the percentage change
df["TargetChange"] = df["PctChange"].apply(lambda x: 1 if x >= 0 else 0)

print(df.tail(10))

               low       volume         close          high          open  \
2200  30419.296875   11874304.0  30459.607422  30644.109375  30601.869141   
2201  30437.730469          0.0  30477.902344  30531.214844  30453.050781   
2202  30474.136719          0.0  30500.802734  30556.808594  30474.136719   
2203  30446.363281          0.0  30535.990234  30639.820312  30496.746094   
2204  30515.318359   24939520.0  30572.707031  30610.484375  30539.128906   
2205  30490.560547  112037888.0  30596.470703  30596.470703  30563.767578   
2206  30273.216797  588941312.0  30404.148438  30632.925781  30595.830078   
2207  30369.322266  553745408.0  30416.703125  30442.902344  30406.349609   
2208  30411.988281  514413568.0  30443.035156  30445.513672  30412.480469   
2209  30439.652344          0.0  30439.652344  30439.652344  30439.652344   

               timestamp  PctChange  PctChange_1  PctChange_2  PctChange_3  \
2200 2023-06-29 14:00:00  -0.241386    -0.274679    -0.162321    -0.109067

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

# Remove rows with missing values
df = df.dropna()

# Define the features and target variables
features = df[["PctChange_1", "PctChange_2", "PctChange_3", "PctChange_4", "PctChange_5"]]
target = df["TargetChange"]

scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(features)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(scaled_features, target, test_size=0.2, random_state=42)

In [6]:
# Create an SVM classifier
svm = SVC()

# Train the model
svm.fit(X_train, y_train)


In [7]:
# Make predictions on the test set
y_pred = svm.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.5105882352941177


In [10]:
print(df.tail(10))

               low       volume         close          high          open  \
2200  30419.296875   11874304.0  30459.607422  30644.109375  30601.869141   
2201  30437.730469          0.0  30477.902344  30531.214844  30453.050781   
2202  30474.136719          0.0  30500.802734  30556.808594  30474.136719   
2203  30446.363281          0.0  30535.990234  30639.820312  30496.746094   
2204  30515.318359   24939520.0  30572.707031  30610.484375  30539.128906   
2205  30490.560547  112037888.0  30596.470703  30596.470703  30563.767578   
2206  30273.216797  588941312.0  30404.148438  30632.925781  30595.830078   
2207  30369.322266  553745408.0  30416.703125  30442.902344  30406.349609   
2208  30411.988281  514413568.0  30443.035156  30445.513672  30412.480469   
2209  30439.652344          0.0  30439.652344  30439.652344  30439.652344   

               timestamp  PctChange  PctChange_1  PctChange_2  PctChange_3  \
2200 2023-06-29 14:00:00  -0.241386    -0.274679    -0.162321    -0.109067

In [9]:
# Örnek bir tahmin yapma
sample = [[0.05, 0.40, -0.06, -0.12, -0.04]]
sample2 = [[0.40, -0.06, -0.12, 0.04, 0.17]]
sample3 = [[-0.06, -0.12, 0.04, 0.17, 0.02]]
sample4 = [[-0.4, -0.23, 0.39, 1.67, 2.30]]
sample5 = [[-0.23, 0.39, 1.67, 2.30, 7.11]]
sample5 = [[-0.23, 0.39, 1.67, 2.30, 7.11]]


prediction = svm.predict(sample)
prediction2 = svm.predict(sample2)
prediction3 = svm.predict(sample3)
prediction4 = svm.predict(sample4)
prediction5 = svm.predict(sample5)

print("Prediction:", prediction)
print("Prediction2:", prediction2)
print("Prediction3:", prediction3)
print("Prediction4:", prediction4)
print("Prediction5:", prediction5)

Prediction: [1]
Prediction2: [1]
Prediction3: [1]
Prediction4: [1]
Prediction5: [1]
