In [93]:
import pandas as pd
import numpy as np
import os
import requests
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score
from joblib import dump
import warnings


In [94]:
warnings.filterwarnings('ignore')

In [95]:
data = pd.read_csv("Data_Sets\eth-usd-max.csv")
data = data.dropna()

In [96]:
display(data)

Unnamed: 0,snapped_at,price,market_cap,total_volume
0,2015-08-07 00:00:00 UTC,2.831620,0.000000e+00,9.062200e+04
1,2015-08-08 00:00:00 UTC,1.330750,8.033948e+07,3.680700e+05
2,2015-08-10 00:00:00 UTC,0.687586,4.155631e+07,4.004641e+05
3,2015-08-11 00:00:00 UTC,1.067379,6.453901e+07,1.518998e+06
4,2015-08-12 00:00:00 UTC,1.256613,7.601326e+07,2.073893e+06
...,...,...,...,...
3432,2024-12-30 00:00:00 UTC,3357.330743,4.044600e+11,1.131543e+10
3433,2024-12-31 00:00:00 UTC,3359.513942,4.046310e+11,2.766414e+10
3434,2025-01-01 00:00:00 UTC,3336.617514,4.019730e+11,2.146639e+10
3435,2025-01-02 00:00:00 UTC,3348.967247,4.033740e+11,1.344060e+10


In [97]:
data.columns

Index(['snapped_at', 'price', 'market_cap', 'total_volume'], dtype='object')

In [98]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3436 entries, 0 to 3436
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   snapped_at    3436 non-null   object 
 1   price         3436 non-null   float64
 2   market_cap    3436 non-null   float64
 3   total_volume  3436 non-null   float64
dtypes: float64(3), object(1)
memory usage: 134.2+ KB


In [99]:
data['snapped_at'] = pd.to_datetime(data['snapped_at'])

In [100]:
data['year'] = pd.DatetimeIndex(data['snapped_at']).year
data['month'] = pd.DatetimeIndex(data['snapped_at']).month
data['day'] = pd.DatetimeIndex(data['snapped_at']).day

In [101]:
display(data)

Unnamed: 0,snapped_at,price,market_cap,total_volume,year,month,day
0,2015-08-07 00:00:00+00:00,2.831620,0.000000e+00,9.062200e+04,2015,8,7
1,2015-08-08 00:00:00+00:00,1.330750,8.033948e+07,3.680700e+05,2015,8,8
2,2015-08-10 00:00:00+00:00,0.687586,4.155631e+07,4.004641e+05,2015,8,10
3,2015-08-11 00:00:00+00:00,1.067379,6.453901e+07,1.518998e+06,2015,8,11
4,2015-08-12 00:00:00+00:00,1.256613,7.601326e+07,2.073893e+06,2015,8,12
...,...,...,...,...,...,...,...
3432,2024-12-30 00:00:00+00:00,3357.330743,4.044600e+11,1.131543e+10,2024,12,30
3433,2024-12-31 00:00:00+00:00,3359.513942,4.046310e+11,2.766414e+10,2024,12,31
3434,2025-01-01 00:00:00+00:00,3336.617514,4.019730e+11,2.146639e+10,2025,1,1
3435,2025-01-02 00:00:00+00:00,3348.967247,4.033740e+11,1.344060e+10,2025,1,2


In [102]:
data.columns

Index(['snapped_at', 'price', 'market_cap', 'total_volume', 'year', 'month',
       'day'],
      dtype='object')

In [103]:
X = data[['day','month','year','market_cap', 'total_volume']]
y = data['price']

preprocessing the data

In [104]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [105]:


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# List of regression models supporting partial_fit
models = {
    "SGDRegressor": SGDRegressor(max_iter=1000, tol=1e-3),
   
}

# Variables to track the best model
best_model = None
best_model_name = None
best_r2 = -float("inf")  # Start with the lowest possible R²

# Train and evaluate each model
for model_name, model in models.items():
    print(f"Training {model_name}...")
    # Initialize the model with partial_fit
    model.partial_fit(X_train[:100], y_train[:100])
    batch_size = 100
    for i in range(batch_size, len(X_train), batch_size):
        X_batch = X_train[i:i + batch_size]
        y_batch = y_train[i:i + batch_size]
        model.partial_fit(X_batch, y_batch)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"{model_name} Mean Squared Error: {mse:.4f}")
    print(f"{model_name} R² Score: {r2:.4f}")

    # Update the best model if current model is better
    if r2 > best_r2:
        best_r2 = r2
        best_model = model
        best_model_name = model_name

# Save the best model
model_folder = "models"
os.makedirs(model_folder, exist_ok=True)

if best_model is not None:
    model_path = os.path.join(model_folder, f"{model_name}.joblib")
    dump(model, model_path)
    print(f"{model_name} saved to {model_path}")


Training SGDRegressor...
SGDRegressor Mean Squared Error: 18059.6423
SGDRegressor R² Score: 0.9871
SGDRegressor saved to models\SGDRegressor.joblib


In [106]:
# Save the model


In [107]:
# Step 1: Fetch data from CoinGecko API
API_URL = "https://api.coingecko.com/api/v3/coins/{id}/market_chart"

def fetch_last_30_days_data(coin_id, vs_currency="usd"):
    try:
        # Parameters for the API request
        params = {
            "vs_currency": vs_currency,
            "days": 30,  # Fetch data for the last 30 days
            "interval": "daily"  # Get daily data
        }

        # Format the URL with the coin ID
        url = API_URL.format(id=coin_id)

        # Send the GET request
        response = requests.get(url, params=params)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse the JSON response
        data = response.json()

        # Extract required fields
        prices = data.get("prices", [])
        market_caps = data.get("market_caps", [])
        total_volumes = data.get("total_volumes", [])

        # Combine data into a list of dictionaries
        result = []
        for i in range(len(prices)):
            result.append({
                "snapped_at": datetime.utcfromtimestamp(prices[i][0] / 1000).strftime('%Y-%m-%d %H:%M:%S'),
                "price": prices[i][1],
                "market_cap": market_caps[i][1] if i < len(market_caps) else None,
                "total_volume": total_volumes[i][1] if i < len(total_volumes) else None,
            })

        # Convert the result to a Pandas DataFrame
        df = pd.DataFrame(result)
        return df

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return pd.DataFrame()  # Return an empty DataFrame in case of an error


In [108]:
#preprocess the 30days data 
fetch_data = fetch_last_30_days_data("ethereum")

fetch_data.dropna(inplace=True)

display(fetch_data)

Unnamed: 0,snapped_at,price,market_cap,total_volume
0,2024-12-07 00:00:00,4013.726145,483422500000.0,55954470000.0
1,2024-12-08 00:00:00,4000.992077,481889200000.0,21959790000.0
2,2024-12-09 00:00:00,4015.782021,483642900000.0,20728150000.0
3,2024-12-10 00:00:00,3713.313898,446571300000.0,59141400000.0
4,2024-12-11 00:00:00,3626.588642,436749600000.0,63643510000.0
5,2024-12-12 00:00:00,3828.107588,461075900000.0,38471620000.0
6,2024-12-13 00:00:00,3878.850012,466883100000.0,47474610000.0
7,2024-12-14 00:00:00,3907.42966,470469900000.0,36218320000.0
8,2024-12-15 00:00:00,3866.995802,465713300000.0,28647720000.0
9,2024-12-16 00:00:00,3961.315466,477323200000.0,24816850000.0


In [109]:
#split and standardization (split by date)
fetch_data['snapped_at'] = pd.to_datetime(fetch_data['snapped_at'])
fetch_data['year'] = pd.DatetimeIndex(fetch_data['snapped_at']).year
fetch_data['month'] = pd.DatetimeIndex(fetch_data['snapped_at']).month
fetch_data['day'] = pd.DatetimeIndex(fetch_data['snapped_at']).day

In [110]:
display(fetch_data.head())

Unnamed: 0,snapped_at,price,market_cap,total_volume,year,month,day
0,2024-12-07,4013.726145,483422500000.0,55954470000.0,2024,12,7
1,2024-12-08,4000.992077,481889200000.0,21959790000.0,2024,12,8
2,2024-12-09,4015.782021,483642900000.0,20728150000.0,2024,12,9
3,2024-12-10,3713.313898,446571300000.0,59141400000.0,2024,12,10
4,2024-12-11,3626.588642,436749600000.0,63643510000.0,2024,12,11


In [111]:
fetch_data.columns

Index(['snapped_at', 'price', 'market_cap', 'total_volume', 'year', 'month',
       'day'],
      dtype='object')

In [112]:
X = fetch_data[['day','month','year','market_cap', 'total_volume']]
y = fetch_data['price']

In [113]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [114]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [115]:
import joblib
base_model_path = "models\SGDRegressor.joblib"  # Path to the pre-trained model
   

    # Load base model and scaler
model = joblib.load(base_model_path)

In [116]:
model.partial_fit(X_train,y_train)

In [117]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f" Mean Squared Error: {mse:.4f}")
print(f" R² Score: {r2:.4f}")

 Mean Squared Error: 5483972.1922
 R² Score: -118.9513
