In [1]:
import pandas as pd
import numpy as np

import matplotlib as plt

%matplotlib inline

In [2]:
# Load the dataframe - it will be called df

df = pd.read_csv("merged_data_new.csv")

In [3]:
df.head(2)
df.tail(2)

Unnamed: 0,date_new,rating,Price,Change %,Volume
372,14-11-2022,0.077,16613.7,0.02,442460
373,15-11-2022,0.2,16895.1,0.02,337150


In [4]:
df.tail(5)

Unnamed: 0,date_new,rating,Price,Change %,Volume
369,11-11-2022,0.094,17049.9,-0.03,466350
370,12-11-2022,-0.118,16795.2,-0.01,192910
371,13-11-2022,-0.143,16324.5,-0.03,210800
372,14-11-2022,0.077,16613.7,0.02,442460
373,15-11-2022,0.2,16895.1,0.02,337150


In [5]:
df.columns

Index(['date_new', 'rating', 'Price', 'Change %', 'Volume'], dtype='object')

In [6]:
print(df.dtypes)

date_new     object
rating      float64
Price        object
Change %    float64
Volume        int64
dtype: object


In [7]:
df['Volume2'] = df['Volume'].astype(float)

In [8]:
print(df.dtypes)

date_new     object
rating      float64
Price        object
Change %    float64
Volume        int64
Volume2     float64
dtype: object


In [9]:
df.drop('Volume', axis=1, inplace=True)

In [10]:
print(df.dtypes)

date_new     object
rating      float64
Price        object
Change %    float64
Volume2     float64
dtype: object


In [11]:
def convert_to_float(value):
    try:
        return float(value.replace(',', ''))
    except ValueError:
        return value

df['Price'] = df['Price'].apply(convert_to_float)

In [12]:
print(df.dtypes)

date_new     object
rating      float64
Price       float64
Change %    float64
Volume2     float64
dtype: object


In [13]:
df.corr()

Unnamed: 0,rating,Price,Change %,Volume2
rating,1.0,0.043736,0.075463,0.002618
Price,0.043736,1.0,0.041296,-0.344527
Change %,0.075463,0.041296,1.0,-0.050543
Volume2,0.002618,-0.344527,-0.050543,1.0


In [14]:
bitcoin_train = df[df['date_new'] > '06-15-2022']
bitcoin_test = df[df['date_new'] <= '06-15-2022']


len(bitcoin_train), len(bitcoin_test)

(300, 74)

In [15]:
import statsmodels.formula.api as smf

ols = smf.ols(formula='Price ~ rating', 
                 data=bitcoin_train)
model1 =ols.fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                  Price   R-squared:                       0.030
Model:                            OLS   Adj. R-squared:                  0.027
Method:                 Least Squares   F-statistic:                     9.173
Date:                Thu, 23 Feb 2023   Prob (F-statistic):            0.00267
Time:                        23:19:23   Log-Likelihood:                -3267.1
No. Observations:                 300   AIC:                             6538.
Df Residuals:                     298   BIC:                             6546.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   3.192e+04    921.457     34.635      0.0

In [16]:
def OSR2(model, df_train, df_test, dependent_var):   
    y_test = df_test[dependent_var]
    y_pred = model.predict(df_test)
    SSE = np.sum((y_test - y_pred)**2)
    SST = np.sum((y_test - np.mean(df_train[dependent_var]))**2)    
    return 1 - SSE/SST

OSR2(model1, bitcoin_train, bitcoin_test, 'Price')

-0.19570156110365566