In [78]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [79]:
from statsmodels.tsa.seasonal import seasonal_decompose

In [80]:
# Replace these file paths with the paths to your CSV files
file_paths = {
    "code_commits": "token_terminal_data/daily_code_commits.csv",
    "core_developers": "token_terminal_data/daily_core_developers.csv",
    "crv_price": "token_terminal_data/daily_crv_price.csv",
    "crv_token_holders": "token_terminal_data/daily_crv_token_holders.csv",
    "crv_trading_volume": "token_terminal_data/daily_crv_trading_volume.csv",
    "earnings": "token_terminal_data/daily_earnings.csv",
    "expenses": "token_terminal_data/daily_expenses.csv",
    "fees": "token_terminal_data/daily_fees.csv",
    "market_cap_circulating": "token_terminal_data/daily_market_cap_circulating.csv",
    "market_cap_fully_diluted": "token_terminal_data/daily_market_cap_fully_diluted.csv",
    "pf_ratio_circulating": "token_terminal_data/daily_pf_ratio_circulating.csv",
    "pf_ratio_fully_diluted": "token_terminal_data/daily_pf_ratio_fully_diluted.csv",
    "ps_ratio_circulating": "token_terminal_data/daily_ps_ratio_circulating.csv",
    "ps_ratio_fully_diluted": "token_terminal_data/daily_ps_ratio_fully_diluted.csv",
    "revenue": "token_terminal_data/daily_revenue.csv",
    "supplyside_fees": "token_terminal_data/daily_supplyside_fees.csv",
    "token_incentives": "token_terminal_data/daily_token_incentives.csv",
    "trading_volume_on_curve": "token_terminal_data/daily_trading_volume_on_curve.csv",
    "treasury": "token_terminal_data/daily_treasury.csv",
    "tvl": "token_terminal_data/daily_tvl.csv",
    "pool": "token_terminal_data/wbtc_weth_usdt_pool_data.csv"
}


# Initialize an empty DataFrame to merge all datasets
combined_df = None

# Read each file and merge it into the combined DataFrame
for file_label, file_path in file_paths.items():
    # Read the CSV file
    df = pd.read_csv(file_path)

    # Assuming 'date' is the common column across all files
    if combined_df is None:
        combined_df = df
    else:
        combined_df = pd.merge(combined_df, df, on="date", how="outer")

In [81]:
list(combined_df.columns)

['date',
 'code_commits',
 'core_developers',
 'price',
 'crv_token_holders',
 'crv_trading_volume',
 'earnings',
 'expenses',
 'fees',
 'market_cap_circulating',
 'market_cap_fully_diluted',
 'p_f_ratio_circulating',
 'p_f_ratio_fully_diluted',
 'p_s_ratio_circulating',
 'p_s_ratio_fully_diluted',
 'revenue',
 'supply_side_fees',
 'token_incentives',
 'trading_volume',
 'treasury',
 'tvl',
 'pool_transaction_volume']

In [95]:
X = combined_df[['code_commits',
 'price',
 'crv_token_holders',
 'crv_trading_volume',
 'fees',
 'market_cap_circulating',
 'p_f_ratio_circulating',
 'token_incentives',
 'trading_volume',
 'tvl']]
y = combined_df['pool_transaction_volume']

X = X.fillna(X.mean())

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Implement the Linear Regression Model
model = LinearRegression()

# Train the Model
model.fit(X_train, y_train)

# Predict using the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

Mean Squared Error: 1858944914036821.0
R^2 Score: 0.4622714932061103
