In [1]:
# 1. Importing Libraries & Dataset

# __init__.py
"""
Crypto Analysis Package
"""
# data_loader.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def load_data(file_path_16, file_path_17):
    """
    Read the two daily CSV snapshots and stack them into a single DataFrame.

    Parameters:
    - file_path_16: str, path to the CSV file for March 16th
    - file_path_17: str, path to the CSV file for March 17th

    Returns:
    - data: DataFrame, rows of the first file followed by rows of the second,
      re-indexed from 0
    """
    # Read the files in argument order so the first file's rows come first
    daily_frames = [pd.read_csv(csv_path) for csv_path in (file_path_16, file_path_17)]

    # ignore_index=True discards each file's own index and numbers rows 0..n-1
    return pd.concat(daily_frames, ignore_index=True)

# 2 Data Preprocessing

# data_preprocessing.py

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def preprocess_data(data):
    """
    Preprocess the cryptocurrency data in place and return it.

    Steps, in order: print the per-column missing-value counts, forward-fill
    missing values, min-max scale the 'price', '24h_volume' and 'mkt_cap'
    columns, parse the 'date' column to datetimes, and sort the rows by date.

    Parameters:
    - data: DataFrame, the combined dataset; must contain the columns
      'price', '24h_volume', 'mkt_cap' and 'date'. It is modified in place.

    Returns:
    - data: DataFrame, preprocessed dataset (the same object that was passed in)
    """
    scaled_columns = ['price', '24h_volume', 'mkt_cap']

    # Check for missing values
    print("Missing values in each column:")
    print(data.isnull().sum())

    # Handle missing values (if any). DataFrame.ffill is the supported
    # spelling; fillna(method='ffill') is deprecated in recent pandas.
    data.ffill(inplace=True)

    # Normalize/scale features.
    # NOTE(review): engineer_features later divides '24h_volume' by 'mkt_cap';
    # after scaling each column independently that quotient is no longer the
    # raw volume / market-cap ratio, and the smallest market cap becomes 0 —
    # confirm this ordering is intended.
    scaler = MinMaxScaler()
    data[scaled_columns] = scaler.fit_transform(data[scaled_columns])

    # Parse dates and sort. A stable sort keeps rows that share the same date
    # in their existing relative order instead of an arbitrary one.
    data['date'] = pd.to_datetime(data['date'])
    data.sort_values('date', kind='mergesort', inplace=True)

    return data

# 3  Feature Engineering

# feature_engineering.py

import pandas as pd

def engineer_features(data):
    """
    Perform feature engineering on the cryptocurrency data.

    Adds the following columns to ``data`` in place:
    - 'liquidity_ratio': '24h_volume' divided by 'mkt_cap'; NaN where
      'mkt_cap' is 0 (instead of +/-inf)
    - '7d_moving_avg', '14d_moving_avg': mean of 'price' over the last 7 / 14
      rows (NaN until a full window is available)
    - 'price_change_pct': row-over-row percentage change of 'price'
    - 'volatility': standard deviation of 'price' over the last 7 rows

    Parameters:
    - data: DataFrame, the preprocessed dataset; must contain the columns
      'price', '24h_volume' and 'mkt_cap'

    Returns:
    - data: DataFrame, dataset with engineered features (the same object that
      was passed in)
    """
    price = data['price']
    market_cap = data['mkt_cap']

    # Calculate Liquidity Ratio. Dividing by a zero market cap would yield
    # +/-inf, which breaks summary statistics and model fitting downstream,
    # so those rows are reported as missing instead.
    data['liquidity_ratio'] = (data['24h_volume'] / market_cap).where(market_cap != 0)

    # Calculate Moving Averages.
    # NOTE(review): windows run over consecutive rows, whatever they represent;
    # if the frame interleaves several coins the windows mix them — confirm.
    data['7d_moving_avg'] = price.rolling(window=7).mean()
    data['14d_moving_avg'] = price.rolling(window=14).mean()

    # Calculate Price Change Percentages
    data['price_change_pct'] = price.pct_change() * 100

    # Calculate Volatility Indicators
    data['volatility'] = price.rolling(window=7).std()

    return data

# 4  Exploratory Data Analysis (EDA).

# exploratory_analysis.py

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def perform_eda(data):
    """
    Perform exploratory data analysis on the cryptocurrency data.

    Prints summary statistics, then shows three figures: a correlation heatmap
    of the numeric columns, a scatter plot of price against 24h volume, and a
    line plot of the liquidity ratio over the 'date' column.

    Parameters:
    - data: DataFrame, the dataset with engineered features; must contain the
      columns 'price', '24h_volume', 'date' and 'liquidity_ratio'

    Returns:
    - None
    """
    # Summary statistics
    print("Summary Statistics:")
    print(data.describe())

    # Correlation matrix. Restrict to numeric columns explicitly: on
    # pandas >= 2.0 DataFrame.corr() no longer drops non-numeric columns
    # (text labels, dates) silently and raises on them instead.
    numeric_data = data.select_dtypes(include='number')
    correlation_matrix = numeric_data.corr()
    plt.figure(figsize=(10, 8))
    sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='coolwarm')
    plt.title('Correlation Matrix')
    plt.show()

    # Visualizations
    plt.figure(figsize=(12, 6))
    plt.scatter(data['24h_volume'], data['price'])
    plt.title('Price vs Volume')
    plt.xlabel('24h Volume')
    plt.ylabel('Price')
    plt.show()

    # Liquidity ratio trends
    plt.figure(figsize=(12, 6))
    plt.plot(data['date'], data['liquidity_ratio'])
    plt.title('Liquidity Ratio Trends')
    plt.xlabel('Date')
    plt.ylabel('Liquidity Ratio')
    plt.show()

# 5 Model Development

# model_development.py

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def develop_model(data):
    """
    Develop and evaluate a Random Forest model for predicting liquidity ratio.

    Rows with a missing or infinite value in any feature or in the target are
    dropped, the remainder is split 80/20 into train and test sets, a
    RandomForestRegressor is fitted on the training part and scored on the
    test part. The metrics are printed and returned.

    Parameters:
    - data: DataFrame, the dataset with engineered features; must contain the
      columns 'price', '24h_volume', 'mkt_cap', '7d_moving_avg',
      '14d_moving_avg' and 'liquidity_ratio'

    Returns:
    - metrics: dict, evaluation metrics of the model with keys 'MAE', 'RMSE'
      and 'R²'

    Raises:
    - ValueError: if fewer than two usable rows remain after dropping
      non-finite values
    """
    feature_cols = ['price', '24h_volume', 'mkt_cap', '7d_moving_avg', '14d_moving_avg']
    target_col = 'liquidity_ratio'

    # The rolling-window features are NaN until a full window is available and
    # a ratio can be inf/NaN when the divisor is 0; the estimator cannot be
    # fitted on such rows, so keep only rows where every value is finite.
    model_frame = (
        data[feature_cols + [target_col]]
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )
    if len(model_frame) < 2:
        raise ValueError(
            "Not enough rows with finite feature/target values to train and "
            f"evaluate a model (found {len(model_frame)})."
        )

    # Split data into features and target.
    # NOTE(review): the target is derived from '24h_volume' and 'mkt_cap',
    # which are also features — confirm this is the intended setup.
    X = model_frame[feature_cols]
    y = model_frame[target_col]

    # Split into train/test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Use Random Forest Regressor; seed it like the split so that repeated
    # runs on the same data report the same metrics.
    model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train)

    # Predictions
    y_pred = model.predict(X_test)

    # Evaluation
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    metrics = {
        'MAE': mae,
        'RMSE': rmse,
        'R²': r2
    }

    print(f'MAE: {mae}, RMSE: {rmse}, R²: {r2}')
    return metrics


# 6  Hyperparameter Tuning

# hyperparameter_tuning.py

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

def tune_hyperparameters(X_train, y_train):
    """
    Grid-search Random Forest hyperparameters with 3-fold cross-validation.

    Parameters:
    - X_train: DataFrame, training features
    - y_train: Series, training target

    Returns:
    - best_params: dict, the parameter combination GridSearchCV selected
    """
    # Candidate values; every combination of the three lists is tried
    search_space = dict(
        n_estimators=[100, 200],
        max_depth=[None, 10, 20],
        min_samples_split=[2, 5],
    )

    # Fit one search object over a default-constructed forest
    searcher = GridSearchCV(RandomForestRegressor(), search_space, cv=3)
    searcher.fit(X_train, y_train)

    # Report the winning combination and hand it back to the caller
    best_params = searcher.best_params_
    print(f'Best parameters: {best_params}')
    return best_params

# 7 Model Evaluation

# model_evaluation.py

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def evaluate_model(y_test, y_pred):
    """
    Show an actual-vs-predicted scatter plot and print MAE, RMSE and R².

    Parameters:
    - y_test: Series, actual target values
    - y_pred: array, predicted target values

    Returns:
    - None
    """
    # End points of the dashed reference diagonal (where predicted == actual)
    lowest, highest = y_test.min(), y_test.max()
    diagonal = [lowest, highest]

    # Plot actual vs predicted liquidity
    plt.figure(figsize=(12, 6))
    plt.scatter(y_test, y_pred)
    plt.plot(diagonal, diagonal, 'k--', lw=2)
    plt.title('Actual vs Predicted Liquidity Ratio')
    plt.xlabel('Actual Liquidity Ratio')
    plt.ylabel('Predicted Liquidity Ratio')
    plt.show()

    # Compute the scores after the plot has been shown, then print them
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    print(f'MAE: {mae}')
    print(f'RMSE: {rmse}')
    print(f'R²: {r2}')

# 8 (Optional) Deployment Preview

# model_evaluation.py

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def evaluate_model(y_test, y_pred):
    """
    Plot actual against predicted values and print the evaluation scores.

    NOTE: this re-defines `evaluate_model`, which is already defined earlier
    in this file with the same behavior; this later definition is the one the
    name ends up bound to.

    Parameters:
    - y_test: Series, actual target values
    - y_pred: array, predicted target values

    Returns:
    - None
    """
    # Scatter of predictions with a dashed diagonal spanning the actual range
    actual_range = [y_test.min(), y_test.max()]
    plt.figure(figsize=(12, 6))
    plt.scatter(y_test, y_pred)
    plt.plot(actual_range, actual_range, 'k--', lw=2)
    plt.title('Actual vs Predicted Liquidity Ratio')
    plt.xlabel('Actual Liquidity Ratio')
    plt.ylabel('Predicted Liquidity Ratio')
    plt.show()

    # Evaluation scores, printed one per line
    mean_abs_error = mean_absolute_error(y_test, y_pred)
    root_mean_sq_error = np.sqrt(mean_squared_error(y_test, y_pred))
    r_squared = r2_score(y_test, y_pred)

    print(f'MAE: {mean_abs_error}')
    print(f'RMSE: {root_mean_sq_error}')
    print(f'R²: {r_squared}')



1. Introduction & Problem Statement
Project Goal: The goal of this project is to analyze cryptocurrency market data from March 16th and 17th, 2022, to predict the liquidity ratio of various cryptocurrencies using machine learning techniques. The liquidity ratio is defined as the ratio of 24-hour trading volume to market capitalization.

2. Solution Statement

We collected and combined historical cryptocurrency data from two different dates.
Feature engineering was applied to create liquidity indicators like the liquidity ratio.
A machine learning model (Random Forest Regressor) was trained to predict liquidity.
Model performance was evaluated using RMSE, MAE, and R² score.
This solution helps identify low-liquidity scenarios early, supporting stable market decisions.

Dataset used

https://drive.google.com/file/d/1ihTpQ6Ej4HYL2upgPUmnu_g2mp1prG6N/view?usp=drive_link

https://drive.google.com/file/d/1RKo7P1s35uqQDtkYjorzPNsVLW5SnPmU/view?usp=drive_link

Tech Stack Used

Python

FastAPI

Machine learning algorithms

Docker

MongoDB

Infrastructure required

AWS S3

Azure

GitHub Actions

How to run

2. Importing Libraries & Dataset

Step 1: Create the Package Structure

crypto_analysis/
│
├── __init__.py
├── data_loader.py
└── requirements.txt

Step 2: Write the Code

# __init__.py
"""
Crypto Analysis Package
"""
# requirements.txt

pandas
numpy
matplotlib
seaborn
scikit-learn

Step 3: Install the Package
pip install -e .

3. Data Preprocessing

Step 1: Update the Package Structure

crypto_analysis/
│
├── __init__.py
├── data_loader.py
├── data_preprocessing.py
└── requirements.txt

Step 2: Install the Package

pip install -e .

4. Feature Engineering

Step 1: Update the Package Structure

crypto_analysis/
│
├── __init__.py
├── data_loader.py
├── data_preprocessing.py
├── feature_engineering.py
└── requirements.txt

Step 2: Install the Package

pip install -e .

5. Exploratory Data Analysis (EDA)

Step 1: Update the Package Structure

crypto_analysis/
│
├── __init__.py
├── data_loader.py
├── data_preprocessing.py
├── feature_engineering.py
├── exploratory_analysis.py
└── requirements.txt

Step 2: Install the Package

pip install -e .

6. Model Development

Step 1: Update the Package Structure
crypto_analysis/
│
├── __init__.py
├── data_loader.py
├── data_preprocessing.py
├── feature_engineering.py
├── exploratory_analysis.py
├── model_development.py
└── requirements.txt

Step 2: Install the Package

pip install -e .

7. Hyperparameter Tuning

Step 1: Update the Package Structure

crypto_analysis/
│
├── __init__.py
├── data_loader.py
├── data_preprocessing.py
├── feature_engineering.py
├── exploratory_analysis.py
├── model_development.py
├── hyperparameter_tuning.py
└── requirements.txt

Step 2: Install the Package

pip install -e .

8. Model Evaluation

Step 1: Update the Package Structure

crypto_analysis/
│
├── __init__.py
├── data_loader.py
├── data_preprocessing.py
├── feature_engineering.py
├── exploratory_analysis.py
├── model_development.py
├── hyperparameter_tuning.py
├── model_evaluation.py
└── requirements.txt

Step 2: Install the Package

pip install -e .

9. (Optional) Deployment Preview

Step 1: Update the Package Structure

crypto_analysis/
│
├── __init__.py
├── data_loader.py
├── data_preprocessing.py
├── feature_engineering.py
├── exploratory_analysis.py
├── model_development.py
├── hyperparameter_tuning.py
├── model_evaluation.py
└── requirements.txt

Step 2: Install the Package
pip install -e .