In [None]:
from pymongo import MongoClient
import pandas as pd
from datetime import timedelta

def get_latest_timestamp(collection):
    """
    Return the ``timestamp`` value of the newest document in ``collection``.

    The collection is queried sorted by ``timestamp`` in descending order and
    capped at one result. ``None`` is returned when the query yields nothing.
    """
    newest_first = collection.find().sort("timestamp", -1).limit(1)
    # Drain the cursor; with the limit above it holds at most one document.
    stamps = [record['timestamp'] for record in newest_first]
    return stamps[-1] if stamps else None

def load_data_from_db(collection, start_date, end_date):
    """
    Fetch the documents whose ``timestamp`` lies between two bounds.

    Both ``start_date`` and ``end_date`` are inclusive. The ``_id`` field is
    excluded from the result, which is returned as a pandas DataFrame.
    """
    timestamp_window = {"$gte": start_date, "$lte": end_date}
    without_id = {"_id": 0}  # drop the _id field from every document
    rows = list(collection.find({"timestamp": timestamp_window}, without_id))
    return pd.DataFrame(rows)

# MongoDB connection setup
client = MongoClient('mongodb://localhost:27017/')  # Adjust as needed
db = client['crypto_db']  # Your database name
collection = db['ETH-USDT-SWAP-1m']  # Your collection name

# One layout is used both to parse the stored timestamp and to build the query
# bounds. The bounds were previously formatted as '%Y%m%d%H%M%S', which does
# not compare correctly against values stored in the layout parsed below.
# NOTE(review): assumes timestamps are stored as strings in this layout, as the
# parsing step implies - confirm against the collection.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

# Get the latest timestamp in the collection
latest_timestamp_str = get_latest_timestamp(collection)
if latest_timestamp_str:
    # Convert the latest timestamp to a datetime object
    latest_timestamp = pd.to_datetime(latest_timestamp_str, format=TIMESTAMP_FORMAT)

    # Calculate the start date as 1 month before the latest timestamp
    start_date = latest_timestamp - pd.DateOffset(months=1)

    # Convert both bounds back to strings in the same layout the data uses
    start_date_str = start_date.strftime(TIMESTAMP_FORMAT)
    end_date_str = latest_timestamp.strftime(TIMESTAMP_FORMAT)

    # Load data from the database between start_date and end_date (inclusive)
    df = load_data_from_db(collection, start_date_str, end_date_str)

    print(df)
else:
    print("No data found in the collection.")


In [None]:
import pandas as pd
# Read every document of `collection` (no filter), keeping only the k-line fields.
data = collection.find(projection={"_id": 0, "timestamp": 1, "instId":1, "bar": 1, "open": 1, "high": 1, "low": 1, "close": 1, "volume": 1})
data_list = list(data)

if data_list:
    df = pd.DataFrame(data_list)
    print(df)
else:
    # No date range is applied above, so an empty result means the collection
    # itself holds no documents.
    print("No data found in the collection.")


In [None]:
# Show summary statistics for the `df` built by the preceding cells.
df.describe()

In [None]:
from pymongo import MongoClient
from datetime import datetime
import pandas as pd

def get_crypto_kline(collection, instId, bar, start_date=None, end_date=None):
    """
    Fetches k-line data from a MongoDB collection, optionally bounded by date.

    Parameters:
    - collection: A pymongo collection object for querying.
    - instId: The instrument ID. Accepted for interface compatibility but NOT
      applied as a filter; every document of ``collection`` is eligible.
    - bar: The granularity of the k-line data (e.g., "1D" for daily). Accepted
      but NOT applied as a filter, for the same reason as ``instId``.
    - start_date: Optional "YYYY-MM-DD" string, read as a UTC calendar day.
      Documents from 00:00:00 of that day onward are returned (inclusive).
    - end_date: Optional "YYYY-MM-DD" string, read as a UTC calendar day.
      Documents up to the last millisecond of that day are returned (inclusive).

    Returns:
    - A pandas DataFrame containing the k-line data. When it is not empty, the
      ``timestamp`` column is converted from epoch milliseconds to datetimes.

    Raises:
    - ValueError: if a supplied date string does not match "%Y-%m-%d".
    """
    from datetime import timezone  # local import: only `datetime` is imported at file level

    ms_per_day = 24 * 60 * 60 * 1000

    def _utc_midnight_ms(day):
        # Read the date as UTC so the bounds line up with the UTC datetimes
        # that pd.to_datetime(unit='ms') produces below. A naive datetime would
        # be interpreted in the machine's local timezone instead.
        midnight = datetime.strptime(day, "%Y-%m-%d").replace(tzinfo=timezone.utc)
        return int(midnight.timestamp() * 1000)

    timestamp_range = {}
    if start_date:
        timestamp_range["$gte"] = _utc_midnight_ms(start_date)
    if end_date:
        # Extend to the final millisecond of the day so the end date really is
        # inclusive. Midnight alone would drop almost the whole day.
        timestamp_range["$lte"] = _utc_midnight_ms(end_date) + ms_per_day - 1

    # instId / bar are intentionally left out of the filter (see docstring).
    query_filter = {"timestamp": timestamp_range} if timestamp_range else {}

    # Perform the query, excluding MongoDB's _id field from the documents
    cursor = collection.find(query_filter, {'_id': 0})
    # Convert the cursor to a pandas DataFrame
    df = pd.DataFrame(list(cursor))

    # Convert epoch-millisecond timestamps to readable datetimes
    if not df.empty:
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

    return df

# Example usage
if __name__ == "__main__":
    client = MongoClient('mongodb://192.168.31.120:27017/')
    db = client['crypto_db']  # Adjust as per your MongoDB setup
    collection = db['ETH-USDT-SWAP-1m']  # Adjust as per your MongoDB setup

    # Keep the labels in sync with the collection being read. The data comes
    # from the ETH-USDT-SWAP 1m collection, so labelling it BTC was misleading.
    # get_crypto_kline does not filter on these values.
    instId = "ETH-USDT-SWAP"
    bar = "1m"
    # start_date = "2021-01-01"
    # end_date = "2021-12-31"

    df = get_crypto_kline(collection, instId, bar)
    print(df)


In [None]:
from kaki.datafeed.reader.MongoDataReader import DownloadData
# Create the project's reader for the "crypto" target and request
# BTC-USDT-SWAP bars at the "1W" granularity.
# NOTE(review): fields="full" presumably returns every stored field - confirm
# against kaki's DownloadData implementation.
reader = DownloadData(target="crypto")
df = reader.download(symbol="BTC-USDT-SWAP", bar="1W", fields="full")

In [None]:
# Display the frame returned by the download in the previous cell.
df

In [None]:
import mplfinance as mpf

# Index the frame by time and order it chronologically for plotting.
# The guard makes the cell safe to re-run: once 'timestamp' has become the
# index it is no longer a column, so an unconditional set_index would raise
# KeyError on a second execution.
if df.index.name != 'timestamp':
    df = df.set_index('timestamp')
df = df.sort_index()



In [None]:
import os
import shutil
# Start from an empty output directory: if it already exists, remove it by force
output_dir = './output-full'
chunk_size = 150  # number of candles drawn on each image
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.mkdir(output_dir)

# Plotting the K-line chart: one image per consecutive chunk of rows
for i in range(0, len(df), chunk_size):
    chunk = df[i:i + chunk_size]
    first_row = chunk.iloc[0]  # instrument / bar of the chunk's first candle label the chart
    savefig_options = {
        'fname': f'{output_dir}/high_res_plot_{i}.png',  # Filename to save the plot
        'dpi': 300,  # Increase DPI for higher resolution
        'pad_inches': 0.25,  # Optional: Padding around the figure
    }
    mpf.plot(chunk, type='candle', style='charles',
             title=f"Crypto K-Line Chart:{first_row['instId']}-{first_row['bar']}",
             ylabel='Price', volume=True, mav=(7, 12), savefig=savefig_options)

In [None]:
import pandas as pd
import mplfinance as mpf
# Load the exported candles from CSV, index them by their 'timestamp' column
# (kept as a column as well), order them chronologically and drop the export
# bookkeeping columns.
test_df = pd.read_csv("~/Desktop/crypto_db.ETH-USDT-SWAP-1m.csv")
test_df = (
    test_df
    .set_index(pd.DatetimeIndex(test_df['timestamp']))
    .sort_index()
    .drop(columns=['_id', 'Date'])
)
display(test_df)

# Candlestick chart of the first 1000 rows with volume and 7/12-period moving averages
mpf.plot(
    test_df[:1000],
    type='candle',
    style='charles',
    title='Crypto K-Line Chart:',
    ylabel='Price',
    volume=True,
    mav=(7, 12),
)

In [None]:
# Inspect the index of test_df (set from its 'timestamp' column in the previous cell).
test_df.index

In [None]:
import os
import shutil
# Start from an empty output directory: if it already exists, remove it by force.
# The images are now written into this same directory. Previously the cell
# created './output-full' but saved to './output/', which is never created here.
output_dir = './output-full'
chunk_size = 150  # number of candles drawn on each image
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.mkdir(output_dir)

# Plotting the K-line chart: one image per consecutive chunk of rows
# NOTE(review): this plots `df`, not the `test_df` loaded in the cells just
# above - confirm which frame is intended.
for i in range(0, len(df), chunk_size):
    savefig_options = {
        'fname': f'{output_dir}/high_res_plot_{i}.png',  # Filename to save the plot
        'dpi': 300,  # Increase DPI for higher resolution
        'pad_inches': 0.25,  # Optional: Padding around the figure
    }
    mpf.plot(df[i:i + chunk_size], type='candle', style='charles',
             title='Crypto K-Line Chart:',
             ylabel='Price', volume=True, mav=(7, 12), savefig=savefig_options)

In [None]:
from dataclasses import dataclass
from pymongo import MongoClient
import logging
from dotenv import load_dotenv
from datetime import datetime
from kaki.utils.check_date import date_to_datetime
from typing import Union, Optional
import pandas as pd
load_dotenv("../config/db.env")

class DownloadData:
    def __init__(self, target: str) -> None:
        self.client = MongoClient()  # Assume this is correctly configured to connect to your MongoDB
        self.db = self.client[target]
        self.target = target
    def download(self, symbol: Union[str, None] = "BTC-USDT-SWAP", bar: str = "1D", start_date:str|None = None, end_date: str|None = None, fields=None):
        if start_date is not None:
            start_date = date_to_datetime(start_date)
        if end_date is not None:
            end_date = date_to_datetime(end_date)
        if self.target == "crypto":
            collection = self.db[f"kline-{bar}"]

        if start_date is None and end_date is None:
            query = {
                    "instId": symbol,
                    "bar": bar}
        print(query)
        projection = {}
        if fields == "full":
            projection = {"_id": 0}  # MongoDB returns all fields if projection is empty
        elif fields is None:
            projection = {"_id": 0, "open": 1, "low": 1, "high": 1, "close": 1, "volume": 1}  # Default OLHCV fields
        elif isinstance(fields, list):
            projection = {"_id": 0}
            for field in fields:
                if field in ["open", "low", "high", "close", "volume"]:  # Assuming these are the only valid fields
                    projection[field] = 1
                else:
                    logging.warning(f"Field '{field}' does not exist in the collection.")
                    raise Exception(f"Field '{field}' does not exist in the collection.")
        else:
            raise ValueError("Invalid fields argument. Must be 'full', None, or a list of field names.")

        cursor = collection.find(query, projection)
        # Return pd.DataFrame
        return pd.DataFrame(list(cursor))
        

    def get_collection_date_range(self, collection):
        pipeline = [
            {"$group": {"_id": None, "start_date": {"$min": "$timestamp"}, "end_date": {"$max": "$timestamp"}}}
        ]
        result = list(collection.aggregate(pipeline))
        if result:
            start_date = result[0]['start_date']
            end_date = result[0]['end_date']
            return [start_date, end_date]
        else:
            return [None, None]

# Example usage:
# reader = DownloadData('crypto')
# data = reader.download(symbol='BTC-USDT-SWAP', bar='1D', fields=None)
# This returns the matching bars with the default OHLCV fields.

# Smoke run: download with the default symbol and bar size, keeping every field
# except _id, then print the frame and plot the close price against the timestamp.
if __name__ == "__main__":
    reader = DownloadData('crypto')
    data = reader.download(fields="full")
    print(data)
    data.plot(x='timestamp', y='close')