In [29]:
import os
import sys
import json
from datetime import datetime, timedelta
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pathlib import Path
import logging
from typing import Dict, Optional, List, Any
import requests
import time

# Make the parent of the current working directory importable so that the
# `Birdeye` package imports below can resolve. Note that this is based on the
# process working directory at run time (os.getcwd()), so the notebook must be
# started from the expected folder. `current_dir` is also read later by
# import_telegram_data() to locate the `data` folder.
current_dir = os.getcwd()
root_dir = os.path.abspath(os.path.join(current_dir, '..'))
sys.path.append(root_dir)

# Import custom modules
from Birdeye.Basics.Master_Functions import get_ohlcv_data_multi, format_number
import Birdeye.Basics.dontshare as d


In [30]:
def get_latest_file(folder_path: str, prefix: str) -> Optional[str]:
    """Return the name of the most recent matching CSV file in a folder.

    A file matches when its name starts with ``prefix`` and ends with
    ``.csv``. "Most recent" is decided by taking the lexicographically
    greatest file name, which is chronological only if the names end in a
    sortable timestamp.
    NOTE(review): assumes the timestamp suffix is fixed-width / zero-padded
    (e.g. YYYYMMDD_HHMMSS) -- confirm against the files actually written.

    Args:
        folder_path: Directory to search (not searched recursively).
        prefix: Required beginning of the file name.

    Returns:
        The bare file name (not the full path) of the selected file, or
        ``None`` when the folder does not exist or holds no matching file.
    """
    # A missing folder is treated the same as an empty one so that callers
    # can handle "nothing to load" through the Optional return value instead
    # of an uncaught FileNotFoundError from os.listdir.
    if not os.path.isdir(folder_path):
        return None

    candidates = [
        name for name in os.listdir(folder_path)
        if name.startswith(prefix) and name.endswith('.csv')
    ]
    # max() with default=None covers the "no matching file" case.
    return max(candidates, default=None)

def import_telegram_data() -> Dict[str, pd.DataFrame]:
    """Load the newest CSV from each Telegram data subfolder.

    Looks under ``<current_dir>/data`` for the three known subfolders, picks
    the file chosen by ``get_latest_file`` in each one and reads it with
    ``pd.read_csv``.

    Returns:
        A dict keyed by 'ath_price', 'pump_fdv' and 'solana_fdv'. A key is
        omitted (and a warning is logged) when its subfolder yields no file.
    """
    data_root = os.path.join(current_dir, 'data')

    # result key -> (subfolder name, required file-name prefix)
    sources = {
        'ath_price': ('ATH_Price', 'Telegram_ATH_Price_Data'),
        'pump_fdv': ('PUMP_FDV_Surge', 'Telegram_PUMP_FDV_Surge_Data'),
        'solana_fdv': ('Solana_FDV_Surge', 'Telegram_Solana_FDV_Surge_Data')
    }

    loaded = {}
    for key, (subfolder, prefix) in sources.items():
        folder_path = os.path.join(data_root, subfolder)
        newest = get_latest_file(folder_path, prefix)

        # Nothing usable in this subfolder: report it and move on.
        if not newest:
            logging.warning(f"No CSV files found in {folder_path}")
            continue

        loaded[key] = pd.read_csv(os.path.join(folder_path, newest))

    return loaded

# Usage example: load the newest CSV of every Telegram data folder and print
# the resulting dict (each value is shown as the DataFrame's plain-text repr).
telegram_data = import_telegram_data()
print(telegram_data)

{'ath_price':     row_index                              name  \
0           0                            Sendor   
1           1                               fip   
2           2                   Open Satoshi AI   
3           3                 You’re going back   
4           4                       TopProtocol   
5           5                            CATIME   
6           6                  Bellscoin Mascot   
7           7                        Tyson lost   
8           8                             Forky   
9           9                              goob   
10         10                          SHITLIST   
11         11  League of Extraordinary Memecoin   
12         12                UNITED STATES OF X   
13         13        Franklin The Turtle Puppet   
14         14                                 🍊   
15         15                         meowcoins   
16         16               Chief Troll Officer   
17         17                               kzl   
18         18    

In [37]:
def get_token_data(
    selected_folders: List[str],
    timeframes: List[str],
    num_recent_tokens: int = 3,
    lookback_hours: int = 24
) -> Dict[str, Dict[str, pd.DataFrame]]:
    """
    Get OHLCV data for the most recent tokens of each selected folder.

    For every folder in ``selected_folders`` the newest Telegram CSV is read
    (via ``import_telegram_data``), the ``num_recent_tokens`` rows with the
    highest ``row_index`` are selected, and OHLCV data for their ``token``
    values is requested from ``get_ohlcv_data_multi`` for all ``timeframes``.

    Args:
        selected_folders: Folder names to process. Recognised names are
            'ATH_Price', 'PUMP_FDV_Surge' and 'Solana_FDV_Surge'; any other
            name is logged as invalid and skipped.
        timeframes: Timeframe labels, forwarded unchanged to
            ``get_ohlcv_data_multi``.
        num_recent_tokens: How many rows (highest ``row_index`` first) to take
            from each folder's data.
        lookback_hours: Accepted for interface compatibility but not used by
            this function.

    Returns:
        ``{folder: {timeframe: DataFrame}}``. Each DataFrame is the
        concatenation of the non-empty per-token frames for that timeframe
        with a ``token`` column set to the token address; it is an empty
        DataFrame when no token produced data. Folders that are skipped
        (unknown name, no loaded data, missing ``token`` or ``row_index``
        column) are absent from the result. If fetching or processing raises,
        the error is logged and the folder keeps whatever timeframes were
        completed before the failure (possibly none).
    """
    # Get the telegram data first
    telegram_data = import_telegram_data()
    print("Imported telegram data keys:", telegram_data.keys())

    # Map folder names to their dictionary keys
    folder_mapping = {
        'ATH_Price': 'ath_price',
        'PUMP_FDV_Surge': 'pump_fdv',
        'Solana_FDV_Surge': 'solana_fdv'
    }

    result_data: Dict[str, Dict[str, pd.DataFrame]] = {}

    for folder in selected_folders:
        print(f"\nProcessing folder: {folder}")
        if folder not in folder_mapping:
            logging.warning(f"Invalid folder name: {folder}")
            continue

        folder_key = folder_mapping[folder]
        if folder_key not in telegram_data:
            logging.warning(f"No data found for folder: {folder}")
            continue

        folder_df = telegram_data[folder_key]
        print(f"Columns in {folder} data:", folder_df.columns.tolist())

        if 'token' not in folder_df.columns:
            logging.warning(f"No token column in {folder} data")
            continue

        # The selection below sorts on row_index; skip the folder (as for a
        # missing token column) instead of failing with a KeyError.
        if 'row_index' not in folder_df.columns:
            logging.warning(f"No row_index column in {folder} data")
            continue

        # Highest row_index first, keep the top n rows
        recent_rows = folder_df.sort_values('row_index', ascending=False).head(num_recent_tokens)
        recent_tokens = recent_rows['token'].tolist()
        print(f"Selected tokens for {folder}:", recent_tokens)

        # Store OHLCV data for each timeframe of this folder
        result_data[folder] = {}

        try:
            # Get OHLCV data for all tokens and timeframes at once. The result
            # is indexed below as ohlcv_data[token][timeframe].
            ohlcv_data = get_ohlcv_data_multi(
                tokens=recent_tokens,
                API_Key=d.birdeye,  # keyword is spelled API_Key, not api_key
                timeframes=timeframes
            )

            # Process the data for each timeframe
            for timeframe in timeframes:
                all_token_data = []

                for token in recent_tokens:
                    if token in ohlcv_data and timeframe in ohlcv_data[token]:
                        ohlcv_df = ohlcv_data[token][timeframe]
                        if not ohlcv_df.empty:
                            # assign() returns a new frame, so the frames owned
                            # by get_ohlcv_data_multi's result are not mutated.
                            all_token_data.append(ohlcv_df.assign(token=token))
                            print(f"Successfully processed data for {token} {timeframe}")
                        else:
                            print(f"Empty data for {token} {timeframe}")
                    else:
                        print(f"No data found for {token} {timeframe}")

                if all_token_data:
                    result_data[folder][timeframe] = pd.concat(all_token_data, ignore_index=True)
                    print(f"Created DataFrame for {timeframe} with shape:", result_data[folder][timeframe].shape)
                else:
                    result_data[folder][timeframe] = pd.DataFrame()
                    print(f"No data found for {timeframe}")

        except Exception as e:
            # Best effort: one failing folder must not abort the others.
            logging.error(f"Error processing folder {folder}: {str(e)}")

    return result_data

# Test the function on two of the folders known to get_token_data
selected_folders = ['ATH_Price', 'PUMP_FDV_Surge']
timeframes = ['1m', '5m', '15m', '1H']  # Forwarded as-is to get_ohlcv_data_multi; presumably must be labels that helper supports -- verify
num_recent_tokens = 3
lookback_hours = 24  # NOTE(review): accepted by get_token_data but not read in its body

token_data = get_token_data(
    selected_folders=selected_folders,
    timeframes=timeframes,
    num_recent_tokens=num_recent_tokens,
    lookback_hours=lookback_hours
)

# Prints the nested {folder: {timeframe: DataFrame}} result as plain text
print(token_data)

Imported telegram data keys: dict_keys(['ath_price', 'pump_fdv', 'solana_fdv'])

Processing folder: ATH_Price
Columns in ATH_Price data: ['row_index', 'name', 'token', '5m_change', '1h_change', '6h_change', '5m_transactions', '5m_volume', 'mcp', 'liquidity_sol', 'holders', 'open_time', 'top10_holders', 'dev_hold_%', 'dev_burnt', 'telegram', 'timestamp']
Selected tokens for ATH_Price: ['7dxQE4YXrWqtwUVb8W8HjfPT3t4H4dWZfsDWRWKupump', '8F5mYqeeadXMDqupxukvTK3vp8aeuJALZ13h9FESpump', '6SNrpb2rrKn45CX5bmrDDsZvKC5QAuGZniMPHHmCpump']



Successfully retrieved 7dxQE4YXrWqtwUVb8W8HjfPT3t4H4dWZfsDWRWKupump - 1m data.

Successfully retrieved 7dxQE4YXrWqtwUVb8W8HjfPT3t4H4dWZfsDWRWKupump - 5m data.

Successfully retrieved 7dxQE4YXrWqtwUVb8W8HjfPT3t4H4dWZfsDWRWKupump - 15m data.

Successfully retrieved 7dxQE4YXrWqtwUVb8W8HjfPT3t4H4dWZfsDWRWKupump - 1H data.

No data items found for token 8F5mYqeeadXMDqupxukvTK3vp8aeuJALZ13h9FESpump, timeframe 1m.

No data items found for token 8F5mYqeeadXMDqupxukvTK3vp8aeuJALZ13h9FESpump, timeframe 5m.

No data items found for token 8F5mYqeeadXMDqupxukvTK3vp8aeuJALZ13h9FESpump, timeframe 15m.

No data items found for token 8F5mYqeeadXMDqupxukvTK3vp8aeuJALZ13h9FESpump, timeframe 1H.

No data items found for token 6SNrpb2rrKn45CX5bmrDDsZvKC5QAuGZniMPHHmCpump, timeframe 1m.

No data items found for token 6SNrpb2rrKn45CX5bmrDDsZvKC5QAuGZniMPHHmCpump, timeframe 5m.

No data items found for token 6SNrpb2rrKn45CX5bmrDDsZvKC5QAuGZniMPHHmCpump, timeframe 15m.

No data items found for token 6SNrpb2rr