In [31]:
# Load the necessary libraries and data again
import pandas as pd
from datetime import timedelta

In [2]:
# Load the CSV data (wide layout: every column after the first is iterated as a date column)
data = pd.read_csv('./ranked_by_data.csv')

# Parallel accumulators for the long-format table: one entry per (date column, row) cell
dates = []
cryptos = []
market_caps = []

# Loop over each date column in the dataframe (the first column is skipped)
for col in data.columns[1:]:
    # Loop over each row in the column
    for val in data[col]:
        # Each cell is a string that evaluates to a 2-item (crypto, market_cap) pair.
        # NOTE(review): eval() executes arbitrary code from the CSV; only safe for trusted
        # files. ast.literal_eval would be the safer parser if cells are plain literals
        # -- TODO confirm against the data.
        crypto, market_cap = eval(val)
        # Append the data to the lists
        dates.append(col)
        cryptos.append(crypto)
        market_caps.append(float(market_cap))

# Create a new dataframe with the transformed data; column headers are parsed as dates
df = pd.DataFrame({
    'Date': pd.to_datetime(dates),
    'Crypto': cryptos,
    'MarketCap': market_caps
})

In [32]:
# Load data directly from pickle
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.

datadir = './data'
fn = datadir + "/crypto_market_data_for_3311_coins_20230710_061453.pkl"

# Read through a context manager so the file handle is closed once loading finishes
# (previously the handle was opened, never used and never closed).
with open(fn, 'rb') as f:
    data = pd.read_pickle(f)

In [33]:

# Collect one long-format frame per coin, then stack them
dfs = []
for crypto, crypto_data in data.items():
    # Only dict payloads are processed; anything else is ignored
    if isinstance(crypto_data, dict):
        cap_points = crypto_data['market_caps']
        temp_df = pd.DataFrame({
            'Date': [point[0] for point in cap_points],
            'MarketCap': [point[1] for point in cap_points],
        }).assign(
            # The first item of each point is converted as a millisecond epoch value
            Date=lambda frame: pd.to_datetime(frame['Date'], unit='ms'),
            # Tag every row with the coin it belongs to
            Crypto=crypto,
        )
        dfs.append(temp_df)

# Stack all per-coin frames into one table with a fresh index
crypto_df = pd.concat(dfs, ignore_index=True)

# Keep rows with a non-zero market cap dated on or after 2017-01-01
start_date = '2017-01-01'
keep_rows = (crypto_df['MarketCap'] != 0) & (crypto_df['Date'] >= start_date)
crypto_df = crypto_df[keep_rows]

df = crypto_df

In [2]:
#latest
import pandas as pd
from datetime import datetime, timedelta
from pymongo import MongoClient

def connect_to_db(uri='mongodb://localhost:27017/', db_name='crypto_db'):
    """Open a MongoDB client and return a handle to one database.

    Parameters
    ----------
    uri : str
        MongoDB connection string. Defaults to the local instance the
        notebook originally hard-coded.
    db_name : str
        Name of the database to return. Defaults to ``'crypto_db'``.

    Returns
    -------
    The database object obtained by indexing the client with ``db_name``.
    """
    # Replace the defaults with your actual database connection settings
    client = MongoClient(uri)
    return client[db_name]

def get_historical_dataframe(years=1):
    """Load recent rows of ``historical_data`` into a DataFrame.

    Parameters
    ----------
    years : int or float
        Look-back window; documents whose ``timestamp`` is at least
        ``now - 365 * years`` days are selected.

    Returns
    -------
    pandas.DataFrame
        Columns renamed to ``Date``, ``Crypto``, ``Price``, ``MarketCap`` and
        ``Volume``, with rows whose ``MarketCap`` equals 0 removed. When the
        query matches nothing, an empty frame with those columns is returned.
    """
    column_names = {
        'timestamp': 'Date',
        'coin_id': 'Crypto',
        'price': 'Price',
        'market_cap': 'MarketCap',
        'volume': 'Volume'
    }

    db = connect_to_db()
    # NOTE(review): datetime.now() is naive local time -- confirm this matches how
    # `timestamp` values are stored in the collection.
    start_date = datetime.now() - timedelta(days=365 * years)
    pipeline = [
        {
            "$match": {
                "timestamp": {"$gte": start_date}
            }
        },
        {
            # Flatten the nested `stats` sub-document into top-level fields
            "$project": {
                "_id": 0,
                "timestamp": 1,
                "coin_id": 1,
                "price": "$stats.price",
                "market_cap": "$stats.market_cap",
                "volume": "$stats.volume"
            }
        }
    ]
    records = list(db.historical_data.aggregate(pipeline))

    # An empty result would produce a frame with no columns, and the MarketCap
    # filter below would raise KeyError; return a well-formed empty frame instead.
    if not records:
        return pd.DataFrame(columns=list(column_names.values()))

    df = pd.DataFrame(records).rename(columns=column_names)
    # Filter out rows with zero market cap
    df = df[df['MarketCap'] != 0]
    return df



In [3]:
df = get_historical_dataframe(years = 5)

In [4]:
# Order rows by date, and within each date by market cap from largest to smallest
ordered = df.sort_values(by=['Date', 'MarketCap'], ascending=[True, False])

# Rank 1 is the largest market cap on a date; ties are broken by row order
df = ordered.assign(
    Rank=ordered.groupby('Date')['MarketCap'].rank(method='first', ascending=False)
)

In [5]:
crypto_list = df.Crypto.unique()

In [6]:
len(crypto_list)

7390

In [8]:
# Cryptocurrencies that moved from outside the top x*100 to inside it over a 7-day window
def get_gaining_rank_by_cryptos_old(x, start_date):
    """Return coins ranked worse than ``x*100`` on ``start_date`` and at or
    better than ``x*100`` six days later.

    Reads the module-level ``df`` (columns ``Date``, ``Crypto``, ``Rank``).

    Parameters
    ----------
    x : number
        Rank threshold in hundreds (the cutoff is ``x * 100``).
    start_date : str or datetime-like
        First day of the window; the last day is ``start_date + 6 days``.

    Returns
    -------
    dict
        ``{crypto: (rank_on_start, rank_on_end)}`` ordered by end rank.
    """
    first_day = pd.to_datetime(start_date)
    last_day = first_day + timedelta(days=6)
    cutoff = x * 100

    # Restrict to the window, then isolate its two boundary days
    window = df.loc[(df['Date'] >= first_day) & (df['Date'] <= last_day)]
    opening = window[window['Date'] == first_day]
    closing = window[window['Date'] == last_day]

    was_outside = opening[opening['Rank'] > cutoff]['Crypto'].unique()
    now_inside = closing[closing['Rank'] <= cutoff]['Crypto'].unique()
    movers = [name for name in was_outside if name in now_inside]

    def _first_rank(day_rows, name):
        # First matching row wins if a coin appears more than once on a day
        return day_rows[day_rows['Crypto'] == name]['Rank'].values[0]

    pairs = [
        (name, (_first_rank(opening, name), _first_rank(closing, name)))
        for name in movers
    ]
    pairs.sort(key=lambda pair: pair[1][1])  # best (lowest) end rank first
    return dict(pairs)

In [9]:
# Same selection as the tuple-returning variant, but each rank is keyed by its date
def get_gaining_rank_by_cryptos(x, start_date):
    """Return coins ranked worse than ``x*100`` on ``start_date`` and at or
    better than ``x*100`` six days later, with ranks labelled by date.

    Reads the module-level ``df`` (columns ``Date``, ``Crypto``, ``Rank``).

    Parameters
    ----------
    x : number
        Rank threshold in hundreds (the cutoff is ``x * 100``).
    start_date : str or datetime-like
        First day of the window; the last day is ``start_date + 6 days``.

    Returns
    -------
    dict
        ``{crypto: {'YYYY-MM-DD' start: rank, 'YYYY-MM-DD' end: rank}}``
        ordered by the end-date rank.
    """
    first_day = pd.to_datetime(start_date)
    last_day = first_day + timedelta(days=6)
    cutoff = x * 100

    # Restrict to the window, then isolate its two boundary days
    window = df.loc[(df['Date'] >= first_day) & (df['Date'] <= last_day)]
    opening = window[window['Date'] == first_day]
    closing = window[window['Date'] == last_day]

    was_outside = opening[opening['Rank'] > cutoff]['Crypto'].unique()
    now_inside = closing[closing['Rank'] <= cutoff]['Crypto'].unique()
    movers = [name for name in was_outside if name in now_inside]
    if not movers:
        return {}

    first_key = str(first_day.date())
    last_key = str(last_day.date())

    labelled = {}
    for name in movers:
        # First matching row wins if a coin appears more than once on a day
        labelled[name] = {
            first_key: opening[opening['Crypto'] == name]['Rank'].values[0],
            last_key: closing[closing['Crypto'] == name]['Rank'].values[0],
        }

    # Best (lowest) end rank first
    return dict(sorted(labelled.items(), key=lambda pair: pair[1][last_key]))


In [12]:
# Run the function with x = 2 for the 7-day window starting 2024-11-03
get_gaining_rank_by_cryptos(2, '2024-11-03')

{}

In [13]:
# Gaining rank: outside of top x*100 to inside top x*100
     # Before: Rank > x * 100
     # After:  Rank <= x * 100
# Gaining rank: outside of top x*100 to between x*100 and (x-1)*100
     # Before: Rank > x * 100
     # After: (x-1)*100 < Rank <= x * 100
# Losing rank: top x*100 to outside of top x*100
     # Before: Rank <= x * 100
     # After:  Rank > x * 100
# Losing rank: top x*100 to between top x*100 and (x+1)*100
     # Before: Rank <= x * 100
     # After: x*100 < Rank <= (x+1) * 100
# Top Coins by gained rank in a given period
     # Sort the coins by the increase in rank in a given period (from_week, to_week)
# Top Coins by lost rank in a given period
    # Sort the coins by the decrease in rank in a given period (from_week, to_week)

In [51]:
# Function 2: Gaining rank: outside of top x*100 to between (x-1)*100 and x*100
def get_gaining_rank_by_cryptos_2(x, start_date):
    """Return coins ranked worse than ``x*100`` on ``start_date`` that sit in
    the band ``(x-1)*100 < rank <= x*100`` six days later.

    Reads the module-level ``df`` (columns ``Date``, ``Crypto``, ``Rank``).

    Parameters
    ----------
    x : number
        Band selector in hundreds.
    start_date : str or datetime-like
        First day of the window; the last day is ``start_date + 6 days``.

    Returns
    -------
    dict
        ``{crypto: (rank_on_start, rank_on_end)}`` ordered by end rank.
    """
    first_day = pd.to_datetime(start_date)
    last_day = first_day + timedelta(days=6)
    upper = x * 100
    lower = (x - 1) * 100

    window = df.loc[(df['Date'] >= first_day) & (df['Date'] <= last_day)]
    opening = df[df['Date'] == first_day]
    closing = window[window['Date'] == last_day]

    was_outside = opening[opening['Rank'] > upper]['Crypto'].unique()
    in_band = (closing['Rank'] <= upper) & (closing['Rank'] > lower)
    now_in_band = closing[in_band]['Crypto'].unique()
    movers = [name for name in was_outside if name in now_in_band]

    def _first_rank(name, day):
        # First matching row wins if a coin appears more than once on a day
        rows = window[(window['Crypto'] == name) & (window['Date'] == day)]
        return rows['Rank'].values[0]

    pairs = [
        (name, (_first_rank(name, first_day), _first_rank(name, last_day)))
        for name in movers
    ]
    pairs.sort(key=lambda pair: pair[1][1])  # best (lowest) end rank first
    return dict(pairs)



In [52]:
# Run the function with x = 2 and the third week of January, 2023
get_gaining_rank_by_cryptos_2(2, '2023-01-15')

{'klaycity-orb': (879.0, 166.0),
 'nucypher': (207.0, 177.0),
 'lukso-token': (201.0, 189.0),
 'synapse-2': (213.0, 192.0),
 'gains-farm': (208.0, 193.0),
 'constellation-labs': (211.0, 196.0),
 'swipe': (206.0, 199.0)}

In [50]:
import pprint as pp
# outside of top 200 to inside top 200
a = get_gaining_rank_by_cryptos(2, '2023-02-15')
pp.pprint(a)


{'alchemy-pay': {'2023-02-15': 339.0, '2023-02-21': 197.0},
 'biconomy': {'2023-02-15': 205.0, '2023-02-21': 192.0},
 'conflux-token': {'2023-02-15': 253.0, '2023-02-21': 78.0},
 'nervos-network': {'2023-02-15': 245.0, '2023-02-21': 198.0}}


In [53]:

# outside of top 200 to inside top 100 - top 200
b = get_gaining_rank_by_cryptos_2(2, '2023-02-15') 
pp.pprint(b)

{'alchemy-pay': (339.0, 197.0),
 'biconomy': (205.0, 192.0),
 'nervos-network': (245.0, 198.0)}


In [36]:
# Load the necessary libraries and data again
import pandas as pd
from datetime import timedelta

# Load the CSV data (wide layout: every column after the first is iterated as a date column)
# NOTE(review): this cell repeats the CSV reshaping done in an earlier cell of the notebook.
data = pd.read_csv('./ranked_by_data.csv')

# Parallel accumulators for the long-format table: one entry per (date column, row) cell
dates = []
cryptos = []
market_caps = []

# Loop over each date column in the dataframe (the first column is skipped)
for col in data.columns[1:]:
    # Loop over each row in the column
    for val in data[col]:
        # Each cell is a string that evaluates to a 2-item (crypto, market_cap) pair.
        # NOTE(review): eval() executes arbitrary code from the CSV; only safe for trusted
        # files. ast.literal_eval would be the safer parser if cells are plain literals
        # -- TODO confirm against the data.
        crypto, market_cap = eval(val)
        # Append the data to the lists
        dates.append(col)
        cryptos.append(crypto)
        market_caps.append(float(market_cap))

# Create a new dataframe with the transformed data; column headers are parsed as dates
df = pd.DataFrame({
    'Date': pd.to_datetime(dates),
    'Crypto': cryptos,
    'MarketCap': market_caps
})

# Order by date, then by market cap descending within each date
df = df.sort_values(by=['Date', 'MarketCap'], ascending=[True, False])

# Calculate the rank of each cryptocurrency on each date (1 = largest market cap;
# method='first' breaks ties by row order)
df['Rank'] = df.groupby('Date')['MarketCap'].rank(method='first', ascending=False)


In [55]:
# Interval variant that also reports each coin's best and worst rank over all of `df`
def get_gaining_rank_by_cryptos_interval(x, start_date, end_date):
    """Return coins ranked worse than ``x*100`` on ``start_date`` and at or
    better than ``x*100`` on ``end_date``, with lifetime rank extremes.

    Reads the module-level ``df`` (columns ``Date``, ``Crypto``, ``Rank``).

    Parameters
    ----------
    x : number
        Rank threshold in hundreds (the cutoff is ``x * 100``).
    start_date, end_date : str or datetime-like
        Boundary days of the interval (both inclusive).

    Returns
    -------
    dict
        ``{crypto: {'rank_start', 'rank_end', 'rank_highest', 'rank_lowest'}}``
        where each value is a ``{date_string: rank}`` mapping ("No data" for
        ``rank_highest`` when the best rank is null), ordered by end rank.
    """
    first_day = pd.to_datetime(start_date)
    last_day = pd.to_datetime(end_date)
    cutoff = x * 100

    window = df.loc[(df['Date'] >= first_day) & (df['Date'] <= last_day)]
    opening = window[window['Date'] == first_day]
    closing = window[window['Date'] == last_day]

    was_outside = opening[opening['Rank'] > cutoff]['Crypto'].unique()
    now_inside = closing[closing['Rank'] <= cutoff]['Crypto'].unique()
    movers = [name for name in was_outside if name in now_inside]
    if not movers:
        return {}

    first_key = str(first_day.date())
    last_key = str(last_day.date())

    report = {}
    for name in movers:
        history = df[df['Crypto'] == name]
        # Index labels of the numerically smallest (best) and largest (worst) rank
        best_at = history['Rank'].idxmin()
        worst_at = history['Rank'].idxmax()

        at_start = window[(window['Crypto'] == name) & (window['Date'] == first_day)]
        at_end = window[(window['Crypto'] == name) & (window['Date'] == last_day)]
        entry = {
            "rank_start": {first_key: at_start['Rank'].values[0]},
            "rank_end": {last_key: at_end['Rank'].values[0]},
        }

        if pd.notnull(history.loc[best_at, 'Rank']):
            entry["rank_highest"] = {
                str(history.loc[best_at, 'Date'].date()): history.loc[best_at, 'Rank']
            }
        else:
            entry["rank_highest"] = "No data"

        entry["rank_lowest"] = {
            str(history.loc[worst_at, 'Date'].date()): history.loc[worst_at, 'Rank']
        }
        report[name] = entry

    # Best (lowest) end rank first
    return dict(sorted(report.items(), key=lambda pair: pair[1]["rank_end"][last_key]))

In [57]:
# Run the function with x = 2 and the interval from January 15, 2022, to July 1, 2023
get_gaining_rank_by_cryptos_interval(2, '2022-01-15', '2023-07-01')

{'lido-dao': {'rank_start': {'2022-01-15': 332.0},
  'rank_end': {'2023-07-01': 31.0},
  'rank_highest': {'2023-07-04': 1.0},
  'rank_lowest': {'2021-03-27': 755.0}},
 'injective-protocol': {'rank_start': {'2022-01-15': 239.0},
  'rank_end': {'2023-07-01': 71.0},
  'rank_highest': {'2023-07-04': 1.0},
  'rank_lowest': {'2020-10-29': 330.0}},
 'gemini-dollar': {'rank_start': {'2022-01-15': 321.0},
  'rank_end': {'2023-07-01': 78.0},
  'rank_highest': {'2023-07-04': 1.0},
  'rank_lowest': {'2018-10-11': 464.0}},
 'gmx': {'rank_start': {'2022-01-15': 237.0},
  'rank_end': {'2023-07-01': 85.0},
  'rank_highest': {'2023-07-04': 3.0},
  'rank_lowest': {'2021-09-21': 419.0}},
 'pax-gold': {'rank_start': {'2022-01-15': 230.0},
  'rank_end': {'2023-07-01': 86.0},
  'rank_highest': {'2023-07-04': 1.0},
  'rank_lowest': {'2021-05-04': 382.0}},
 'conflux-token': {'rank_start': {'2022-01-15': 276.0},
  'rank_end': {'2023-07-01': 95.0},
  'rank_highest': {'2023-07-04': 1.0},
  'rank_lowest': {'2022-

In [58]:
# Interval variant reporting rank extremes both inside the interval and over all of `df`
def get_gaining_rank_by_cryptos_overall(x, start_date, end_date):
    """Return coins ranked worse than ``x*100`` on ``start_date`` and at or
    better than ``x*100`` on ``end_date``, with interval and lifetime extremes.

    Reads the module-level ``df`` (columns ``Date``, ``Crypto``, ``Rank``).

    Parameters
    ----------
    x : number
        Rank threshold in hundreds (the cutoff is ``x * 100``).
    start_date, end_date : str or datetime-like
        Boundary days of the interval (both inclusive).

    Returns
    -------
    dict
        Per coin: ``rank_start``, ``rank_end``, ``rank_highest_during_interval``,
        ``rank_lowest_during_interval``, ``rank_highest_lifetime`` and
        ``rank_lowest_lifetime``, each a ``{date_string: rank}`` mapping (the two
        "highest" entries become "No data" when the rank is null). Ordered by
        end rank.
    """
    first_day = pd.to_datetime(start_date)
    last_day = pd.to_datetime(end_date)
    cutoff = x * 100

    window = df.loc[(df['Date'] >= first_day) & (df['Date'] <= last_day)]
    opening = window[window['Date'] == first_day]
    closing = window[window['Date'] == last_day]

    was_outside = opening[opening['Rank'] > cutoff]['Crypto'].unique()
    now_inside = closing[closing['Rank'] <= cutoff]['Crypto'].unique()
    movers = [name for name in was_outside if name in now_inside]
    if not movers:
        return {}

    def _dated(rows, label):
        # {date string: rank} for the row carrying index label `label`
        return {str(rows.loc[label, 'Date'].date()): rows.loc[label, 'Rank']}

    def _dated_or_no_data(rows, label):
        return _dated(rows, label) if pd.notnull(rows.loc[label, 'Rank']) else "No data"

    first_key = str(first_day.date())
    last_key = str(last_day.date())

    report = {}
    for name in movers:
        history = df[df['Crypto'] == name]
        in_window = window[window['Crypto'] == name]

        # Smallest rank number is the "highest" rank, largest is the "lowest"
        life_best = history['Rank'].idxmin()
        life_worst = history['Rank'].idxmax()
        window_best = in_window['Rank'].idxmin()
        window_worst = in_window['Rank'].idxmax()

        report[name] = {
            "rank_start": {first_key: in_window[in_window['Date'] == first_day]['Rank'].values[0]},
            "rank_end": {last_key: in_window[in_window['Date'] == last_day]['Rank'].values[0]},
            "rank_highest_during_interval": _dated_or_no_data(in_window, window_best),
            "rank_lowest_during_interval": _dated(in_window, window_worst),
            "rank_highest_lifetime": _dated_or_no_data(history, life_best),
            "rank_lowest_lifetime": _dated(history, life_worst),
        }

    # Best (lowest) end rank first
    return dict(sorted(report.items(), key=lambda pair: pair[1]["rank_end"][last_key]))


In [61]:
# Run the function with x = 2 and the interval from January 1, 2023, to July 1, 2023
r = get_gaining_rank_by_cryptos_overall(2, '2023-01-01', '2023-07-01')
r

{'injective-protocol': {'rank_start': {'2023-01-01': 205.0},
  'rank_end': {'2023-07-01': 71.0},
  'rank_highest_during_interval': {'2023-04-28': 68.0},
  'rank_lowest_during_interval': {'2023-01-19': 223.0},
  'rank_highest_lifetime': {'2023-07-04': 1.0},
  'rank_lowest_lifetime': {'2020-10-29': 330.0}},
 'kaspa': {'rank_start': {'2023-01-01': 227.0},
  'rank_end': {'2023-07-01': 82.0},
  'rank_highest_during_interval': {'2023-04-03': 69.0},
  'rank_lowest_during_interval': {'2023-02-04': 305.0},
  'rank_highest_lifetime': {'2023-07-04': 1.0},
  'rank_lowest_lifetime': {'2022-08-10': 984.0}},
 'conflux-token': {'rank_start': {'2023-01-01': 359.0},
  'rank_end': {'2023-07-01': 95.0},
  'rank_highest_during_interval': {'2023-03-20': 56.0},
  'rank_lowest_during_interval': {'2023-01-21': 369.0},
  'rank_highest_lifetime': {'2023-07-04': 1.0},
  'rank_lowest_lifetime': {'2022-12-26': 371.0}},
 'flex-coin': {'rank_start': {'2023-01-01': 1001.0},
  'rank_end': {'2023-07-01': 101.0},
  'rank

In [43]:
# Load the necessary libraries and data again
import pandas as pd
from datetime import timedelta

# Load the CSV data (wide layout: every column after the first is iterated as a date column)
# NOTE(review): this cell repeats the CSV reshaping done in earlier cells of the notebook.
data = pd.read_csv('./ranked_by_data.csv')

# Parallel accumulators for the long-format table: one entry per (date column, row) cell
dates = []
cryptos = []
market_caps = []

# Loop over each date column in the dataframe (the first column is skipped)
for col in data.columns[1:]:
    # Loop over each row in the column
    for val in data[col]:
        # Each cell is a string that evaluates to a 2-item (crypto, market_cap) pair.
        # NOTE(review): eval() executes arbitrary code from the CSV; only safe for trusted
        # files. ast.literal_eval would be the safer parser if cells are plain literals
        # -- TODO confirm against the data.
        crypto, market_cap = eval(val)
        # Append the data to the lists
        dates.append(col)
        cryptos.append(crypto)
        market_caps.append(float(market_cap))

In [74]:
import dtale
d = dtale.show(data)
d.open_browser()

In [86]:
d = dtale.show(data.iloc[:, [0, 1, 2, 104, 1133, 2210]])

In [87]:
d.open_browser()

In [41]:
# Load the necessary libraries and data again
import pandas as pd
from datetime import timedelta
import numpy as np

# Load the CSV data (wide layout: every column after the first is iterated as a date column)
data = pd.read_csv('./ranked_by_data.csv')

# Parallel accumulators for the long-format table: one entry per (date column, row) cell
dates = []
cryptos = []
market_caps = []

# Loop over each date column in the dataframe (the first column is skipped)
for col in data.columns[1:]:
    # Loop over each row in the column
    for val in data[col]:
        # Each cell is a string that evaluates to a 2-item (crypto, market_cap) pair.
        # NOTE(review): eval() executes arbitrary code from the CSV; only safe for trusted
        # files. ast.literal_eval would be the safer parser if cells are plain literals.
        crypto, market_cap = eval(val)
        # Append the data to the lists
        dates.append(col)
        cryptos.append(crypto)
        # Replace 0.0 market cap with NaN so those rows can be dropped below
        market_caps.append(float(market_cap) if float(market_cap) != 0.0 else np.nan)

# Create a new dataframe with the transformed data; column headers are parsed as dates
df = pd.DataFrame({
    'Date': pd.to_datetime(dates),
    'Crypto': cryptos,
    'MarketCap': market_caps
})

# Drop the rows with NaN MarketCap (which were 0.0 in the original data)
df = df.dropna(subset=['MarketCap'])

# Order by date, then by market cap descending within each date
df = df.sort_values(by=['Date', 'MarketCap'], ascending=[True, False])

# Calculate the rank of each cryptocurrency on each date (1 = largest market cap)
df['Rank'] = df.groupby('Date')['MarketCap'].rank(method='first', ascending=False)

# Earliest date on which each cryptocurrency has a (non-zero) market cap
crypto_origin = df.groupby('Crypto')['Date'].min().to_dict()
# Market cap on that earliest date. The string alias 'min' is used instead of the
# builtin `min`, which pandas deprecates as a groupby transform argument.
crypto_origin_marketcap = df[df.groupby('Crypto')['Date'].transform('min') == df['Date']].set_index('Crypto')['MarketCap'].to_dict()


In [58]:
# Coins seen outside the top x*100 before end_date that finish between (x-1)*100 and x*100
def get_gaining_rank_between_by_cryptos(x, start_date, end_date):
    """Return coins that were ranked worse than ``x*100`` on any day of the
    interval before ``end_date`` and sit in ``(x-1)*100 < rank <= x*100`` on
    ``end_date``.

    Reads the module-level ``df``, ``crypto_origin`` and
    ``crypto_origin_marketcap``.

    Parameters
    ----------
    x : number
        Band selector in hundreds.
    start_date, end_date : str or datetime-like
        Boundary days of the interval (both inclusive).

    Returns
    -------
    dict
        Per coin: ``rank_start`` and ``rank_end`` (``'N/A'`` when the coin has
        no row on that day), interval and lifetime rank extremes keyed by the
        date they first occurred, and ``crypto_origin`` mapping the coin's
        origin date to its market cap on that date. Insertion order follows
        the order in which coins were selected (not sorted).
    """
    first_day = pd.to_datetime(start_date)
    last_day = pd.to_datetime(end_date)
    upper = x * 100
    lower = (x - 1) * 100

    interval_data = df.loc[(df['Date'] >= first_day) & (df['Date'] <= last_day)]

    # Outside the top x*100 on at least one day strictly before the end date
    before_end = interval_data[interval_data['Date'] < last_day]
    ever_outside = before_end[before_end['Rank'] > upper]['Crypto'].unique()

    # Inside the (lower, upper] band on the end date
    closing = interval_data[interval_data['Date'] == last_day]
    in_band = (closing['Rank'] > lower) & (closing['Rank'] <= upper)
    ends_in_band = closing[in_band]['Crypto'].unique()

    movers = [name for name in ever_outside if name in ends_in_band]

    def _rank_on(rows, day):
        # Rank on `day`, or 'N/A' when the coin has no row for it
        ranks = rows[rows['Date'] == day]['Rank'].values
        return 'N/A' if ranks.size == 0 else ranks[0]

    def _first_date_with(rows, rank):
        # Date of the first row whose rank equals `rank`
        return rows[rows['Rank'] == rank]['Date'].values[0]

    report = {}
    for name in movers:
        history = df[df['Crypto'] == name]
        in_window = interval_data[interval_data['Crypto'] == name]

        rank_start = _rank_on(in_window, first_day)
        rank_end = _rank_on(in_window, last_day)

        window_best_date = _first_date_with(in_window, in_window['Rank'].min())
        window_worst_date = _first_date_with(in_window, in_window['Rank'].max())
        life_best_date = _first_date_with(history, history['Rank'].min())
        life_worst_date = _first_date_with(history, history['Rank'].max())

        report[name] = {
            'rank_start': {str(first_day.date()): rank_start},
            'rank_end': {str(last_day.date()): rank_end},
            'rank_highest_during_interval': {str(window_best_date): in_window['Rank'].min()},
            'rank_lowest_during_interval': {str(window_worst_date): in_window['Rank'].max()},
            'rank_highest_lifetime': {str(life_best_date): history['Rank'].min()},
            'rank_lowest_lifetime': {str(life_worst_date): history['Rank'].max()},
            'crypto_origin': {str(crypto_origin[name].date()): crypto_origin_marketcap[name]}
        }

    return report



In [63]:
# Test the function with x = 5 and the interval from January 15, 2020 to February 15, 2023
get_gaining_rank_between_by_cryptos(5, '2020-01-15', '2023-02-15')


{'xmon': {'rank_start': {'2020-01-15': 'N/A'},
  'rank_end': {'2023-02-15': 436.0},
  'rank_highest_during_interval': {'2022-09-03T00:00:00.000000000': 254.0},
  'rank_lowest_during_interval': {'2022-06-17T00:00:00.000000000': 685.0},
  'rank_highest_lifetime': {'2022-09-03T00:00:00.000000000': 254.0},
  'rank_lowest_lifetime': {'2022-06-17T00:00:00.000000000': 685.0},
  'crypto_origin': {'2021-02-18': 3161751.153535594}},
 'cortex': {'rank_start': {'2020-01-15': 222.0},
  'rank_end': {'2023-02-15': 406.0},
  'rank_highest_during_interval': {'2020-01-19T00:00:00.000000000': 220.0},
  'rank_lowest_during_interval': {'2022-11-10T00:00:00.000000000': 651.0},
  'rank_highest_lifetime': {'2018-04-30T00:00:00.000000000': 54.0},
  'rank_lowest_lifetime': {'2022-11-10T00:00:00.000000000': 651.0},
  'crypto_origin': {'2018-04-18': 151150177.6725948}},
 'wilder-world': {'rank_start': {'2020-01-15': 'N/A'},
  'rank_end': {'2023-02-15': 470.0},
  'rank_highest_during_interval': {'2021-11-28T00:00:

In [64]:
# Rank history of a single cryptocurrency across every date present in `df`
def get_rank_timeseries(crypto):
    """Return the ``Date`` and ``Rank`` columns of the module-level ``df`` for
    the rows whose ``Crypto`` equals ``crypto``.

    Parameters
    ----------
    crypto : str
        Value to match against the ``Crypto`` column.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``Date`` and ``Rank``, keeping the original index labels.
    """
    is_this_coin = df['Crypto'] == crypto
    return df.loc[is_this_coin, ['Date', 'Rank']]

In [65]:
cryptos = get_gaining_rank_between_by_cryptos(4, '2023-01-15', '2023-02-15').keys()
crypto_rank_timeseries = {crypto: get_rank_timeseries(crypto) for crypto in cryptos}

crypto_rank_timeseries

{'aergo':               Date   Rank
 789426  2019-05-31  127.0
 790326  2019-06-01  127.0
 791229  2019-06-02  130.0
 792130  2019-06-03  131.0
 793026  2019-06-04  127.0
 ...            ...    ...
 2017315 2023-02-23  416.0
 2018209 2023-02-24  410.0
 2019098 2023-02-25  399.0
 2020005 2023-02-26  406.0
 2020914 2023-02-27  415.0
 
 [1369 rows x 2 columns],
 'morpheus-network':               Date   Rank
 454637  2018-05-24  138.0
 455535  2018-05-25  136.0
 456436  2018-05-26  137.0
 457330  2018-05-27  131.0
 458232  2018-05-28  133.0
 ...            ...    ...
 2017280 2023-02-23  381.0
 2018179 2023-02-24  380.0
 2019085 2023-02-25  386.0
 2019999 2023-02-26  400.0
 2020891 2023-02-27  392.0
 
 [1075 rows x 2 columns],
 'alchemy-pay':               Date   Rank
 1216092 2020-09-16  193.0
 1216994 2020-09-17  195.0
 1217898 2020-09-18  199.0
 1218797 2020-09-19  198.0
 1219699 2020-09-20  200.0
 ...            ...    ...
 2017084 2023-02-23  185.0
 2017987 2023-02-24  188.0
 2018903 

In [68]:
import matplotlib.pyplot as plt

# Draw the rank history of several cryptocurrencies on one shared figure
# NOTE(review): an identical definition follows in the next cell and shadows this one.
def plot_rank_timeseries(crypto_rank_data):
    """Plot one rank-over-time line per cryptocurrency on a single figure.

    Parameters
    ----------
    crypto_rank_data : mapping
        Keys are used as legend labels; each value is indexed with ``'Date'``
        and ``'Rank'`` to obtain the x and y series.
    """
    plt.figure(figsize=(15,8))
    for name in crypto_rank_data:
        series = crypto_rank_data[name]
        # Convert every date entry individually before plotting
        x_values = list(map(pd.to_datetime, series['Date']))
        plt.plot(x_values, series['Rank'], label=name)

    plt.xlabel('Date')
    plt.ylabel('Rank')
    plt.title('Cryptocurrency Rank Timeseries')
    plt.legend()
    plt.show()

In [69]:
import matplotlib.pyplot as plt

# Draw the rank history of several cryptocurrencies on one shared figure
# NOTE(review): this repeats the definition from the previous cell and shadows it.
def plot_rank_timeseries(crypto_rank_data):
    """Plot one rank-over-time line per cryptocurrency on a single figure.

    Parameters
    ----------
    crypto_rank_data : mapping
        Keys are used as legend labels; each value is indexed with ``'Date'``
        and ``'Rank'`` to obtain the x and y series.
    """
    plt.figure(figsize=(15,8))
    for name in crypto_rank_data:
        series = crypto_rank_data[name]
        # Convert every date entry individually before plotting
        x_values = list(map(pd.to_datetime, series['Date']))
        plt.plot(x_values, series['Rank'], label=name)

    plt.xlabel('Date')
    plt.ylabel('Rank')
    plt.title('Cryptocurrency Rank Timeseries')
    plt.legend()
    plt.show()

In [None]:
import matplotlib.pyplot as plt

# Plot the rank time series for each cryptocurrency, one stacked panel per coin
n_panels = len(crypto_rank_timeseries)

if n_panels == 0:
    # plt.subplots cannot build a zero-row grid; there is nothing to draw
    print("No cryptocurrencies to plot.")
else:
    # squeeze=False always returns a 2-D array of Axes, so indexing also works when
    # there is a single coin (the default would return a bare Axes object).
    fig, axs = plt.subplots(n_panels, 1, figsize=(10, 5*n_panels), squeeze=False)
    axs = axs.ravel()

    for i, (crypto, time_series) in enumerate(crypto_rank_timeseries.items()):
        axs[i].plot(time_series['Date'], time_series['Rank'])
        axs[i].invert_yaxis()  # So that rank 1 is at the top
        axs[i].set_title(crypto)

    plt.tight_layout()
    plt.show()

In [70]:
# Function to get the cryptocurrencies that moved from top x*100 to outside top x*100
def get_losing_rank_by_cryptos(x, start_date, end_date):
    """Return coins that were inside the top ``x*100`` on any day of the
    interval before ``end_date`` and are outside it on ``end_date``.

    Reads the module-level ``df``, ``crypto_origin`` and
    ``crypto_origin_marketcap``.

    Parameters
    ----------
    x : number
        Rank threshold in hundreds (the cutoff is ``x * 100``).
    start_date, end_date : str or datetime-like
        Boundary days of the interval (both inclusive).

    Returns
    -------
    dict
        Per coin: ``rank_start`` and ``rank_end`` keyed by ``YYYY-MM-DD``
        (``'N/A'`` when the coin has no row on that day), interval and lifetime
        rank extremes keyed by the date they first occurred, plus
        ``crypto_origin`` and ``crypto_origin_marketcap`` keyed by the coin's
        origin date. Insertion order follows selection order (not sorted).
    """
    # Convert start_date and end_date to datetime
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    cutoff = x * 100

    # Filter the data to only include the specified interval
    interval_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]

    # Coins inside the top x*100 on at least one day strictly before the end date
    before_end = interval_data[interval_data['Date'] < end_date]
    inside_top_x_start = before_end[before_end['Rank'] <= cutoff]['Crypto'].unique()

    # Coins outside the top x*100 on the end date
    at_end = interval_data[interval_data['Date'] == end_date]
    outside_top_x_end = at_end[at_end['Rank'] > cutoff]['Crypto'].unique()

    left_top_x = [crypto for crypto in inside_top_x_start if crypto in outside_top_x_end]

    def _rank_on(rows, day):
        # A selected coin may have first appeared after start_date, in which case it
        # has no row for that day; report 'N/A' instead of raising IndexError.
        ranks = rows[rows['Date'] == day]['Rank'].values
        return ranks[0] if ranks.size else 'N/A'

    def _first_date_with(rows, rank):
        # Date of the first row whose rank equals `rank`
        return rows[rows['Rank'] == rank]['Date'].values[0]

    # Get rank start, rank end, highest and lowest rank during interval, and highest and lowest rank lifetime for each crypto
    crypto_ranks = {}
    for crypto in left_top_x:
        crypto_data = df[df['Crypto'] == crypto]
        crypto_interval_data = interval_data[interval_data['Crypto'] == crypto]

        # Smallest rank number is the "highest" rank, largest is the "lowest"
        interval_best = crypto_interval_data['Rank'].min()
        interval_worst = crypto_interval_data['Rank'].max()
        lifetime_best = crypto_data['Rank'].min()
        lifetime_worst = crypto_data['Rank'].max()

        origin_key = str(crypto_origin[crypto].date())
        crypto_ranks[crypto] = {
            'rank_start': {str(start_date.date()): _rank_on(crypto_interval_data, start_date)},
            'rank_end': {str(end_date.date()): _rank_on(crypto_interval_data, end_date)},
            'rank_highest_during_interval': {str(_first_date_with(crypto_interval_data, interval_best)): interval_best},
            'rank_lowest_during_interval': {str(_first_date_with(crypto_interval_data, interval_worst)): interval_worst},
            'rank_highest_lifetime': {str(_first_date_with(crypto_data, lifetime_best)): lifetime_best},
            'rank_lowest_lifetime': {str(_first_date_with(crypto_data, lifetime_worst)): lifetime_worst},
            'crypto_origin': {origin_key: 'origin'},
            'crypto_origin_marketcap': {origin_key: crypto_origin_marketcap[crypto]}
        }

    return crypto_ranks

In [72]:

# Test the function with x = 3 and the interval from January 15, 2023 to February 15, 2023
get_losing_rank_by_cryptos(3, '2023-01-15', '2023-02-15')


{'xido-finance': {'rank_start': {'2023-01-15': 242.0},
  'rank_end': {'2023-02-15': 444.0},
  'rank_highest_during_interval': {'2023-01-19T00:00:00.000000000': 213.0},
  'rank_lowest_during_interval': {'2023-02-15T00:00:00.000000000': 444.0},
  'rank_highest_lifetime': {'2022-03-19T00:00:00.000000000': 54.0},
  'rank_lowest_lifetime': {'2021-12-01T00:00:00.000000000': 579.0},
  'crypto_origin': {'2021-06-12': 'origin'},
  'crypto_origin_marketcap': {'2021-06-12': 888007.7397067309}},
 'usdx': {'rank_start': {'2023-01-15': 245.0},
  'rank_end': {'2023-02-15': 301.0},
  'rank_highest_during_interval': {'2023-01-15T00:00:00.000000000': 245.0},
  'rank_lowest_during_interval': {'2023-02-08T00:00:00.000000000': 307.0},
  'rank_highest_lifetime': {'2020-12-24T00:00:00.000000000': 206.0},
  'rank_lowest_lifetime': {'2021-05-25T00:00:00.000000000': 430.0},
  'crypto_origin': {'2020-11-14': 'origin'},
  'crypto_origin_marketcap': {'2020-11-14': 12963377.268486941}},
 'lido-staked-sol': {'rank_s

In [73]:
def get_losing_rank_between_by_cryptos(x, start_date, end_date):
    """Report coins that slipped from the top ``x*100`` into the next band of 100.

    A coin qualifies when its ``Rank`` is at most ``x*100`` on ``start_date``
    and is greater than ``x*100`` but at most ``(x+1)*100`` on ``end_date``.
    Reads the notebook-level ``df`` (columns ``Date``, ``Crypto``, ``Rank``)
    and the ``crypto_origin`` / ``crypto_origin_marketcap`` lookups.

    Args:
        x: Band selector; multiplied by 100 to obtain the rank cut-off.
        start_date: First day of the interval (anything ``pd.to_datetime`` accepts).
        end_date: Last day of the interval, inclusive.

    Returns:
        dict keyed by coin name, in the order the coins appear on the start
        date. Every value maps a label (``rank_start``, ``rank_end``, ...) to
        a one-entry ``{date string: value}`` dict.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Rows that fall inside the requested interval (both ends inclusive)
    within_interval = (df['Date'] >= start_date) & (df['Date'] <= end_date)
    interval_data = df[within_interval]

    # Coins inside the top x*100 on the first day
    on_start = interval_data[interval_data['Date'] == start_date]
    started_inside = on_start.loc[on_start['Rank'] <= x*100, 'Crypto'].unique()

    # Coins sitting in the (x*100, (x+1)*100] band on the last day
    on_end = interval_data[interval_data['Date'] == end_date]
    in_next_band = (on_end['Rank'] > x*100) & (on_end['Rank'] <= (x+1)*100)
    ended_in_band = on_end.loc[in_next_band, 'Crypto'].unique()

    def first_date_with_rank(frame, rank):
        # Date of the first row of `frame` whose Rank equals `rank`
        return frame.loc[frame['Rank'] == rank, 'Date'].values[0]

    crypto_ranks = {}
    for crypto in started_inside:
        if crypto not in ended_in_band:
            continue

        lifetime = df[df['Crypto'] == crypto]
        window = interval_data[interval_data['Crypto'] == crypto]

        # Smallest rank number is the "highest" rank, largest is the "lowest"
        window_best, window_worst = window['Rank'].min(), window['Rank'].max()
        lifetime_best, lifetime_worst = lifetime['Rank'].min(), lifetime['Rank'].max()

        summary = {}
        summary['rank_start'] = {
            str(start_date.date()): window.loc[window['Date'] == start_date, 'Rank'].values[0]
        }
        summary['rank_end'] = {
            str(end_date.date()): window.loc[window['Date'] == end_date, 'Rank'].values[0]
        }
        summary['rank_highest_during_interval'] = {
            str(first_date_with_rank(window, window_best)): window_best
        }
        summary['rank_lowest_during_interval'] = {
            str(first_date_with_rank(window, window_worst)): window_worst
        }
        summary['rank_highest_lifetime'] = {
            str(first_date_with_rank(lifetime, lifetime_best)): lifetime_best
        }
        summary['rank_lowest_lifetime'] = {
            str(first_date_with_rank(lifetime, lifetime_worst)): lifetime_worst
        }
        origin_day = str(crypto_origin[crypto].date())
        summary['crypto_origin'] = {origin_day: 'origin'}
        summary['crypto_origin_marketcap'] = {origin_day: crypto_origin_marketcap[crypto]}

        crypto_ranks[crypto] = summary

    return crypto_ranks


In [76]:
# x = 5: coins with Rank <= 500 on 2023-01-15 whose Rank is > 500 and <= 600 on 2023-02-15
get_losing_rank_between_by_cryptos(5, '2023-01-15', '2023-02-15')

{'divi': {'rank_start': {'2023-01-15': 374.0},
  'rank_end': {'2023-02-15': 544.0},
  'rank_highest_during_interval': {'2023-01-15T00:00:00.000000000': 374.0},
  'rank_lowest_during_interval': {'2023-02-09T00:00:00.000000000': 564.0},
  'rank_highest_lifetime': {'2020-07-15T00:00:00.000000000': 63.0},
  'rank_lowest_lifetime': {'2023-02-27T00:00:00.000000000': 677.0},
  'crypto_origin': {'2018-10-16': 'origin'},
  'crypto_origin_marketcap': {'2018-10-16': 5899791.350244191}},
 'asd': {'rank_start': {'2023-01-15': 405.0},
  'rank_end': {'2023-02-15': 517.0},
  'rank_highest_during_interval': {'2023-01-17T00:00:00.000000000': 388.0},
  'rank_lowest_during_interval': {'2023-02-12T00:00:00.000000000': 545.0},
  'rank_highest_lifetime': {'2021-03-28T00:00:00.000000000': 47.0},
  'rank_lowest_lifetime': {'2022-07-12T00:00:00.000000000': 592.0},
  'crypto_origin': {'2019-06-14': 'origin'},
  'crypto_origin_marketcap': {'2019-06-14': 52671552.55900357}},
 'saito': {'rank_start': {'2023-01-15':

In [77]:
def get_top_gainers(start_date, end_date):
    """Order coins by how many rank places they gained between two dates.

    Reads the notebook-level ``df`` (columns ``Date``, ``Crypto``, ``Rank``).
    Only coins that have a row on both ``start_date`` and ``end_date`` are
    included. The gain is ``rank at start - rank at end``, so a positive value
    means the coin moved to a smaller rank number.

    NOTE: a later cell defines ``get_top_gainers`` again with a richer
    per-coin summary; on a top-to-bottom run that later definition replaces
    this one.

    Args:
        start_date: First day of the interval (anything ``pd.to_datetime`` accepts).
        end_date: Last day of the interval, inclusive.

    Returns:
        dict mapping coin name to its rank gain, largest gain first.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Keep only rows inside the interval (both ends inclusive)
    inside = (df['Date'] >= start_date) & (df['Date'] <= end_date)
    interval_data = df[inside]

    rank_changes = {}
    for crypto in interval_data['Crypto'].unique():
        rows = interval_data[interval_data['Crypto'] == crypto]
        observed_dates = rows['Date'].values

        # A coin needs a rank on both endpoints to have a measurable change
        if start_date not in observed_dates or end_date not in observed_dates:
            continue

        first_rank = rows.loc[rows['Date'] == start_date, 'Rank'].values[0]
        last_rank = rows.loc[rows['Date'] == end_date, 'Rank'].values[0]
        rank_changes[crypto] = first_rank - last_rank

    # Largest gain first; dict preserves the sorted order
    ordered = sorted(rank_changes.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ordered)

In [29]:
def get_top_gainers(start_date, end_date):
    """Rank every coin by how many places it climbed between two dates.

    Reads the notebook-level ``df`` (columns ``Date``, ``Crypto``, ``Rank``).
    Only coins that have a row on both ``start_date`` and ``end_date`` are
    reported. ``rank_change`` is ``rank at start - rank at end``, so a
    positive value means the coin moved to a smaller rank number.

    Args:
        start_date: First day of the interval (anything ``pd.to_datetime`` accepts).
        end_date: Last day of the interval, inclusive.

    Returns:
        dict keyed by coin name, ordered by ``rank_change`` descending. Each
        value holds ``rank_change``, ``rank_start``, ``rank_end``,
        ``rank_highest_during_interval`` and ``rank_lowest_during_interval``.
        The last two are one-entry ``{date string: rank}`` dicts (the key is
        ``str()`` of the numpy datetime of the first matching row), or
        ``None`` when the coin has no non-missing rank inside the interval.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Keep only rows inside the interval (both ends inclusive)
    interval_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]

    rank_changes = {}
    # One grouped pass instead of re-filtering the whole interval for each coin.
    # sort=False yields coins in first-appearance order, so coins with equal
    # rank_change keep the same relative order as a loop over .unique().
    for crypto, crypto_data in interval_data.groupby('Crypto', sort=False):
        ranks_at_start = crypto_data.loc[crypto_data['Date'] == start_date, 'Rank']
        ranks_at_end = crypto_data.loc[crypto_data['Date'] == end_date, 'Rank']

        # Coins missing either endpoint are dropped before any further work,
        # so a coin that is discarded can no longer abort the whole call.
        if ranks_at_start.empty or ranks_at_end.empty:
            continue
        rank_start = ranks_at_start.values[0]
        rank_end = ranks_at_end.values[0]

        ranks = crypto_data['Rank']
        if ranks.notna().any():
            # Smallest rank number is the "highest" rank, largest the "lowest"
            best_rank = ranks.min()
            worst_rank = ranks.max()
            best_date = crypto_data.loc[ranks == best_rank, 'Date'].values[0]
            worst_date = crypto_data.loc[ranks == worst_rank, 'Date'].values[0]
            highest = {str(best_date): best_rank}
            lowest = {str(worst_date): worst_rank}
        else:
            # Every rank in the interval is missing: no extreme to report
            highest = None
            lowest = None

        rank_changes[crypto] = {
            'rank_change': rank_start - rank_end,
            'rank_start': {str(start_date.date()): rank_start},
            'rank_end': {str(end_date.date()): rank_end},
            'rank_highest_during_interval': highest,
            'rank_lowest_during_interval': lowest,
        }

    # Largest gain first; dict preserves the sorted order
    return dict(sorted(rank_changes.items(), key=lambda item: item[1]['rank_change'], reverse=True))


In [30]:

# Test the function with the interval from January 15, 2020, to February 15, 2023
get_top_gainers('2020-01-15', '2023-02-15')


{'smooth-love-potion': {'rank_change': 372.0,
  'rank_start': {'2020-01-15': 628.0},
  'rank_end': {'2023-02-15': 256.0},
  'rank_highest_during_interval': {'2022-02-11T00:00:00.000000000': 92.0},
  'rank_lowest_during_interval': {'2021-04-24T00:00:00.000000000': 802.0}},
 'coinex-token': {'rank_change': 299.0,
  'rank_start': {'2020-01-15': 542.0},
  'rank_end': {'2023-02-15': 243.0},
  'rank_highest_during_interval': {'2022-12-31T00:00:00.000000000': 153.0},
  'rank_lowest_during_interval': {'2020-09-04T00:00:00.000000000': 723.0}},
 'ergo': {'rank_change': 278.0,
  'rank_start': {'2020-01-15': 547.0},
  'rank_end': {'2023-02-15': 269.0},
  'rank_highest_during_interval': {'2021-09-12T00:00:00.000000000': 116.0},
  'rank_lowest_during_interval': {'2020-02-18T00:00:00.000000000': 562.0}},
 'hunt-token': {'rank_change': 174.0,
  'rank_start': {'2020-01-15': 550.0},
  'rank_end': {'2023-02-15': 376.0},
  'rank_highest_during_interval': {'2020-09-17T00:00:00.000000000': 303.0},
  'rank_l