In [1]:
import pandas as pd
import numpy as np
import requests
from pycoingecko import CoinGeckoAPI
import time
from itertools import zip_longest
import datetime

In [2]:
# Declaro la función de la api de coingecko en una variable cg
cg = CoinGeckoAPI()

In [3]:
# Lista de las critomonedas seleccionadas para análisis
crypto_list = ['bitcoin', 'ethereum', 'ripple', 'solana', 'okb', 'arbitrum', 'optimism', 'elrond-erd-2','boba-network', 'gmd-protocol']

#### Creación del primer dataset de la base de datos cryptocurrencies.csv

In [5]:
coin_market = cg.get_coins_markets(vs_currency='usd', ids=crypto_list)
df_market = pd.DataFrame(coin_market, columns=["id", "symbol", "name", "image", "total_supply", "max_supply"])

In [7]:
df_market.head(10)

Unnamed: 0,id,symbol,name,image,total_supply,max_supply
0,bitcoin,btc,Bitcoin,https://assets.coingecko.com/coins/images/1/la...,21000000.0,21000000.0
1,ethereum,eth,Ethereum,https://assets.coingecko.com/coins/images/279/...,120210000.0,
2,ripple,xrp,XRP,https://assets.coingecko.com/coins/images/44/l...,99988500000.0,100000000000.0
3,solana,sol,Solana,https://assets.coingecko.com/coins/images/4128...,555465100.0,
4,okb,okb,OKB,https://assets.coingecko.com/coins/images/4463...,235957700.0,300000000.0
5,arbitrum,arb,Arbitrum,https://assets.coingecko.com/coins/images/1654...,10000000000.0,10000000000.0
6,optimism,op,Optimism,https://assets.coingecko.com/coins/images/2524...,4294967000.0,4294967000.0
7,elrond-erd-2,egld,MultiversX,https://assets.coingecko.com/coins/images/1233...,25864120.0,31415930.0
8,boba-network,boba,Boba Network,https://assets.coingecko.com/coins/images/2028...,500000000.0,500000000.0
9,gmd-protocol,gmd,GMD,https://assets.coingecko.com/coins/images/2808...,80000.0,80000.0


In [118]:
df_market.to_csv("data/launch/cryptocurrencies.csv", index=False)

#### Se extrae más información del historial de las cryptomonedas a través de un `while` ya que la api solo permite una fecha a la vez.
#### `NOTA:` Se extrae desde la fecha 2013-04-28 ya que esa es la data más antigua que nos dá coingecko, lo ideal es obtenerla desde sus comienzos 2010, pero en este proyecto usaremos la solución Coingecko.

In [32]:
# Definir las fechas de inicio y fin
start_date = pd.to_datetime("2013-04-28")
end_date = pd.to_datetime("2023-08-17")


# Crear un diccionario para almacenar los DataFrames de cada criptomoneda
crypto_dataframes = {}

# Realizar extracciones diarias limitadas a 25 por minuto para cada criptomoneda
for crypto in crypto_list:
    print(f"Procesando {crypto}...")
    current_date = start_date
    crypto_historical_data = []
    while current_date <= end_date:
        # Obtener el historial para la fecha actual
        data = cg.get_coin_history_by_id(id=crypto, date=current_date.strftime("%d-%m-%Y"), localization='false')
        # Agregar la fecha como una columna
        data['date'] = current_date
        # Agregar los datos a la lista
        crypto_historical_data.append(data)
        # Esperar 2.4 segundos
        time.sleep(1.8)
        # Avanzar a la siguiente fecha
        current_date += pd.DateOffset(days=1)
    
    # Convertir la lista de datos en un DataFrame
    crypto_df = pd.json_normalize(crypto_historical_data)
    crypto_dataframes[crypto] = crypto_df
    crypto_dataframes[crypto].to_csv("data/original/" + crypto + "_cg_full.csv", index=False)

Procesando bitcoin...




Procesando ethereum...




Procesando ripple...
Procesando solana...
Procesando okb...




Procesando arbitrum...
Procesando optimism...
Procesando elrond-erd-2...
Procesando boba-network...
Procesando gmd-protocol...


In [281]:
# Se organiza cada dataframe guardado en diccionario de forma independiente
df_bitcoin = crypto_dataframes["bitcoin"]
df_ethereum = crypto_dataframes["ethereum"]
df_ripple = crypto_dataframes["ripple"]
df_solana = crypto_dataframes["solana"]
df_okb = crypto_dataframes["okb"]
df_arbitrum = crypto_dataframes["arbitrum"]
df_optimism = crypto_dataframes["optimism"]
df_elrond = crypto_dataframes["elrond-erd-2"]
df_boba = crypto_dataframes["boba-network"]
df_gmd = crypto_dataframes["gmd-protocol"]

#### Función de Limpieza General

In [282]:
def clean_data(df_crypto):
    # Drop columns: 'market_data.current_price.aed', 'market_data.current_price.ars' and 5 other columns
    df_crypto = df_crypto.drop(columns=['market_data.current_price.aed', 'market_data.current_price.ars', 'market_data.current_price.aud', 'market_data.current_price.bdt', 'market_data.current_price.bhd', 'market_data.current_price.bmd', 'market_data.current_price.brl'])
    # Drop columns: 'image.thumb', 'image.small' and 137 other columns
    df_crypto = df_crypto.drop(columns=['image.thumb', 'image.small', 'market_data.current_price.cad', 'market_data.current_price.chf', 'market_data.current_price.clp', 'market_data.current_price.cny', 'market_data.current_price.czk', 'market_data.current_price.dkk', 'market_data.current_price.gbp', 'market_data.current_price.hkd', 'market_data.current_price.huf', 'market_data.current_price.idr', 'market_data.current_price.ils', 'market_data.current_price.inr', 'market_data.current_price.jpy', 'market_data.current_price.krw', 'market_data.current_price.kwd', 'market_data.current_price.lkr', 'market_data.current_price.ltc', 'market_data.current_price.mmk', 'market_data.current_price.mxn', 'market_data.current_price.myr', 'market_data.current_price.ngn', 'market_data.current_price.nok', 'market_data.current_price.nzd', 'market_data.current_price.php', 'market_data.current_price.pkr', 'market_data.current_price.pln', 'market_data.current_price.rub', 'market_data.current_price.sar', 'market_data.current_price.sek', 'market_data.current_price.sgd', 'market_data.current_price.thb', 'market_data.current_price.try', 'market_data.current_price.twd', 'market_data.current_price.uah', 'market_data.current_price.vef', 'market_data.current_price.vnd', 'market_data.current_price.xag', 'market_data.current_price.xau', 'market_data.current_price.xdr', 'market_data.current_price.zar', 'market_data.current_price.bits', 'market_data.market_cap.aed', 'market_data.market_cap.ars', 'market_data.market_cap.aud', 'market_data.market_cap.bdt', 'market_data.market_cap.bhd', 'market_data.market_cap.bmd', 'market_data.market_cap.brl', 'market_data.market_cap.cad', 'market_data.market_cap.chf', 'market_data.market_cap.clp', 'market_data.market_cap.cny', 'market_data.market_cap.czk', 'market_data.market_cap.dkk', 'market_data.market_cap.gbp', 'market_data.market_cap.hkd', 'market_data.market_cap.huf', 'market_data.market_cap.idr', 'market_data.market_cap.ils', 'market_data.market_cap.inr', 'market_data.market_cap.jpy', 'market_data.market_cap.krw', 'market_data.market_cap.kwd', 'market_data.market_cap.lkr', 'market_data.market_cap.ltc', 'market_data.market_cap.mmk', 'market_data.market_cap.mxn', 'market_data.market_cap.myr', 'market_data.market_cap.ngn', 'market_data.market_cap.nok', 'market_data.market_cap.nzd', 'market_data.market_cap.php', 'market_data.market_cap.pkr', 'market_data.market_cap.pln', 'market_data.market_cap.rub', 'market_data.market_cap.sar', 'market_data.market_cap.sek', 'market_data.market_cap.sgd', 'market_data.market_cap.thb', 'market_data.market_cap.try', 'market_data.market_cap.twd', 'market_data.market_cap.uah', 'market_data.market_cap.vef', 'market_data.market_cap.vnd', 'market_data.market_cap.xag', 'market_data.market_cap.xau', 'market_data.market_cap.xdr', 'market_data.market_cap.zar', 'market_data.market_cap.bits', 'market_data.total_volume.aed', 'market_data.total_volume.ars', 'market_data.total_volume.aud', 'market_data.total_volume.bdt', 'market_data.total_volume.bhd', 'market_data.total_volume.bmd', 'market_data.total_volume.brl', 'market_data.total_volume.cad', 'market_data.total_volume.chf', 'market_data.total_volume.clp', 'market_data.total_volume.cny', 'market_data.total_volume.czk', 'market_data.total_volume.dkk', 'market_data.total_volume.gbp', 'market_data.total_volume.hkd', 'market_data.total_volume.huf', 'market_data.total_volume.idr', 'market_data.total_volume.ils', 'market_data.total_volume.inr', 'market_data.total_volume.jpy', 'market_data.total_volume.krw', 'market_data.total_volume.kwd', 'market_data.total_volume.lkr', 'market_data.total_volume.ltc', 'market_data.total_volume.mmk', 'market_data.total_volume.mxn', 'market_data.total_volume.myr', 'market_data.total_volume.ngn', 'market_data.total_volume.nok', 'market_data.total_volume.nzd', 'market_data.total_volume.php', 'market_data.total_volume.pkr', 'market_data.total_volume.pln', 'market_data.total_volume.rub', 'market_data.total_volume.sar', 'market_data.total_volume.sek', 'market_data.total_volume.sgd', 'market_data.total_volume.thb', 'market_data.total_volume.try', 'market_data.total_volume.twd', 'market_data.total_volume.uah', 'market_data.total_volume.vef', 'market_data.total_volume.vnd', 'market_data.total_volume.xag', 'market_data.total_volume.xau', 'market_data.total_volume.xdr', 'market_data.total_volume.zar', 'market_data.total_volume.bits'])
    # Drop columns: 'community_data.facebook_likes', 'market_data.total_volume.dot' and 22 other columns
    df_crypto = df_crypto.drop(columns=['community_data.facebook_likes', 'market_data.total_volume.dot', 'market_data.market_cap.dot', 'market_data.current_price.dot', 'market_data.total_volume.yfi', 'market_data.market_cap.yfi', 'market_data.current_price.yfi', 'market_data.total_volume.xlm', 'market_data.total_volume.eos', 'market_data.market_cap.xlm', 'market_data.market_cap.eos', 'market_data.current_price.xlm', 'market_data.current_price.eos', 'market_data.total_volume.link', 'market_data.market_cap.link', 'market_data.current_price.link', 'market_data.total_volume.bnb', 'market_data.market_cap.bnb', 'market_data.current_price.bnb', 'market_data.total_volume.bch', 'market_data.market_cap.bch', 'market_data.current_price.bch', 'public_interest_stats.bing_matches', 'public_interest_stats.alexa_rank'])
    # Drop columns: 'developer_data.code_additions_deletions_4_weeks.additions', 'developer_data.code_additions_deletions_4_weeks.deletions', 'developer_data.commit_count_4_weeks'
    df_crypto = df_crypto.drop(columns=['developer_data.code_additions_deletions_4_weeks.additions', 'developer_data.code_additions_deletions_4_weeks.deletions', 'developer_data.commit_count_4_weeks'])
    # Drop columns: 'market_data.current_price.xrp', 'market_data.market_cap.xrp', 'market_data.total_volume.xrp'
    df_crypto = df_crypto.drop(columns=['market_data.current_price.xrp', 'market_data.market_cap.xrp', 'market_data.total_volume.xrp'])
    df_crypto = df_crypto.dropna(subset=['market_data.current_price.usd'])
    # Drop columns: 'name', 'symbol'
    df_crypto = df_crypto.drop(columns=['name', 'symbol'])
    # Renombrar la columna 'id' a 'cryptocurrency_id'
    df_crypto.rename(columns={'id': 'cryptocurrency_id'}, inplace=True)
    df_crypto = df_crypto[["cryptocurrency_id","date","market_data.current_price.btc","market_data.current_price.eth","market_data.current_price.eur","market_data.current_price.usd","market_data.current_price.sats","market_data.market_cap.btc","market_data.market_cap.eth","market_data.market_cap.eur","market_data.market_cap.usd","market_data.market_cap.sats","market_data.total_volume.btc","market_data.total_volume.eth","market_data.total_volume.eur","market_data.total_volume.usd","market_data.total_volume.sats","community_data.twitter_followers","community_data.reddit_average_posts_48h","community_data.reddit_average_comments_48h","community_data.reddit_subscribers","community_data.reddit_accounts_active_48h","developer_data.forks","developer_data.stars","developer_data.subscribers","developer_data.total_issues","developer_data.closed_issues","developer_data.pull_requests_merged","developer_data.pull_request_contributors"]]
    return df_crypto

In [283]:
df_bitcoin = clean_data(df_bitcoin)
df_ethereum = clean_data(df_ethereum)
df_ripple = clean_data(df_ripple)
df_solana = clean_data(df_solana)
df_okb = clean_data(df_okb)
df_arbitrum = clean_data(df_arbitrum)
df_optimism = clean_data(df_optimism)
df_elrond = clean_data(df_elrond)
df_boba = clean_data(df_boba)
df_gmd = clean_data(df_gmd)

In [285]:
# Cargamos en datasets como datos originales
df_bitcoin.to_csv("data/original/bitcoin_cg_full.csv", index=False)
df_ethereum.to_csv("data/original/ethereum_cg_full.csv", index=False)
df_ripple.to_csv("data/original/ripple_cg_full.csv", index=False)
df_solana.to_csv("data/original/solana_cg_full.csv", index=False)
df_okb.to_csv("data/original/okb_cg_full.csv", index=False)
df_arbitrum.to_csv("data/original/arbitrum_cg_full.csv", index=False)
df_optimism.to_csv("data/original/optimism_cg_full.csv", index=False)
df_elrond.to_csv("data/original/elrond-erd-2_cg_full.csv", index=False)
df_boba.to_csv("data/original/boba-network_cg_full.csv", index=False)
df_gmd.to_csv("data/original/gmd-protocol_cg_full.csv", index=False)

In [2]:
# Leemos en datasets originales
df_bitcoin = pd.read_csv("data/original/bitcoin_cg_full.csv")
df_ethereum = pd.read_csv("data/original/ethereum_cg_full.csv")
df_ripple = pd.read_csv("data/original/ripple_cg_full.csv")
df_solana = pd.read_csv("data/original/solana_cg_full.csv")
df_okb = pd.read_csv("data/original/okb_cg_full.csv")
df_arbitrum = pd.read_csv("data/original/arbitrum_cg_full.csv")
df_optimism = pd.read_csv("data/original/optimism_cg_full.csv")
df_elrond = pd.read_csv("data/original/elrond-erd-2_cg_full.csv")
df_boba = pd.read_csv("data/original/boba-network_cg_full.csv")
df_gmd = pd.read_csv("data/original/gmd-protocol_cg_full.csv")

In [10]:
df_ripple.head()

Unnamed: 0,cryptocurrency_id,date,market_data.current_price.btc,market_data.current_price.eth,market_data.current_price.eur,market_data.current_price.usd,market_data.current_price.sats,market_data.market_cap.btc,market_data.market_cap.eth,market_data.market_cap.eur,...,community_data.reddit_average_comments_48h,community_data.reddit_subscribers,community_data.reddit_accounts_active_48h,developer_data.forks,developer_data.stars,developer_data.subscribers,developer_data.total_issues,developer_data.closed_issues,developer_data.pull_requests_merged,developer_data.pull_request_contributors
0,ripple,2013-08-04,5.6e-05,,0.004069,0.005874,,437342.920356,,31811270.0,...,0.0,,,,,,,,,
1,ripple,2013-08-05,5.3e-05,,0.003868,0.005653,,414384.116764,,30239020.0,...,0.0,,,,,,,,,
2,ripple,2013-08-06,4.4e-05,,0.003226,0.004669,,344345.210821,,25216920.0,...,0.0,,,,,,,,,
3,ripple,2013-08-07,4.2e-05,,0.003085,0.004486,,330862.482169,,24115470.0,...,0.0,,,,,,,,,
4,ripple,2013-08-08,4.1e-05,,0.002924,0.004196,,321507.588981,,22857490.0,...,0.0,,,,,,,,,


### **Creación de los datasets de la fase beta**

#### `Función para crear el dataset de prices`

In [3]:
def dataset_crypto_prices(df_cryptocurrency):
    # Drop columns: 'community_data.twitter_followers', 'community_data.reddit_average_posts_48h' and 10 other columns
    df_cryptocurrency = df_cryptocurrency.drop(columns=['community_data.twitter_followers', 'community_data.reddit_average_posts_48h', 'community_data.reddit_average_comments_48h', 'community_data.reddit_subscribers', 'community_data.reddit_accounts_active_48h', 'developer_data.forks', 'developer_data.stars', 'developer_data.subscribers', 'developer_data.total_issues', 'developer_data.closed_issues', 'developer_data.pull_requests_merged', 'developer_data.pull_request_contributors'])
    # Change column type to datetime64[ns] for column: 'date'
    df_cryptocurrency = df_cryptocurrency.astype({'date': 'datetime64[ns]'})
    # Rename column 'date' to 'timestamp'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'date': 'timestamp'})
    # Rename column 'market_data.current_price.btc' to 'price_btc'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.current_price.btc': 'price_btc'})
    # Rename column 'market_data.current_price.eth' to 'price_eth'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.current_price.eth': 'price_eth'})
    # Rename column 'market_data.current_price.eur' to 'price_eur'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.current_price.eur': 'price_eur'})
    # Rename column 'market_data.current_price.usd' to 'price_usd'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.current_price.usd': 'price_usd'})
    # Rename column 'market_data.current_price.sats' to 'price_satoshis'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.current_price.sats': 'price_satoshis'})
    # Rename column 'market_data.market_cap.btc' to 'market_cap_btc'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.market_cap.btc': 'market_cap_btc'})
    # Rename column 'market_data.market_cap.eth' to 'market_cap_eth'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.market_cap.eth': 'market_cap_eth'})
    # Rename column 'market_data.market_cap.eur' to 'market_cap_eur'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.market_cap.eur': 'market_cap_eur'})
    # Rename column 'market_data.market_cap.usd' to 'market_cap_usd'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.market_cap.usd': 'market_cap_usd'})
    # Rename column 'market_data.market_cap.sats' to 'market_cap_satoshis'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.market_cap.sats': 'market_cap_satoshis'})
    # Rename column 'market_data.total_volume.btc' to 'total_volume_btc'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.total_volume.btc': 'total_volume_btc'})
    # Rename column 'market_data.total_volume.eth' to 'total_volume_eth'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.total_volume.eth': 'total_volume_eth'})
    # Rename column 'market_data.total_volume.eur' to 'total_volume_eur'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.total_volume.eur': 'total_volume_eur'})
    # Rename column 'market_data.total_volume.usd' to 'total_volume_usd'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.total_volume.usd': 'total_volume_usd'})
    # Rename column 'market_data.total_volume.sats' to 'total_volume_satoshis'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'market_data.total_volume.sats': 'total_volume_satoshis'})
    return df_cryptocurrency

In [4]:
#Lista de las critomonedas seleccionadas para análisis
crypto_df_price_list = ['bitcoin', 'ethereum', 'ripple', 'solana', 'okb', 'arbitrum', 'optimism', 'elrond','boba', 'gmd']

In [5]:
# Crea un diccionario para almacenar los DataFrames
df_price = {}
for cryptocurrency in crypto_df_price_list:
    df_price[cryptocurrency] = dataset_crypto_prices(globals()['df_' + cryptocurrency].copy())
    df_price[cryptocurrency].to_csv("data/beta/"+ cryptocurrency + "_price.csv", index=False)

In [7]:
df_price["ripple"].head(1)

Unnamed: 0,cryptocurrency_id,timestamp,price_btc,price_eth,price_eur,price_usd,price_satoshis,market_cap_btc,market_cap_eth,market_cap_eur,market_cap_usd,market_cap_satoshis,total_volume_btc,total_volume_eth,total_volume_eur,total_volume_usd,total_volume_satoshis
0,ripple,2013-08-04,5.6e-05,,0.004069,0.005874,,437342.920356,,31811270.0,45921034.0,,0.0,,0.0,0.0,


#### `Función para crear el dataset de social`

In [304]:
def dataset_crypto_social(df_cryptocurrency):
    # Drop columns: 'market_data.current_price.btc', 'market_data.current_price.eth' and 20 other columns
    df_cryptocurrency = df_cryptocurrency.drop(columns=['market_data.current_price.btc', 'market_data.current_price.eth', 'market_data.current_price.eur', 'market_data.current_price.usd', 'market_data.current_price.sats', 'market_data.market_cap.btc', 'market_data.market_cap.eth', 'market_data.market_cap.eur', 'market_data.market_cap.usd', 'market_data.market_cap.sats', 'market_data.total_volume.btc', 'market_data.total_volume.eth', 'market_data.total_volume.eur', 'market_data.total_volume.usd', 'market_data.total_volume.sats', 'developer_data.forks', 'developer_data.stars', 'developer_data.subscribers', 'developer_data.total_issues', 'developer_data.closed_issues', 'developer_data.pull_requests_merged', 'developer_data.pull_request_contributors'])
    # Change column type to datetime64[ns] for column: 'date'
    df_cryptocurrency = df_cryptocurrency.astype({'date': 'datetime64[ns]'})
    # Rename column 'date' to 'timestamp'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'date': 'timestamp'})
    # Rename column 'community_data.twitter_followers' to 'twitter_followers'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'community_data.twitter_followers': 'twitter_followers'})
    # Rename column 'community_data.reddit_average_posts_48h' to 'reddit_average_posts_48h'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'community_data.reddit_average_posts_48h': 'reddit_average_posts_48h'})
    # Rename column 'community_data.reddit_average_comments_48h' to 'reddit_average_comments_48h'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'community_data.reddit_average_comments_48h': 'reddit_average_comments_48h'})
    # Rename column 'community_data.reddit_subscribers' to 'reddit_subscribers'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'community_data.reddit_subscribers': 'reddit_subscribers'})
    # Rename column 'community_data.reddit_accounts_active_48h' to 'reddit_accounts_active_48h'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'community_data.reddit_accounts_active_48h': 'reddit_accounts_active_48h'})
    # Drop rows with missing data in column: 'twitter_followers'
    df_cryptocurrency = df_cryptocurrency.dropna(subset=['twitter_followers'])
    return df_cryptocurrency


In [305]:
# Crea un diccionario para almacenar los DataFrames
df_social = {}
for cryptocurrency in crypto_df_price_list:
    df_social[cryptocurrency] = dataset_crypto_social(globals()['df_' + cryptocurrency].copy())
    df_social[cryptocurrency].to_csv("data/beta/"+ cryptocurrency + "_social.csv", index=False)

In [306]:
df_social["bitcoin"].head(1)

Unnamed: 0,cryptocurrency_id,timestamp,twitter_followers,reddit_average_posts_48h,reddit_average_comments_48h,reddit_subscribers,reddit_accounts_active_48h
325,bitcoin,2014-03-20,44768.0,0.0,0.0,,


#### `Función para crear el dataset de developers`

In [308]:
def dataset_crypto_dev(df_cryptocurrency):
    # Drop columns: 'market_data.current_price.btc', 'market_data.current_price.eth' and 18 other columns
    df_cryptocurrency = df_cryptocurrency.drop(columns=['market_data.current_price.btc', 'market_data.current_price.eth', 'market_data.current_price.eur', 'market_data.current_price.usd', 'market_data.current_price.sats', 'market_data.market_cap.btc', 'market_data.market_cap.eth', 'market_data.market_cap.eur', 'market_data.market_cap.usd', 'market_data.market_cap.sats', 'market_data.total_volume.btc', 'market_data.total_volume.eth', 'market_data.total_volume.eur', 'market_data.total_volume.usd', 'market_data.total_volume.sats', 'community_data.twitter_followers', 'community_data.reddit_average_posts_48h', 'community_data.reddit_average_comments_48h', 'community_data.reddit_subscribers', 'community_data.reddit_accounts_active_48h'])
    # Change column type to datetime64[ns] for column: 'date'
    df_cryptocurrency = df_cryptocurrency.astype({'date': 'datetime64[ns]'})
    # Rename column 'date' to 'timestamp'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'date': 'timestamp'})
    # Rename column 'developer_data.forks' to 'forks'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'developer_data.forks': 'forks'})
    # Rename column 'developer_data.stars' to 'stars'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'developer_data.stars': 'stars'})
    # Rename column 'developer_data.subscribers' to 'subscribers'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'developer_data.subscribers': 'subscribers'})
    # Rename column 'developer_data.total_issues' to 'total_issues'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'developer_data.total_issues': 'total_issues'})
    # Rename column 'developer_data.closed_issues' to 'closed_issues'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'developer_data.closed_issues': 'closed_issues'})
    # Rename column 'developer_data.pull_requests_merged' to 'pull_requests_merged'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'developer_data.pull_requests_merged': 'pull_requests_merged'})
    # Rename column 'developer_data.pull_request_contributors' to 'pull_request_contributors'
    df_cryptocurrency = df_cryptocurrency.rename(columns={'developer_data.pull_request_contributors': 'pull_request_contributors'})
    # Drop rows with missing data in column: 'forks'
    df_cryptocurrency = df_cryptocurrency.dropna(subset=['forks'])
    return df_cryptocurrency

In [309]:
# Crea un diccionario para almacenar los DataFrames
df_dev = {}
for cryptocurrency in crypto_df_price_list:
    df_dev[cryptocurrency] = dataset_crypto_dev(globals()['df_' + cryptocurrency].copy())
    df_dev[cryptocurrency].to_csv("data/beta/"+ cryptocurrency + "_dev.csv", index=False)

#### Histórico de las criptomonedas api coingecko, limitación a 3 columnas `price`, `market_cap`, `total_volume_24h`

In [16]:
def coin_market_range_by_crypto_list(cryptocurrency, from_date, to_date):
    
    data = cg.get_coin_market_chart_range_by_id(id=cryptocurrency,vs_currency='usd',from_timestamp=from_date,to_timestamp=to_date)

    # Obtener la longitud máxima de las listas
    max_len = max(len(data["prices"]), len(data["market_caps"]), len(data["total_volumes"]))

    # Llenar con NaN los valores faltantes en cada columna por separado
    data["prices"] += [[None, None]] * (max_len - len(data["prices"]))
    data["market_caps"] += [[None, None]] * (max_len - len(data["market_caps"]))
    data["total_volumes"] += [[None, None]] * (max_len - len(data["total_volumes"]))

    df = pd.DataFrame({
        "cryptocurrency_id": [cryptocurrency] * len(data["prices"]),
        "timestamp": [datetime.datetime.utcfromtimestamp(x[0] / 1000).strftime("%Y-%m-%d") for x in data["prices"]],
        "price_usd": [pd.to_numeric(x[1]) for x in data["prices"]],
        "market_cap": [round(pd.to_numeric(x[1]), 2) for x in data["market_caps"]],
        "total_volume_24h": [round(pd.to_numeric(x[1]), 2) for x in data["total_volumes"]]
    })
    return df

In [17]:
from_unix_timestamp = "1282073215"
to_unix_timestamp = "1692300415"
# Crea un diccionario para almacenar los DataFrames
df_history = {}
for cryptocurrency in crypto_list:
    df_history[cryptocurrency] = coin_market_range_by_crypto_list(cryptocurrency, from_unix_timestamp, to_unix_timestamp)
    df_history[cryptocurrency].to_csv("data/beta/"+ cryptocurrency + "_cg_history_range.csv", index=False)

#### `Creamos el dataset de los halving de Bitcoin`

#### `NOTA:` Un acontecimiento importante en el ecosistema de las criptomonedas es el halving de bitcoin el cual su comisión de minería se divide a la mitad cada 210.000 bloques es aproximadamente cada 4 años, este tiene una correlación con su ciclo alsista. **El Próximo es el 07 de abril de 2024**

In [9]:
# Crear un diccionario con los datos de los halvings de Bitcoin
data = {
    'id_halving': [0,1,2,3,4],
    'cryptocurrency_id': ["bitcoin","bitcoin","bitcoin","bitcoin","bitcoin"],
    'timestamp': ['2012-11-28', '2016-07-09', '2020-05-11', '2024-04-07', '2028-03-15'],  # Fechas de los halvings
    'previous_blocks': [210000, 420000, 630000, 840000, 1050000],                      # Número de bloques antes del halving
    'previous_reward': [50, 25, 12.5, 6.25, 3.125],                                 # Recompensa por bloque antes del halving (BTC)
    'new_reward': [25, 12.5, 6.25, 3.125, 1.5625]                                # Recompensa por bloque después del halving (BTC)
}

# Crear un DataFrame a partir del diccionario
df_bitcoin_halvings = pd.DataFrame(data)

# Convertir la columna 'Fecha' al formato datetime
df_bitcoin_halvings = df_bitcoin_halvings.astype({'timestamp': 'datetime64[ns]'})
df_bitcoin_halvings[["previous_reward", "new_reward"]] = round(df_bitcoin_halvings[["previous_reward", "new_reward"]], 2)

In [10]:
# Mostrar el DataFrame
df_bitcoin_halvings.head()

Unnamed: 0,id_halving,cryptocurrency_id,timestamp,previous_blocks,previous_reward,new_reward
0,0,bitcoin,2012-11-28,210000,50.0,25.0
1,1,bitcoin,2016-07-09,420000,25.0,12.5
2,2,bitcoin,2020-05-11,630000,12.5,6.25
3,3,bitcoin,2024-04-07,840000,6.25,3.12
4,4,bitcoin,2028-03-15,1050000,3.12,1.56


In [11]:
# Guardamos el dataset
df_bitcoin_halvings.to_csv("data/beta/bitcoin_halvings.csv", index=False)