# Configuración del ambiente

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import requests
from bs4 import BeautifulSoup
# Names shared across the notebook's functions.
# NOTE(review): a `global` statement at module level has no effect in Python;
# the functions below declare the globals they assign on their own.
global df_bitcoin, precio_actual, tendencia, media_bitcoin,  algoritmo_decision, df_bitcoin_limpio

# Obtención de datos

## Precios Históricos del Bitcoin

In [2]:
def importar_base_bitcoin():
    """Load recent BTC-USD price history into the global ``df_bitcoin``.

    Uses ``yfinance`` to request the ``BTC-USD`` ticker history for a
    ``'7d'`` period at a ``'5m'`` interval and stores the result, wrapped
    in a ``pandas.DataFrame``, in the module-level ``df_bitcoin``.

    Returns:
        None. The result is only exposed through the ``df_bitcoin`` global.
    """
    global df_bitcoin
    historial = yf.Ticker('BTC-USD').history(period='7d', interval='5m')
    df_bitcoin = pd.DataFrame(historial)

# Download the price history, then preview the first 10 rows.
importar_base_bitcoin()
df_bitcoin.head(10)
    

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-10-25 00:00:00+00:00,33916.042969,33941.207031,33916.042969,33941.207031,0,0.0,0.0
2023-10-25 00:05:00+00:00,33938.441406,33942.09375,33914.734375,33936.296875,0,0.0,0.0
2023-10-25 00:10:00+00:00,33935.175781,33950.28125,33886.140625,33886.140625,0,0.0,0.0
2023-10-25 00:15:00+00:00,33852.980469,33910.554688,33852.980469,33910.554688,16486400,0.0,0.0
2023-10-25 00:20:00+00:00,33933.507812,33934.132812,33913.386719,33913.386719,0,0.0,0.0
2023-10-25 00:25:00+00:00,33918.3125,33923.777344,33868.476562,33868.476562,0,0.0,0.0
2023-10-25 00:30:00+00:00,33828.261719,33828.261719,33810.691406,33810.691406,0,0.0,0.0
2023-10-25 00:35:00+00:00,33813.429688,33831.039062,33804.660156,33831.039062,0,0.0,0.0
2023-10-25 00:40:00+00:00,33836.089844,33846.183594,33835.234375,33846.183594,0,0.0,0.0
2023-10-25 00:45:00+00:00,33846.617188,33952.453125,33846.617188,33952.453125,0,0.0,0.0


## Extrayendo tendencias

In [3]:
def extraer_tendencias():
    """Scrape the current Bitcoin price and trend from coinmarketcap.com.

    Downloads the CoinMarketCap home page, finds the position of the ``BTC``
    symbol among the coin-symbol elements and uses that position to pick the
    matching price element and trend element.

    Side effects:
        Sets the module-level globals ``precio_actual`` (the price text with
        ``$`` and ``,`` stripped, converted to ``float``) and ``tendencia``
        (``'baja'`` when the trend icon carries the ``icon-Caret-down`` CSS
        class, ``'alta'`` otherwise), and prints the index, price and trend.

    Raises:
        requests.HTTPError: if the page answers with an error status code.
        requests.Timeout: if the server does not answer within the timeout.
        IndexError: if the ``BTC`` symbol is not present in the page, or if
            there is no price/trend element at its position.
    """
    global precio_actual, tendencia

    # NOTE(review): the CSS class names used below look auto-generated and
    # will need updating whenever the site's markup changes.
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36'}
    url = 'https://coinmarketcap.com'
    # A timeout keeps the notebook from hanging forever on a stalled request,
    # and raise_for_status() stops us from parsing an error page.
    respuesta = requests.get(url, headers=headers, timeout=30)
    respuesta.raise_for_status()
    soup = BeautifulSoup(respuesta.content, features='lxml')

    # Locate the position of BTC among the coin symbols listed in the page.
    simbolos = soup.find_all('p', {'class': 'sc-4984dd93-0 iqdbQL coin-item-symbol'})
    indice = next(
        (i for i, simbolo in enumerate(simbolos) if simbolo.get_text() == 'BTC'),
        None,
    )
    if indice is None:
        # Fail here with a clear message instead of an unexplained
        # out-of-range lookup further down.
        raise IndexError('No se encontró el símbolo BTC en la página de CoinMarketCap')
    print('El índice en la tabla para la posicion del BTC es: ', indice)

    # Price elements; the class list covers the neutral, falling and rising variants.
    precios = soup.find_all('div', {'class': ['sc-a0353bbc-0 gDrtaY', 'sc-a0353bbc-0 gDrtaY fall', 'sc-a0353bbc-0 gDrtaY rise']})
    # Strip the currency symbol and thousands separator before converting.
    precio_actual = float(precios[indice].get_text().replace('$', '').replace(',', ''))
    print('El precio actual del Bitcoin en este momento es: ', precio_actual)

    # Trend element at the same position as the BTC symbol.
    # NOTE(review): if each table row contains more than one of these spans,
    # position `indice` is only guaranteed to belong to BTC when it is 0 - confirm.
    tendencias = soup.find_all('span', {'class': ['sc-d55c02b-0 gUnzUB', 'sc-d55c02b-0 iwhBxy']})[indice]
    # BeautifulSoup exposes `class` as a list of class names, so test for
    # membership. Indexing that list with `indice` (the row position) was only
    # correct when BTC happened to be the first row.
    clases_icono = tendencias.find('span')['class']
    tendencia = 'baja' if 'icon-Caret-down' in clases_icono else 'alta'
    print('La tendencia actual del bitcoin es: ', tendencia)
    
# Run the scraper; it sets the globals precio_actual and tendencia and prints them.
extraer_tendencias()

El índice en la tabla para la posicion del BTC es:  0
El precio actual del Bitcoin en este momento es:  34482.19
La tendencia actual del bitcoin es:  alta


# Limpieza de datos

In [4]:
def limpieza_datos():
    """Build the cleaned price table ``df_bitcoin_limpio`` from ``df_bitcoin``.

    Reads the module-level ``df_bitcoin`` (left unmodified) and applies, in
    order:

    1. Drop rows whose index value repeats an earlier one (first kept).
    2. Drop rows with a null ``Close``.
    3. Keep only rows whose ``Volume`` is greater than 0.
    4. Replace the index with a 0-based integer index; the previous index
       becomes a regular column.

    TODO: the original notes for this step also call for removing outliers
    in ``Close`` (identified with a box plot); that is not implemented yet.

    Side effects:
        Assigns the result to the module-level ``df_bitcoin_limpio``.

    Returns:
        None.
    """
    global df_bitcoin_limpio

    # Every step returns a new frame, so the raw df_bitcoin is never mutated
    # and re-running the cell always yields the same result.
    sin_duplicados = df_bitcoin[~df_bitcoin.index.duplicated()]
    con_cierre = sin_duplicados.dropna(subset=['Close'])
    con_volumen = con_cierre[con_cierre['Volume'] > 0]
    df_bitcoin_limpio = con_volumen.reset_index()
    
    

In [5]:
# Build df_bitcoin_limpio from the raw data and display it.
limpieza_datos()
df_bitcoin_limpio

Unnamed: 0,Datetime,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2023-10-25 00:15:00+00:00,33852.980469,33910.554688,33852.980469,33910.554688,16486400,0.0,0.0
1,2023-10-25 07:45:00+00:00,33816.480469,33822.468750,33709.109375,33822.468750,53430272,0.0,0.0
2,2023-10-25 07:50:00+00:00,33803.085938,33868.492188,33803.085938,33868.492188,8282112,0.0,0.0
3,2023-10-25 09:15:00+00:00,34063.812500,34071.816406,34047.765625,34047.765625,3244032,0.0,0.0
4,2023-10-25 09:20:00+00:00,34054.421875,34088.605469,34054.421875,34088.605469,2584576,0.0,0.0
...,...,...,...,...,...,...,...,...
792,2023-10-31 16:25:00+00:00,34336.195312,34358.515625,34336.195312,34353.238281,1253376,0.0,0.0
793,2023-10-31 16:30:00+00:00,34360.492188,34360.492188,34347.644531,34347.644531,3068928,0.0,0.0
794,2023-10-31 16:35:00+00:00,34355.500000,34355.500000,34309.015625,34348.507812,984064,0.0,0.0
795,2023-10-31 16:40:00+00:00,34357.730469,34488.308594,34357.730469,34488.308594,62163968,0.0,0.0


In [9]:
# Box plot of the Volume column.
# NOTE(review): this plots the raw df_bitcoin, not df_bitcoin_limpio - confirm this is intended.
df_bitcoin.boxplot(['Volume'])

<Axes: >