In [4]:
import yfinance as yf
import pandas as pd

In [5]:
# Define the crypto symbols for major coins
symbols = ["BTC-USD", "ETH-USD", "BNB-USD"]

In [6]:
# Fetch historical data (last 30 days, daily interval)
data = yf.download(symbols, period="30d", interval="1d")

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  3 of 3 completed


In [8]:
# Reset index to make 'Date' a column
data = data.reset_index()

In [9]:
# Flatten the multi-index columns (e.g., 'Close' for each coin)
data.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in data.columns]

In [10]:
# Rename columns for clarity
data = data.rename(columns={
    "Date": "date",
    "Open_BTC-USD": "open_btc",
    "High_BTC-USD": "high_btc",
    "Low_BTC-USD": "low_btc",
    "Close_BTC-USD": "close_btc",
    "Adj Close_BTC-USD": "adj_close_btc",
    "Volume_BTC-USD": "volume_btc",
    "Open_ETH-USD": "open_eth",
    "High_ETH-USD": "high_eth",
    "Low_ETH-USD": "low_eth",
    "Close_ETH-USD": "close_eth",
    "Adj Close_ETH-USD": "adj_close_eth",
    "Volume_ETH-USD": "volume_eth",
    "Open_BNB-USD": "open_bnb",
    "High_BNB-USD": "high_bnb",
    "Low_BNB-USD": "low_bnb",
    "Close_BNB-USD": "close_bnb",
    "Adj Close_BNB-USD": "adj_close_bnb",
    "Volume_BNB-USD": "volume_bnb"
})

In [11]:
# Introduce some missing values and outliers for learning purposes
data.loc[5:7, "close_btc"] = float("nan")
data.loc[10, "volume_eth"] = 1e12  # Outlier
data.loc[15, "close_bnb"] = 0      # Outlier (unrealistic price)

In [12]:
# Save to CSV
data.to_csv("crypto_market_data.csv", index=False)
print("Crypto market data saved to crypto_market_data.csv")

Crypto market data saved to crypto_market_data.csv


What You’ll Learn:

df.shape: Shows the number of rows and columns (should be around 30 rows, 19 columns).
df.head(): Displays the first 5 rows to get a sense of the data.
df.info(): Shows column names, data types, and non-null counts (you’ll notice missing values in close_btc).
df.describe(): Provides summary statistics like mean, min, max, and quartiles for numerical columns.

In [18]:
df = pd.read_csv("crypto_market_data.csv")
df.head()

Unnamed: 0,index_,Date_,close_bnb,close_btc,close_eth,high_bnb,high_btc,high_eth,low_bnb,low_btc,low_eth,open_bnb,open_btc,open_eth,volume_bnb,volume_btc,volume_eth
0,0,2025-03-28,619.474915,84353.148438,1895.50293,637.241882,87489.859375,2015.454346,615.92511,83557.640625,1863.01062,637.206421,87185.234375,2002.410522,1608298526,34198619509,18160526498
1,1,2025-03-29,603.237854,82597.585938,1827.320312,623.212402,84567.335938,1911.900879,598.218811,81634.140625,1799.200806,619.474609,84352.070312,1895.549683,1367332800,16969396135,12194771785
2,2,2025-03-30,601.8302,82334.523438,1806.218628,612.789368,83505.0,1847.570557,594.214355,81573.25,1769.41272,603.191345,82596.984375,1827.311035,1204098085,14763760943,9854857162
3,3,2025-03-31,605.072815,82548.914062,1823.47998,608.501099,83870.125,1852.551392,588.086182,81293.890625,1778.692261,601.8302,82336.0625,1806.316528,1538796118,29004228247,15765030938
4,4,2025-04-01,611.297119,85169.171875,1905.491455,618.123779,85487.367188,1926.302979,604.754761,82429.359375,1820.350342,605.072815,82551.921875,1823.562378,2091803391,28175650319,15001220420


In [13]:
# Load the dataset
df = pd.read_csv("crypto_market_data.csv")
print("Dataset shape:", df.shape)
print("\nFirst 5 rows:\n", df.head())
print("\nColumn info:\n", df.info())
print("\nBasic statistics:\n", df.describe())

Dataset shape: (30, 17)

First 5 rows:
    index_       Date_   close_bnb     close_btc    close_eth    high_bnb  \
0       0  2025-03-28  619.474915  84353.148438  1895.502930  637.241882   
1       1  2025-03-29  603.237854  82597.585938  1827.320312  623.212402   
2       2  2025-03-30  601.830200  82334.523438  1806.218628  612.789368   
3       3  2025-03-31  605.072815  82548.914062  1823.479980  608.501099   
4       4  2025-04-01  611.297119  85169.171875  1905.491455  618.123779   

       high_btc     high_eth     low_bnb       low_btc      low_eth  \
0  87489.859375  2015.454346  615.925110  83557.640625  1863.010620   
1  84567.335938  1911.900879  598.218811  81634.140625  1799.200806   
2  83505.000000  1847.570557  594.214355  81573.250000  1769.412720   
3  83870.125000  1852.551392  588.086182  81293.890625  1778.692261   
4  85487.367188  1926.302979  604.754761  82429.359375  1820.350342   

     open_bnb      open_btc     open_eth  volume_bnb   volume_btc   volume_e