In [84]:
# Import Libraries
import pandas as pd
import os

In [85]:
# Taking dataset from local file
df = r"F:\HSE\Consulting Project\btc_analysis\Bitcoin Historical Data.csv"

In [86]:
# Load the dataset via Pandas
btc = pd.read_csv(df)
btc

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,04/15/2025,84062.3,84586.8,86438.8,84047.7,59.50K,-0.62%
1,04/14/2025,84586.4,83752.8,85794.9,83705.2,78.03K,1.02%
2,04/13/2025,83734.4,85282.5,85999.5,83049.6,70.93K,-1.83%
3,04/12/2025,85292.7,83420.5,85893.5,82828.0,55.60K,2.25%
4,04/11/2025,83413.1,79606.1,84275.0,79004.2,94.62K,4.78%
...,...,...,...,...,...,...,...
1195,01/05/2022,43425.9,45833.1,47019.4,42535.1,83.74K,-5.26%
1196,01/04/2022,45837.3,46435.7,47505.4,45602.1,55.59K,-1.28%
1197,01/03/2022,46430.2,47293.9,47556.0,45704.0,41.06K,-1.86%
1198,01/02/2022,47311.8,47738.7,47944.9,46718.2,27.02K,-0.89%


In [87]:
btc.dtypes

Date        object
Price       object
Open        object
High        object
Low         object
Vol.        object
Change %    object
dtype: object

In [88]:
# Making all columns name in the lowercase
btc.columns = btc.columns.str.strip().str.lower()

In [89]:
btc.info

<bound method DataFrame.info of             date     price      open      high       low    vol. change %
0     04/15/2025  84,062.3  84,586.8  86,438.8  84,047.7  59.50K   -0.62%
1     04/14/2025  84,586.4  83,752.8  85,794.9  83,705.2  78.03K    1.02%
2     04/13/2025  83,734.4  85,282.5  85,999.5  83,049.6  70.93K   -1.83%
3     04/12/2025  85,292.7  83,420.5  85,893.5  82,828.0  55.60K    2.25%
4     04/11/2025  83,413.1  79,606.1  84,275.0  79,004.2  94.62K    4.78%
...          ...       ...       ...       ...       ...     ...      ...
1195  01/05/2022  43,425.9  45,833.1  47,019.4  42,535.1  83.74K   -5.26%
1196  01/04/2022  45,837.3  46,435.7  47,505.4  45,602.1  55.59K   -1.28%
1197  01/03/2022  46,430.2  47,293.9  47,556.0  45,704.0  41.06K   -1.86%
1198  01/02/2022  47,311.8  47,738.7  47,944.9  46,718.2  27.02K   -0.89%
1199  01/01/2022  47,738.0  46,217.5  47,917.6  46,217.5  31.24K    3.29%

[1200 rows x 7 columns]>

In [90]:
#Rename 'volume' to 'Volume'
btc.rename(columns={'vol.': 'volume'}, inplace=True)
btc.rename(columns={'open': 'opening_price'}, inplace=True)
btc.rename(columns={'high': 'highest_price'}, inplace=True)
btc.rename(columns={'low': 'lowest_price'}, inplace=True)

In [91]:
# Clean and convert 'change' column
btc ['change'] = btc['change %'].str.replace('%', '', regex=False).astype(float)

# Drop original 'change %' column
btc = btc.drop(columns=['change %'])

In [92]:
# Convert 'date' to datetime
btc['date'] = pd.to_datetime(btc['date'], errors='coerce')

# Convert amount-related columns to int
amount_columns = ['price', 'opening_price', 'highest_price', 'lowest_price']

for col in amount_columns:
    btc[col] = (
        btc[col]
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(float)
        .round(0)
        .astype(int)
    )


In [93]:
# Ensure the column is string and clean 'K'
btc['volume'] = (
    btc['volume']
    .astype(str)
    .str.replace('K', '', regex=False)
)

# Convert to numeric, force errors to NaN
btc['volume'] = pd.to_numeric(btc['volume'], errors='coerce')

# Multiply by 1000 to convert from 'K', handle NaNs safely
btc['volume'] = (btc['volume'] * 1000).round()

# Fill NaNs (if any) with 0 or any fallback value before converting to int
btc['volume'] = btc['volume'].fillna(0).astype(int)

In [94]:
btc.dtypes

date             datetime64[ns]
price                     int32
opening_price             int32
highest_price             int32
lowest_price              int32
volume                    int32
change                  float64
dtype: object

In [99]:
btc_price_info = btc
btc_price_info

Unnamed: 0,date,price,opening_price,highest_price,lowest_price,volume,change
0,2025-04-15,84062,84587,86439,84048,59500,-0.62
1,2025-04-14,84586,83753,85795,83705,78030,1.02
2,2025-04-13,83734,85282,86000,83050,70930,-1.83
3,2025-04-12,85293,83420,85894,82828,55600,2.25
4,2025-04-11,83413,79606,84275,79004,94620,4.78
...,...,...,...,...,...,...,...
1195,2022-01-05,43426,45833,47019,42535,83740,-5.26
1196,2022-01-04,45837,46436,47505,45602,55590,-1.28
1197,2022-01-03,46430,47294,47556,45704,41060,-1.86
1198,2022-01-02,47312,47739,47945,46718,27020,-0.89


In [100]:
# Save updated dataset in the local folder
btc_price_info.to_csv(r"F:\HSE\Consulting Project\btc_analysis\btc_price_info.csv", index=False, encoding='utf-8-sig')