In [21]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Load and Preprocess Data

# Read the two raw CSV files: Bitcoin historical data first, then the
# macroeconomic indicators.
crypto_df = pd.read_csv(r"C:\Users\vaish\OneDrive\UH\Project\coin_Bitcoin.csv")
macro_df = pd.read_csv(r"C:\Users\vaish\OneDrive\UH\Project\economic_indicators.csv")

# Parse the 'Date' column of each frame into datetime values; pd.to_datetime
# raises if a value cannot be parsed, so bad dates surface here.
for frame in (crypto_df, macro_df):
    frame['Date'] = pd.to_datetime(frame['Date'])

# 'SNo' adds no analytical value, so remove it. errors='ignore' makes this a
# no-op when the column is not present in the file.
crypto_df = crypto_df.drop(columns=['SNo'], errors='ignore')


In [23]:
# Preprocessing

# Resample Cryptocurrency Data to Monthly Frequency

# Only numeric columns can be averaged, so restrict the resample to them.
numeric_columns = crypto_df.select_dtypes(include=['number']).columns

# Average every numeric column per calendar month, labelled at month end
# ('ME'). Any month without observations inherits the previous month's
# averages; this is done here, on the one-row-per-month series, so the fill
# cannot be affected by the row duplication the merge introduces below.
crypto_df = (
    crypto_df.set_index('Date')
    .resample('ME')[numeric_columns]
    .mean()
    .ffill()
    .reset_index()
)

# Merge Cryptocurrency Data with Macroeconomic Data

# Left join on 'Date': every monthly crypto row is kept, and a month appears
# once per matching macro row. A left join preserves the left key order, so
# rows stay in ascending date order.
merged_df = pd.merge(crypto_df, macro_df, on='Date', how='left')

# Handle missing values using forward fill.
# A plain merged_df.ffill() walks the rows in order regardless of which
# country they belong to, so a gap in one country's indicators is filled from
# whichever row happens to precede it, and months with no macro match inherit
# the previous row's 'Country' label and figures. Filling within each country
# keeps every series self-contained. Rows without a macro match keep a missing
# 'Country' and missing indicators instead of receiving invented ones.
if 'Country' in merged_df.columns:
    # Columns contributed by the macro table (everything that is neither a
    # crypto column nor the grouping key).
    indicator_columns = [
        col for col in merged_df.columns
        if col != 'Country' and col not in crypto_df.columns
    ]
    if indicator_columns:
        # dropna=False keeps rows whose 'Country' is missing in a group of
        # their own rather than blanking their values.
        merged_df[indicator_columns] = (
            merged_df.groupby('Country', dropna=False, sort=False)[indicator_columns].ffill()
        )
else:
    # No country dimension: the frame is a single time series, so a global
    # forward fill is safe.
    merged_df = merged_df.ffill()

In [25]:
# Select Key Countries for Analysis

# Countries kept for the analysis (the author's choice for a balanced,
# in-depth comparison).
selected_countries = ['USA', 'China', 'Germany', 'UK', 'India']

# Drop 'Unemployment Rate (%)', judged by the author to be less relevant to
# Bitcoin price movements. errors='ignore' makes this a no-op when the column
# is already gone, so the cell can be re-run safely.
merged_df = merged_df.drop(columns=['Unemployment Rate (%)'], errors='ignore')

# Keep only rows for the selected countries. The explicit .copy() makes the
# result an independent frame, so adding columns to it in later cells does not
# raise SettingWithCopyWarning.
merged_df = merged_df[merged_df['Country'].isin(selected_countries)].copy()

# Save merged dataset BEFORE feature engineering (index=False keeps the row
# index out of the CSV).
merged_df.to_csv(r"C:\Users\vaish\OneDrive\UH\Project\merged_crypto_macro_data.csv", index=False)
print("Merged dataset saved successfully!")

Merged dataset saved successfully!
