In [None]:
# Import the dependencies.
from pathlib import Path
import pandas as pd
from pprint import pprint
import pandera as pa


In [None]:
# Location of the new Bitcoin CSV dataset.
bit_coin_csv_path = Path("./new_CSV_files/new_bitcoin.csv")

# Load the CSV into a pandas DataFrame.
bitcoin_df = pd.read_csv(filepath_or_buffer=bit_coin_csv_path)


In [None]:
# Location of the new Cardano CSV dataset.
cardano_coin_csv_path = Path("./new_CSV_files/new_cardano.csv")

# Load the CSV into a pandas DataFrame.
cardano_df = pd.read_csv(filepath_or_buffer=cardano_coin_csv_path)

In [None]:
# Location of the new Dogecoin CSV dataset.
dogecoin_csv_path = Path("./new_CSV_files/new_dogecoin.csv")

# Load the CSV into a pandas DataFrame.
dogecoin_df = pd.read_csv(filepath_or_buffer=dogecoin_csv_path)

In [None]:
# Location of the new Ethereum CSV dataset.
ethereum_csv_path = Path("./new_CSV_files/new_ethereum.csv")

# Load the CSV into a pandas DataFrame.
ethereum_df = pd.read_csv(filepath_or_buffer=ethereum_csv_path)

In [None]:
# Location of the new Tether CSV dataset.
tether_csv_path = Path("./new_CSV_files/new_tether.csv")

# Load the CSV into a pandas DataFrame.
tether_df = pd.read_csv(filepath_or_buffer=tether_csv_path)

In [None]:
# Location of the new XRP CSV dataset.
xrp_csv_path = Path("./new_CSV_files/new_xrp.csv")

# Load the CSV into a pandas DataFrame.
xrp_df = pd.read_csv(filepath_or_buffer=xrp_csv_path)

In [None]:
# Tag every column except the shared 'date' key with the name of the frame it
# came from, so the per-coin columns stay distinguishable after a merge.
# Columns missing from the mapping (i.e. 'date') are left untouched by rename().
df1 = bitcoin_df.rename(
    columns={col: f"{col}_bitcoin_df" for col in bitcoin_df.columns if col != 'date'}
)
df2 = cardano_df.rename(
    columns={col: f"{col}_cardano_df" for col in cardano_df.columns if col != 'date'}
)
df3 = dogecoin_df.rename(
    columns={col: f"{col}_dogecoin_df" for col in dogecoin_df.columns if col != 'date'}
)
df4 = ethereum_df.rename(
    columns={col: f"{col}_ethereum_df" for col in ethereum_df.columns if col != 'date'}
)
df5 = tether_df.rename(
    columns={col: f"{col}_tether_df" for col in tether_df.columns if col != 'date'}
)
df6 = xrp_df.rename(
    columns={col: f"{col}_xrp_df" for col in xrp_df.columns if col != 'date'}
)

In [None]:
# Join the six suffixed frames side by side on the shared 'date' column.
# The default join type is used, so only dates present in every frame survive.
merged_df = (
    df1
    .merge(df2, on='date')
    .merge(df3, on='date')
    .merge(df4, on='date')
    .merge(df5, on='date')
    .merge(df6, on='date')
)

# Show the merged dataframe as the cell output.
merged_df

In [None]:
# Preview the first five rows of the Bitcoin frame.
bitcoin_df.head(n=5)

In [None]:
# Some column headers end in two spaces ("name  ", "symbol  "), which stops them
# from lining up with the clean headers during the merge; map them to the clean
# names. rename() ignores mapping keys that are not present in a frame.
# The id column (spelled "SNo" or "SNO" depending on the file -- TODO confirm)
# is deliberately not dropped in this cell.

cardano_new = cardano_df.rename(columns={"name  ": "name", "symbol  ": "symbol"})

dogecoin_new = dogecoin_df.rename(columns={"name  ": "name", "symbol  ": "symbol"})

ethereum_new = ethereum_df.rename(columns={"name  ": "name", "symbol  ": "symbol"})

tether_new = tether_df.rename(columns={"name  ": "name", "symbol  ": "symbol"})

xrp_new = xrp_df.rename(columns={"name  ": "name", "symbol  ": "symbol"})

# Inspect the resulting column labels of one frame as a sanity check.
xrp_new.columns

In [None]:
# Display the Cardano frame produced by the column rename above.
cardano_new

In [None]:
# Combine all six coins into one long frame with outer joins.
# No `on=` is given, so each merge uses every column label the two frames have
# in common as the join key (not just name and date).
merged_new = (
    bitcoin_df
    .merge(cardano_new, how="outer")
    .merge(dogecoin_new, how="outer")
    .merge(ethereum_new, how="outer")
    .merge(tether_new, how="outer")
    .merge(xrp_new, how="outer")
)

merged_new

In [None]:
# Drop the columns that served as unique identifiers in the original CSVs;
# they are no longer necessary in the combined frame.
# drop(..., errors="ignore") is used instead of `del` so the cell can be re-run
# without raising KeyError once the columns are already gone.
merged_new = merged_new.drop(columns=["SNO", "SNo"], errors="ignore")

merged_new

In [None]:
# Build the pandera schema used for data validation.

# Allowed values for the "name" and "symbol" columns; the schema rejects
# anything outside these lists.
coin_names = ["Bitcoin", "Cardano", "Dogecoin", "Ethereum", "Tether", "XRP"]
# NOTE(review): "BTC " carries a trailing space -- presumably it mirrors the
# value stored in the CSV; confirm before normalising.
coin_symbols = ["BTC ", "ADA", "DOGE", "ETH", "USDT", "XRP"]

# Every listed column is tied to a dtype. pandera columns are non-nullable
# unless told otherwise, so the dtype check also rejects null values.
# Row uniqueness is enforced on the (date, name, symbol) combination.
val_schema = pa.DataFrameSchema(
    columns={
        "name": pa.Column(str, pa.Check.isin(coin_names)),
        "symbol": pa.Column(str, pa.Check.isin(coin_symbols)),
        "date": pa.Column(str),
        # All remaining columns are plain float columns with no extra checks.
        **{
            metric: pa.Column(float)
            for metric in (
                "daily_high",
                "daily_low",
                "open_price",
                "close_price",
                "marketcap",
                "daily_percent_change",
                "liquidity_ratio",
                "true_range",
                "average_true_range",
                "volatility_ratio",
                "daily_percent_change_min_max",
            )
        },
    },
    unique=["date", "name", "symbol"],
)

In [None]:
# Run the schema against the combined frame.

# lazy=True collects every failure before raising, so all problems are reported
# in one pass instead of one at a time. Catching SchemaErrors lets us print a
# readable report rather than a raw traceback.
try:
    val_schema.validate(merged_new, lazy=True)
except pa.errors.SchemaErrors as schema_errors:
    print("Data validation errors:", schema_errors, sep="\n")

# No output means the data passed; anything printed here is a validation
# failure that has to be resolved in the data.

In [None]:
import matplotlib.pyplot as plt

# Read the Bitcoin CSV into a DataFrame.
df = pd.read_csv("./new_CSV_files/new_bitcoin.csv")

# Bubble chart of market cap against close price; both the colour and the
# marker area are driven by volume (area is scaled down by 1e7).
plt.figure(figsize=(12, 8))
scatter = plt.scatter(
    x=df['marketcap'],
    y=df['close_price'],
    s=df['volume'] / 1e7,
    c=df['volume'],
    cmap='viridis',
    alpha=0.9,
)

# Axis labels and title.
plt.xlabel('marketcap (USD)')
plt.ylabel('close_price (USD)')
plt.title('Bubble Chart: MarketCap vs. Close Price with Bubble Size Representing Volume')

# Colour bar explaining the volume colour scale.
cbar = plt.colorbar(mappable=scatter, label='Volume')

# Render the figure.
plt.show()

In [None]:
# Load the Cardano data and order the rows by the 'date' column.
# NOTE(review): 'date' is sorted exactly as read from the CSV; that is only
# chronological if the values are stored in a sortable (e.g. ISO) format -- confirm.
df = pd.read_csv("./new_CSV_files/new_cardano.csv").sort_values(by='date')

# Overlaid bar chart of daily percent change and close price for each date.
# The two series use different bar alignments ('center' vs 'edge') so they do
# not sit exactly on top of each other.
plt.figure(figsize=(12, 8))
plt.bar(df['date'], df['daily_percent_change'], label='daily_percent_change', alpha=0.9, color='blue', align='center')
plt.bar(df['date'], df['close_price'], label='close_price', alpha=0.9, color='green', align='edge')

# Title and axis labels. The x-axis carries the dates, so it is labelled
# 'date' (it was previously mislabelled 'closeprice').
plt.title('Bar Chart: Daily Percent Change vs. closing price')
plt.xlabel('date')
plt.ylabel('Value')

# Legend identifying the two series.
plt.legend()

# Rotate the x tick labels for better readability.
plt.xticks(rotation=45, ha='right')

# Render the figure.
plt.show()