#### List of Frames
- stock_df (base frame)
- crypto_df (base frame)
- companies_df (base frame)



In [95]:
# Import Modules

import pandas as pd
from datetime import timedelta


In [96]:
# Load Stock & Crypto Dataframes

stock_csv = "stocks.csv"
stock_df = pd.read_csv(stock_csv)

crypto_csv = "cryptocurrency.csv"
crypto_df = pd.read_csv(crypto_csv)

companies_csv = "companies.csv"
companies_df = pd.read_csv(companies_csv)

In [97]:
# Cleaning Stock Data

stock_df = stock_df.replace({'\%': ''}, regex=True)
stock_df = stock_df.replace({'\+': ''}, regex=True)
stock_df = stock_df.rename(columns = {'last': 'price_usd'})

def convert_volume(value):
    """Convert volume strings with K, M, B suffixes to numbers"""
    if pd.isna(value):
        return value
    
    value = str(value).strip()
    
    if value.endswith('K'):
        return float(value[:-1]) * 1_000
    elif value.endswith('M'):
        return float(value[:-1]) * 1_000_000
    elif value.endswith('B'):
        return float(value[:-1]) * 1_000_000_000
    else:
        return float(value)

stock_df['vol_'] = stock_df['vol_'].apply(convert_volume)
stock_df = stock_df.drop('ticker', axis=1, errors='ignore')

lookup = companies_df[['ticker', 'company name', 'short name']].copy()

lookup['company_lower'] = lookup['company name'].str.lower().str.strip()
lookup['short_lower'] = lookup['short name'].str.lower().str.strip()

stock_df['name_lower'] = stock_df['name'].str.lower().str.strip()

stock_df = stock_df.merge(lookup[['ticker', 'company_lower']], 
                          left_on='name_lower', 
                          right_on='company_lower', 
                          how='left', 
                          suffixes=('', '_drop'))

mask = stock_df['ticker'].isna()
temp_merge = stock_df[mask].drop('ticker', axis=1).merge(
    lookup[['ticker', 'short_lower']], 
    left_on='name_lower', 
    right_on='short_lower', 
    how='left'
)
stock_df.loc[mask, 'ticker'] = temp_merge['ticker'].values

stock_df = stock_df.drop(['name_lower', 'company_lower', 'short_lower'], axis=1, errors='ignore')

cols = stock_df.columns.tolist()
name_idx = cols.index('name')
ticker_idx = cols.index('ticker')
cols.insert(name_idx + 1, cols.pop(ticker_idx))
stock_df = stock_df[cols]

stock_df = stock_df.rename(columns = {'ticker': 'symbol'})


invalid escape sequence '\%'


invalid escape sequence '\+'


invalid escape sequence '\%'


invalid escape sequence '\+'


invalid escape sequence '\%'


invalid escape sequence '\+'



In [98]:
# Cleaning Crypto Data

crypto_df = crypto_df.replace({'\$': ''}, regex=True)
crypto_df = crypto_df.replace({'\%': ''}, regex=True)
crypto_df = crypto_df.replace({'\+': ''}, regex=True)


invalid escape sequence '\$'


invalid escape sequence '\%'


invalid escape sequence '\+'


invalid escape sequence '\$'


invalid escape sequence '\%'


invalid escape sequence '\+'


invalid escape sequence '\$'


invalid escape sequence '\%'


invalid escape sequence '\+'



In [99]:
# Cleaning Company Data

companies_df = companies_df.drop('logo', axis=1)
companies_df = companies_df.drop('market cap', axis=1)
companies_df = companies_df.drop('ceo', axis=1)
companies_df = companies_df.drop('website', axis=1)
companies_df = companies_df.drop('description', axis=1)

In [100]:
# Stock Base Frame 
stock_df.head(3)

Unnamed: 0,timestamp,name,symbol,price_usd,high,low,chg_,chg_%,vol_,time
0,2025-10-14 18:00:03,NVIDIA,NVDA,181.19,184.8,179.7,-7.13,-3.79,74860000.0,10:57:55
1,2025-10-14 18:00:03,Amazon.com,AMZN,216.19,216.92,212.6,-3.88,-1.76,16800000.0,10:58:30
2,2025-10-14 18:00:03,3M,,151.84,152.19,149.28,0.74,0.49,479840.0,10:58:34


In [101]:
# Crypto Base Frame
crypto_df.head(3)

Unnamed: 0,timestamp,name,symbol,price_usd,vol_24h,total_vol,chg_24h,chg_7d,market_cap
0,2025-10-14 18:42:04,Wrapped stETH,wsteth,4862.78,17303242.0,0.0,-3.08,-13.06,15541897587
1,2025-10-14 18:42:04,Figure Heloc,figr_heloc,1.0,5942967.0,0.0,2.52,0.17,12973755429
2,2025-10-14 18:42:04,Wrapped eETH,weeth,4312.52,13561342.0,0.0,-3.06,-13.12,10582898874


In [102]:
# Companies Base Frame
companies_df.head(3)

Unnamed: 0,ticker,company name,short name,industry,exchange,sector,tag 1,tag 2,tag 3
0,A,Agilent Technologies Inc.,Agilent,Medical Diagnostics & Research,New York Stock Exchange,Healthcare,Healthcare,Diagnostics & Research,Medical Diagnostics & Research
1,AA,Alcoa Corporation,Alcoa,Metals & Mining,New York Stock Exchange,Basic Materials,Basic Materials,Aluminum,Metals & Mining
2,AABA,Altaba Inc.,Altaba,Asset Management,Nasdaq Global Select,Financial Services,Financial Services,Asset Management,


In [103]:
stock_df.to_csv("website/stocks_cleaned.csv", index=False)

crypto_df.to_csv("website/crypto_cleaned.csv", index=False)

companies_df.to_csv("website/companies_cleaned.csv", index=False)

In [112]:
stock_df["timestamp"] = pd.to_datetime(stock_df["timestamp"], errors="coerce")
stock_df["month"] = stock_df["timestamp"].dt.month

df_april = stock_df[stock_df["month"] == 4].copy()

# Clean percent-change column
df_april["chg_%"] = (
df_april["chg_%"].astype(str)
.str.replace("%", "", regex=False)
.str.replace(",", "", regex=False)
.str.replace(r"[^\d\.\-]", "", regex=True)
)

df_april["chg_pct"] = pd.to_numeric(df_april["chg_%"], errors="coerce")
df_april["volume"] = pd.to_numeric(df_april["vol_"], errors="coerce")
df_april["price_usd"] = pd.to_numeric(df_april["price_usd"], errors="coerce")

agg_april = df_april.groupby(["name", "symbol"], as_index=False).agg({
"chg_pct": "mean",
"price_usd": "mean",
"volume": "mean"
})

agg_april = agg_april.merge(
agg[["symbol", "type"]].drop_duplicates(),
on="symbol",
how="left"
)

import plotly.express as px

color_discrete_map = {
"Crypto": "#FF5DA2",
"Stock": "#FFDD57"
}

agg_sorted = agg_april.sort_values("chg_pct", ascending=False)

rank_bar = px.bar(
agg_sorted,
x="chg_pct",
y="name",
color="type",
color_discrete_map=color_discrete_map,
orientation="h",
title="Top 30-Day Movers — Stocks (April Only)",
labels={"chg_pct": "Average % Change (30-Day)", "name": "Asset"},
hover_data={
"name": True,
"symbol": True,
"chg_pct": ":.2f",
"price_usd": ":.2f",
"volume": ":,.0f"
}
)

rank_bar.update_layout(
width=1100,
height=700,
margin=dict(l=40, r=40, t=80, b=40)
)

rank_bar.show()
rank_bar.write_html("python_vis/stock_crypto_rank_bar_april.html")

In [105]:
crypto_df["timestamp"] = pd.to_datetime(crypto_df["timestamp"], errors="coerce")
crypto_df["month"] = crypto_df["timestamp"].dt.month

crypto_april = crypto_df[crypto_df["month"] == 4].copy()

# Ensure a 'type' column exists so grouping doesn't fail
crypto_april["type"] = "Crypto"

# Clean percent change (use chg_7d for 30-day approximation)
crypto_april["chg_7d"] = (
crypto_april["chg_7d"]
.astype(str)
.str.replace("%", "", regex=False)
.str.replace(",", "", regex=False)
.str.replace(r"[^\d\.\-]", "", regex=True)
)

crypto_april["chg_pct"] = pd.to_numeric(crypto_april["chg_7d"], errors="coerce")

crypto_april["volume"] = pd.to_numeric(crypto_april["total_vol"], errors="coerce")
crypto_april["price_usd"] = pd.to_numeric(crypto_april["price_usd"], errors="coerce")

agg_crypto_april = crypto_april.groupby(
["name", "symbol", "type"],
as_index=False
).agg({
"chg_pct": "mean",
"price_usd": "mean",
"volume": "mean"
})

import plotly.express as px

color_discrete_map = {
"Crypto": "#FF5DA2",
"Stock": "#FFDD57"
}

agg_sorted = agg_crypto_april.sort_values("chg_pct", ascending=False)

rank_bar_crypto = px.bar(
agg_sorted,
x="chg_pct",
y="name",
color="type",
color_discrete_map=color_discrete_map,
orientation="h",
title="Top 30-Day Movers — Crypto Only (April)",
labels={"chg_pct": "Average % Change (7-Day → Used for April)", "name": "Asset"},
hover_data={
"symbol": True,
"chg_pct": ":.2f",
"price_usd": ":.2f",
"volume": ":,.0f"
}
)

rank_bar_crypto.update_layout(width=1100, height=700, margin=dict(l=40, r=40, t=80, b=40))
rank_bar_crypto.show()
rank_bar_crypto.write_html("python_vis/crypto_rank_bar_april.html")

Takeaway:

This visualization illustrates how the stock market behaves on a day-to-day basis by tracking the average percent change of stock prices over time. When viewed across multiple months, the chart reveals that most daily changes remain clustered close to zero, meaning that the market tends to move gradually rather than through extreme daily swings. The few noticeable spikes and dips correspond to periods of unusual volatility—likely earnings releases, macroeconomic news, or shifts in investor sentiment—which briefly disrupt the otherwise stable trend. The interactive time filters (1W, 2W, 1M, All) allow users to zoom into shorter windows and uncover short-term patterns, such as momentum surges, brief pullbacks, or stabilization phases. Together, these insights help contextualize how dynamic the market is, making it easier for viewers to understand whether recent price movements are typical fluctuations or indicators of emerging trends.

Design Idea:

The design prioritizes clarity, readability, and user exploration. A simple blue line paired with circular markers highlights each daily data point while keeping the visual clean and easy to interpret. The range slider beneath the chart gives users the freedom to examine both high-level overview patterns and detailed micro-movements, making the visualization more interactive without overwhelming the viewer. Large typography, a balanced use of white space, and the removal of unnecessary visual clutter all contribute to a professional and unobtrusive design. Soft gridlines and muted axis colors support the data rather than distract from it, ensuring the viewer’s focus stays on the daily performance trend. Overall, the design aims to present complex time-series data in a smooth, intuitive format that invites users to investigate the market’s rhythm over time.

In [106]:
import numpy as np
import pandas as pd
import plotly.express as px

stock = stock_df.copy()
stock["timestamp"] = pd.to_datetime(stock["timestamp"], errors="coerce")
for col in ["price_usd","chg_%","vol_"]:
    stock[col] = (stock[col].astype(str)
                  .str.replace(",", "", regex=False)
                  .str.replace(r"[^\d\.\-]", "", regex=True))
    stock[col] = pd.to_numeric(stock[col], errors="coerce")
stock["type"] = "Stock"

crypto = crypto_df.copy()
crypto["timestamp"] = pd.to_datetime(crypto["timestamp"], errors="coerce")
for col in ["price_usd","chg_24h","vol_24h"]:
    crypto[col] = (crypto[col].astype(str)
                   .str.replace(",", "", regex=False)
                   .str.replace(r"[^\d\.\-]", "", regex=True))
    crypto[col] = pd.to_numeric(crypto[col], errors="coerce")
crypto = crypto.rename(columns={"chg_24h":"chg_%","vol_24h":"vol_"})
crypto["type"] = "Crypto"

# Basic cleaning
df = pd.concat([
    stock[["type","name","symbol","timestamp","price_usd","chg_%","vol_"]],
    crypto[["type","name","symbol","timestamp","price_usd","chg_%","vol_"]],
], ignore_index=True)

df["symbol"] = df["symbol"].fillna(df["name"]).astype(str)
df = df.replace([np.inf, -np.inf], np.nan).dropna(subset=["price_usd","chg_%","vol_"])
df = df[df["vol_"] > 0]

# Limit to a recent window
end = df["timestamp"].max()
start = end - pd.Timedelta(days=30)
df = df[(df["timestamp"] >= start) & (df["timestamp"] <= end)]

agg = (df.groupby(["type","symbol","name"], as_index=False)
         .agg(price_usd=("price_usd","median"),   
              chg_pct=("chg_%","mean"),           
              volume=("vol_","sum")))             

# Get rid of extreme outliers
lo, hi = agg["chg_pct"].quantile([0.02, 0.98])
agg = agg[(agg["chg_pct"] >= lo) & (agg["chg_pct"] <= hi)]

# Keep top N
N = 40
agg = (agg.sort_values("volume", ascending=False)
          .groupby("type", group_keys=False).head(N))

sizeref = 2.0 * agg["volume"].max() / (55.0**2)  

In [107]:
# Enhanced Interactive Bubble Plot
fig = px.scatter(
    agg,
    x="price_usd",
    y="chg_pct",
    size="volume",
    color="type",
    color_discrete_map={
        "Crypto": "#FF5DA2",   
        "Stock":  "#FFDD57"    
    },
    hover_name="name",
    hover_data={
        "symbol": True,
        "price_usd": ":,.2f",
        "chg_pct": ":.2f",
        "volume": ":,.0f",
        "type": False
    },
    facet_col="type",
    facet_col_spacing=0.08,
    title="Price vs. Avg % Change — Top Traded Stocks & Crypto (Last 30 Days)"
)

# --- Improved marker style ---
fig.update_traces(
    marker=dict(
        sizemode="area",
        sizeref=sizeref,
        sizemin=5,
        opacity=0.7,
        line=dict(width=0.7, color="rgba(0,0,0,0.3)")
    ),
    selector=dict(mode="markers")
)

# --- Layout styling ---
fig.update_layout(
    template="plotly_white",
    height=700,
    margin=dict(l=60, r=40, t=80, b=60),
    legend_title_text="Asset Type",
    title=dict(font=dict(size=26, family="Arial", color="#1f1f1f"))
)

# --- Axis styling ---
fig.update_xaxes(
    title="Price (USD, Log Scale)",
    type="log",
    tickfont=dict(size=12),
    titlefont=dict(size=16),
    gridcolor="lightgray"
)

fig.update_yaxes(
    title="Average % Change (30-Day Mean)",
    tickfont=dict(size=12),
    titlefont=dict(size=16),
    gridcolor="lightgray"
)

# INTERACTIVE ELEMENT 2: Marker Size Toggle (Volume vs Equal Size)
fig.update_layout(
    updatemenus=list(fig.layout.updatemenus) + [
        dict(
            type="buttons",
            direction="right",
            x=0,
            y=1,
            xanchor="left",
            yanchor="bottom",
            pad=dict(l=2, r=2, t=2, b=2),  # smaller padding
            font=dict(size=8),           # smaller text
            showactive=True,
            buttons=[
                dict(
                    label="Use Volume Size",
                    method="restyle",
                    args=[{"marker.size": [agg["volume"].values.tolist()] * len(fig.data)}]
                ),
                dict(
                    label="Equal Size",
                    method="restyle",
                    args=[{"marker.size": [[12] * len(agg)] * len(fig.data)}]
                ),
            ]
        )
    ]
)
# INTERACTIVE ELEMENT 3: Hovertemplate Upgrade
fig.update_traces(
    hovertemplate="<b>%{hovertext}</b><br>" +
                  "Symbol: %{customdata[0]}<br>" +
                  "Price: $%{customdata[1]:,.2f}<br>" +
                  "Avg Change: %{customdata[2]:.2f}%<br>" +
                  "Volume: %{customdata[3]:,.0f}<extra></extra>"
)

fig.show()

fig.write_html(
    "python_vis/price_v_avg_change.html",
    include_plotlyjs="cdn",
    full_html=True
)

Takeaway:

This bubble plot highlights the relationship between price, average percent change, and trading volume across the most actively traded stocks and cryptocurrencies over the past 30 days. The visualization makes the differences between the two asset classes immediately apparent: cryptocurrencies exhibit much wider variation in both price and volatility, forming a dispersed cloud of points across the chart. Stocks, on the other hand, cluster tightly around lower levels of average percent change, demonstrating far more stability in their monthly performance. Larger bubbles—representing higher trading volume—help viewers pinpoint which assets are driving market activity, showing that some extremely volatile crypto assets also receive substantial trading attention. By combining three dimensions of data into a single chart, this visualization uncovers patterns that would be harder to detect when viewing these variables separately, making it clear how volatility and market behavior differ dramatically between stocks and crypto.

Design Idea:

The design of this visualization focuses on clarity, comparability, and visual storytelling. A faceted layout splits the chart into two side-by-side panels—one for crypto and one for stocks—so viewers can make immediate comparisons without visual overlap or confusion. Using a log scale for price allows assets ranging from a few dollars to tens of thousands to coexist in the same frame without compressing the lower range. Color-coding bubbles in bright pink (crypto) and yellow (stocks) provides instant category recognition and creates a visually appealing contrast. The bubble size toggle (Volume vs Equal Size) adds an interactive dimension, allowing users to shift the narrative between “Which assets move the market the most?” and “How do these assets compare independent of trading volume?” The clean, modern aesthetic, combined with subtle gridlines and thoughtfully placed axis labels, ensures that the viewer is never overwhelmed despite the multi-variable nature of the plot.

In [108]:
import plotly.express as px

# Color scheme
color_discrete_map = {
"Crypto": "#FF5DA2",
"Stock": "#FFDD57"
}

# Sort so bars appear in descending order of % change
agg_sorted = agg.sort_values("chg_pct", ascending=False)

rank_bar = px.bar(
agg_sorted,
x="chg_pct", # Average % change (30-day)
y="name", # Asset name
color="type", # Crypto vs Stock
color_discrete_map=color_discrete_map,
orientation="h", # horizontal bars like your Altair chart
title="Top 30-Day Movers — Stocks & Crypto",
labels={
"chg_pct": "Average % Change (30-Day)",
"name": "Asset"
},
hover_data={
"name": True,
"symbol": True,
"chg_pct": ":.2f",
"price_usd": ":.2f",
"volume": ":,.0f",
"type": False,
},
)

# Size
rank_bar.update_layout(
width=1100,
height=700,
margin=dict(l=40, r=40, t=80, b=40)
)

# Show in notebook
rank_bar.show()

# Save to HTML for your webpage
rank_bar.write_html("python_vis/top_movers.html")

Takeaway:

This horizontal bar chart ranks the top gainers and losers across stocks and cryptocurrencies over the past 30 days, offering a clear snapshot of which assets experienced the most dramatic momentum shifts. A key insight from the visualization is that cryptocurrencies dominate both the highest positive returns and the steepest losses, highlighting their inherently volatile nature compared to the more stable, consistently performing stocks. While a handful of stocks appear among the positive movers, their returns are modest relative to crypto’s magnitude, and the few stock entries that appear on the negative side also show far smaller declines. This ranking format allows users to immediately identify standout performers, emerging trends, and assets exhibiting unusual or extreme behavior. Overall, the chart demonstrates the sharp contrast in risk profile between the two markets, with crypto showing explosive movements and stocks offering more predictable, incremental changes.

Design Idea:

The bar chart is intentionally structured to emphasize readability and ranking clarity. A horizontal layout allows long asset names—especially cryptocurrencies with multi-word labels—to be displayed without truncation. Sorting the bars from highest to lowest percent change creates a natural narrative flow, guiding the viewer from top performers to the largest underperformers in one smooth scan. The dual-color scheme, using pink for crypto and yellow for stocks, reinforces category differences and visually exposes how one group dominates the extremes of the distribution. Minimalist gridlines, precise spacing, and consistent bar thickness ensure that the focus stays on comparing performance magnitudes rather than navigating visual noise. The design’s goal is to deliver a high-impact, insights-first visualization that helps users quickly understand where market movement is concentrated and which assets deserve closer attention.

In [109]:
# Create a combined dataframe from stocks
stock_combined = stock_df[['name', 'symbol', 'timestamp', 'vol_', 'chg_%']].copy()
stock_combined['date'] = pd.to_datetime(stock_combined['timestamp']).dt.strftime('%m-%d-%Y')
stock_combined = stock_combined.rename(columns={
   'vol_': 'volume',
   'chg_%': 'change_percent'
})


# Merge with companies_df to get sector information
stock_combined = stock_combined.merge(
   companies_df[['ticker', 'sector']],
   left_on='symbol',
   right_on='ticker',
   how='left'
).drop(columns=['ticker', 'timestamp'])


# Create a combined dataframe from crypto
crypto_combined = crypto_df[['name', 'symbol', 'timestamp', 'vol_24h', 'chg_24h']].copy()
crypto_combined['date'] = pd.to_datetime(crypto_combined['timestamp']).dt.strftime('%m-%d-%Y')
crypto_combined = crypto_combined.rename(columns={
   'vol_24h': 'volume',
   'chg_24h': 'change_percent'
})
crypto_combined['sector'] = None  # Crypto has no sector
crypto_combined = crypto_combined.drop(columns=['timestamp'])


# Combine both dataframes
sector_df = pd.concat([stock_combined, crypto_combined], ignore_index=True)


# Reorder columns for clarity
sector_df = sector_df[['name', 'symbol', 'date', 'volume', 'change_percent', 'sector']]


sector_df = sector_df.merge(
   companies_df[['ticker', 'tag 2']],
   left_on='symbol',
   right_on='ticker',
   how='left'
).drop(columns=['ticker'])


# Rename 'tag 2' to 'sector2'
sector_df = sector_df.rename(columns={'tag 2': 'sector2'})


# Check the result
sector_df.head()

Unnamed: 0,name,symbol,date,volume,change_percent,sector,sector2
0,NVIDIA,NVDA,10-14-2025,74860000.0,-3.79,Technology,Semiconductors
1,Amazon.com,AMZN,10-14-2025,16800000.0,-1.76,Consumer Cyclical,Specialty Retail
2,3M,,10-14-2025,479840.0,0.49,,
3,Walt Disney,,10-14-2025,1670000.0,0.72,,
4,Amgen,AMGN,10-14-2025,397200.0,1.19,Healthcare,Biotechnology


In [110]:
import altair as alt
import pandas as pd


# Reset and use default transformer
alt.data_transformers.enable('default')
alt.data_transformers.disable_max_rows()


# Create a proper datetime column from the date
sector_df['datetime'] = pd.to_datetime(sector_df['date'], format='%m-%d-%Y')


# Replace NaN sectors with 'Cryptocurrency'
sector_df['sector'] = sector_df['sector'].fillna('Cryptocurrency')
sector_df['sector2'] = sector_df['sector2'].fillna('Cryptocurrency')


# Convert change_percent to numeric, coercing errors to NaN
sector_df['change_percent'] = pd.to_numeric(sector_df['change_percent'], errors='coerce')


# Drop any rows where change_percent is NaN after conversion
sector_df_clean = sector_df.dropna(subset=['change_percent'])


# Create aggregated "All Stocks" data
all_stocks = sector_df_clean[sector_df_clean['sector'] != 'Cryptocurrency'].copy()
all_stocks_agg = all_stocks.groupby('datetime')['change_percent'].mean().reset_index()
all_stocks_agg = all_stocks_agg.sort_values('datetime')
all_stocks_agg['cumulative_change'] = all_stocks_agg['change_percent'].cumsum()
all_stocks_agg['display_category'] = 'All Stocks'
all_stocks_agg['view_type'] = 'sector'


# Create individual sector data
sector_performance = sector_df_clean.groupby(['datetime', 'sector'])['change_percent'].mean().reset_index()
sector_performance = sector_performance.sort_values(['sector', 'datetime'])
sector_performance['cumulative_change'] = sector_performance.groupby('sector')['change_percent'].cumsum()
sector_performance['display_category'] = sector_performance['sector']
sector_performance['view_type'] = 'sector'


# Create subsector data (sector2) - shown in subsector view
subsector_performance = sector_df_clean.groupby(['datetime', 'sector2'])['change_percent'].mean().reset_index()
subsector_performance = subsector_performance.sort_values(['sector2', 'datetime'])
subsector_performance['cumulative_change'] = subsector_performance.groupby('sector2')['change_percent'].cumsum()
subsector_performance['display_category'] = subsector_performance['sector2']
subsector_performance['view_type'] = 'subsector'


# Combine the data
combined_data = pd.concat([
   all_stocks_agg[['datetime', 'cumulative_change', 'display_category', 'change_percent', 'view_type']],
   sector_performance[['datetime', 'cumulative_change', 'display_category', 'change_percent', 'view_type']],
   subsector_performance[['datetime', 'cumulative_change', 'display_category', 'change_percent', 'view_type']]
], ignore_index=True)


# Add line width
combined_data['line_width'] = combined_data['display_category'].apply(
   lambda x: 4 if x in ['Cryptocurrency', 'All Stocks'] else 2
)


# Add color indicator for positive/negative
combined_data['gain_loss_status'] = combined_data['cumulative_change'].apply(
   lambda x: '🟢 Gain' if x > 0 else '🔴 Loss' if x < 0 else '⚪ Neutral'
)


# Create toggle for view type
view_toggle = alt.binding_radio(
   options=['sector', 'subsector'],
   labels=['Sector View', 'Subsector View (Detailed)'],
   name='View Mode: '
)
view_select = alt.selection_point(
   fields=['view_mode'],
   bind=view_toggle,
   value='sector'
)


# Create sector selection with default values (legend-based)
sector_select = alt.selection_point(
   fields=['display_category'],
   bind='legend',
   value=[{'display_category': 'All Stocks'}, {'display_category': 'Cryptocurrency'}]
)


# Hover selection - simpler version that just tracks the line being hovered
hover = alt.selection_point(
   fields=['display_category'],
   nearest=True,
   on='mouseover',
   empty=False,
   clear='mouseout'
)


# Expanded color palette - yellow, orange, red, pink, and purple spectrum
custom_colors = [
   '#FFD700',  # Gold
   '#FFC300',  # Bright yellow
   '#FFB347',  # Light orange
   '#FF9933',  # Orange
   '#FF8C00',  # Dark orange
   '#FF6600',  # Red-orange
   '#FF4500',  # Orange red
   '#FF0000',  # Pure red
   '#DC143C',  # Crimson
   '#C71585',  # Medium violet red
   '#FF1493',  # Deep pink
   '#FF69B4',  # Hot pink
   '#FF6B9D',  # Light pink
   '#E91E63',  # Pink
   '#D946EF',  # Fuchsia
   '#C026D3',  # Purple-pink
   '#9333EA',  # Purple
   '#7C3AED',  # Violet
   '#6366F1',  # Indigo
   '#8B5CF6'   # Purple
]


# Base line chart
lines = alt.Chart(combined_data).transform_calculate(
   view_mode='datum.view_type',
   text_color="datum.cumulative_change > 0 ? 'green' : datum.cumulative_change < 0 ? 'red' : 'gray'",
   formatted_pct="(datum.cumulative_change > 0 ? '+' : '') + format(datum.cumulative_change, '.2f') + '%'"
).transform_filter(
   view_select
).mark_line(point=True).encode(
   x=alt.X('datetime:T', title='Date'),
   y=alt.Y('cumulative_change:Q', title='Cumulative Change (%)'),
   color=alt.Color('display_category:N',
                   title='Category',
                   scale=alt.Scale(range=custom_colors),
                   legend=alt.Legend(orient='right')),
   strokeWidth=alt.StrokeWidth('line_width:Q', legend=None),
   strokeDash=alt.condition(
       alt.datum.display_category == 'Cryptocurrency',
       alt.value([8, 4]),
       alt.value([0])
   ),
   opacity=alt.condition(sector_select, alt.value(1), alt.value(0)),
   tooltip=[
       alt.Tooltip('display_category:N', title='Category'),
       alt.Tooltip('datetime:T', title='Date', format='%B %d, %Y'),
       alt.Tooltip('cumulative_change:Q', title='Total Gain/Loss (%)', format='.2f'),
       alt.Tooltip('gain_loss_status:N', title='Status'),
       alt.Tooltip('change_percent:Q', title='Daily Change (%)', format='.2f')
   ]
)


# Get most recent value for each category for the display
latest_values = combined_data.sort_values('datetime').groupby(['display_category', 'view_type']).last().reset_index()


# Large total gain/loss display in bottom right corner
total_display = alt.Chart(latest_values).transform_calculate(
   view_mode='datum.view_type',
   text_color="datum.cumulative_change > 0 ? 'green' : datum.cumulative_change < 0 ? 'red' : 'gray'",
   formatted_pct="(datum.cumulative_change > 0 ? '+' : '') + format(datum.cumulative_change, '.2f') + '%'"
).transform_filter(
   view_select
).mark_text(
   align='right',
   baseline='bottom',
   fontSize=40,
   fontWeight='bold',
   dx=-10,
   dy=-10
).encode(
   x=alt.value(880),
   y=alt.value(480),
   text=alt.condition(hover, 'formatted_pct:N', alt.value('')),
   color=alt.Color('text_color:N', scale=None, legend=None),
   opacity=alt.condition(hover, alt.value(1), alt.value(0))
)


# Category label for the total display
category_label = alt.Chart(latest_values).transform_calculate(
   view_mode='datum.view_type'
).transform_filter(
   view_select
).mark_text(
   align='right',
   baseline='bottom',
   fontSize=18,
   fontWeight='bold',
   dx=-10,
   dy=-55
).encode(
   x=alt.value(880),
   y=alt.value(480),
   text=alt.condition(hover, 'display_category:N', alt.value('')),
   color=alt.value('#333'),
   opacity=alt.condition(hover, alt.value(0.9), alt.value(0))
)


# Combine all layers
chart = (lines + total_display + category_label).add_params(
   view_select,
   sector_select,
   hover
).properties(
   width=900,
   height=500,
   title='Sector Performance (Hover over lines to see total gain/loss | Toggle view mode | Shift+Click legend for multiple)'
).configure_legend(
   titleFontSize=12,
   labelFontSize=11
).interactive()

chart

chart.save("python_vis/sector_performance.html")


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the conve

Takeaway:

This line plot highlights the performance of stocks across different industries. It shows the average trading price each day plotted across an entire date range. The visualization also includes all Cryptocurrencys aggregated as a comparison line to all stocks and subsections of stock. When plotting all industries against Crypto, results vary widely. When looking at this years data, only a single industry outperformed being Consumer Defensive stocks. Despite success within each industry, the entire weighed average still falls below Crypto gains for most categorys. Our visualization can break down these categorys further into subsectors. This allows for more detailed comparison and showcases a few niche sectors that outperform Crypto greatly. Home Furnishing & Banking lead these categorys at 80-90 percent cumulative gains. Our visualization helps uncover specific market trends that show behavior across specific sector and industires, highlighting recent and long term changes. 

Design Idea:

The design of this line plot focuses on simplicity and distinction between categorical variables. The use of different colors is done to map differences across the different stock sectors. The legend allows a user to hide/select certain categories to focus in on target industries. For example, a user researching companies to invest in can select "Semiconductors", "Solar', and "Telecom" to see changes across those industries. We also wanted users to be able to examine sectors within broader industries. This is why we have a more detailed selection with our Subsector View. This breaks our data down to an even finer grain allowing savvy investors deeper insights on many niche industries.