In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
from datetime import datetime

# Calendar date at the moment this cell runs, rendered as YYYY-MM-DD.
today = f"{datetime.today():%Y-%m-%d}"

In [2]:
# Load the trade data, using the "ticker" column as the index.
df = pd.read_csv("files/trade_data.csv", index_col="ticker")

# Keep only the rows belonging to the latest date present in the file.
# NOTE(review): "date" is not parsed here, so max() compares the values as
# stored; this assumes they sort chronologically (e.g. ISO strings) - confirm.
# The explicit .copy() makes `df` an independent frame, so adding columns to it
# in later cells does not raise pandas' SettingWithCopyWarning.
df = df.loc[df["date"] == df["date"].max()].copy()

## Scatter Plots

#### Untransformed features

In [3]:
# Features shown in the hover box, in the order the template indexes them.
hover_features = ["bm", "mom12m", "roeq"]

# Per-point hover payload of shape (n_rows, 3, 1): every feature column becomes
# an (n_rows, 1) column vector and the three vectors are stacked along axis 1.
cd = np.stack(
    [df[name].to_numpy().reshape(-1, 1) for name in hover_features],
    axis=1,
).astype(float)

# Hover text: the trace `text` (the frame's index labels) followed by the three
# feature values; the empty <extra></extra> tag suppresses the secondary box.
string = """
    %{text}<br>
    bm = %{customdata[0]:.2}<br>
    mom12m = %{customdata[1]:.2}<br>
    roeq = %{customdata[2]:.2}<extra></extra>
    """

# Pairwise scatter plots of the raw features and the prediction.
fig = px.scatter_matrix(df.loc[:, hover_features + ["predict"]])
fig.update_traces(
    hovertemplate=string,
    customdata=cd,
    text=df.index.tolist(),
)
fig.show()

#### Transformed features

In [4]:
# Transformed features shown in the hover box, in template index order.
hover_features = ["tbm", "tmom12m", "troeq"]

# Per-point hover payload of shape (n_rows, 3, 1): every feature column becomes
# an (n_rows, 1) column vector and the three vectors are stacked along axis 1.
cd = np.stack(
    [df[name].to_numpy().reshape(-1, 1) for name in hover_features],
    axis=1,
).astype(float)

# Hover text: the trace `text` (the frame's index labels) followed by the three
# feature values; the empty <extra></extra> tag suppresses the secondary box.
string = """
    %{text}<br>
    tbm = %{customdata[0]:.2}<br>
    tmom12m = %{customdata[1]:.2}<br>
    troeq = %{customdata[2]:.2}<extra></extra>
    """

# Pairwise scatter plots of the transformed features and the prediction.
fig = px.scatter_matrix(df.loc[:, hover_features + ["predict"]])
fig.update_traces(
    hovertemplate=string,
    customdata=cd,
    text=df.index.tolist(),
)
fig.show()

## Best and worst stocks

In [5]:
# Number of stocks placed in each of the "worst" and "best" portfolios.
PORTFOLIO_SIZE = 200

# Ascending rank of the prediction; ties are broken by order of appearance.
# Rows with a missing prediction get a NaN rank.
rnk = df["predict"].rank(method="first")

# The highest rank equals the number of non-missing predictions, so the "best"
# cutoff is measured from that count rather than from the total row count.
# Otherwise any missing prediction would shrink the "best" portfolio.
n_ranked = int(rnk.notna().sum())

# np.select takes the first matching condition, so "worst" wins if the two
# ranges overlap (fewer than 2 * PORTFOLIO_SIZE ranked rows). NaN ranks match
# neither condition and fall through to "middle".
portfolio = np.select(
    [rnk <= PORTFOLIO_SIZE, rnk > n_ranked - PORTFOLIO_SIZE],
    ["worst", "best"],
    default="middle",
)

# assign() returns a new frame, so this cell can be re-run safely and does not
# write into a filtered frame (no SettingWithCopyWarning).
df = df.assign(rnk=rnk, portfolio=portfolio)

# Only the two extreme portfolios, as an independent frame for the plots below.
df2 = df.loc[df["portfolio"].isin(("worst", "best"))].copy()


#### Feature histograms

In [6]:
# Histogram of tbm for the rows in df2, with one colour per portfolio label.
portfolio_colors = {"best": "green", "worst": "red"}
fig = px.histogram(
    df2,
    x="tbm",
    color="portfolio",
    color_discrete_map=portfolio_colors,
)
fig.show()

In [7]:
# Histogram of tmom12m for the rows in df2, with one colour per portfolio label.
portfolio_colors = {"best": "green", "worst": "red"}
fig = px.histogram(
    df2,
    x="tmom12m",
    color="portfolio",
    color_discrete_map=portfolio_colors,
)
fig.show()

In [8]:
# Histogram of troeq for the rows in df2, with one colour per portfolio label.
portfolio_colors = {"best": "green", "worst": "red"}
fig = px.histogram(
    df2,
    x="troeq",
    color="portfolio",
    color_discrete_map=portfolio_colors,
)
fig.show()

#### Industries

In [9]:
# Rows labelled as the "best" portfolio.
best = df.loc[df["portfolio"] == "best"]

# Count of non-null predictions per `famaindustry` group, with the
# "Almost Nothing" label shown as "Other", as a two-column frame.
counts = (
    best.groupby("famaindustry")["predict"]
    .count()
    .rename(index={"Almost Nothing": "Other"})
    .rename_axis("industry")
    .reset_index(name="count")
)

# Share of each industry within the "best" portfolio.
fig = px.pie(counts, names="industry", values="count")
fig.show()


In [10]:
# Rows labelled as the "worst" portfolio.
worst = df.loc[df["portfolio"] == "worst"]

# Count of non-null predictions per `famaindustry` group, with the
# "Almost Nothing" label shown as "Other", as a two-column frame.
counts = (
    worst.groupby("famaindustry")["predict"]
    .count()
    .rename(index={"Almost Nothing": "Other"})
    .rename_axis("industry")
    .reset_index(name="count")
)

# Share of each industry within the "worst" portfolio.
fig = px.pie(counts, names="industry", values="count")
fig.show()