In [None]:
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the cleaned trades file, then derive a numeric holdings column and
# party/state columns from the text of the "Senator" column.
df = pd.read_csv("./Data/cleaned_govtrades.csv")

# Drop the first character of each holdings string (presumably a currency
# symbol -- confirm against the CSV), remove thousands separators, cast to int.
df["Estimated Holdings"] = (
    df["Estimated Holdings"]
    .str.slice(1)
    .str.replace(",", "")
    .astype(int)
)

# Fixed-position slices counted from the end of the "Senator" string:
# the 5th-from-last character is stored as the party, and the two characters
# just before the final one are stored as the state.
df["Party"] = df["Senator"].str.get(-5)
df["State"] = df["Senator"].str.slice(-3, -1)

df

In [None]:
# Total estimated holdings per state, with "State" kept as a regular column.
state_holdings = (
    df.groupby('State')['Estimated Holdings']
    .sum()
    .reset_index()
)

In [None]:
scale = 1e5

# Bubble layer: one marker per state. The marker size and the hover text both
# use the state's summed holdings divided by `scale`.
holdings_bubbles = go.Scattergeo(
    locationmode='USA-states',
    locations=state_holdings['State'],
    text=(state_holdings['Estimated Holdings'] / scale).round(2),
    showlegend=False,
    marker={
        'size': state_holdings['Estimated Holdings'] / scale,
        'color': 'skyblue',
        'line': {'width': 1, 'color': 'DarkSlateGray'},
        'sizemode': 'area',
    },
)

# Label layer: a text-only trace that prints the state code next to each bubble.
state_labels = go.Scattergeo(
    locationmode='USA-states',
    locations=state_holdings['State'],
    text=state_holdings['State'],
    mode='text',
    textposition='top left',
    showlegend=False,  # keep the label trace out of the legend
)

fig = go.Figure(data=[holdings_bubbles, state_labels])

# Geo options left disabled by the author: projection_type='albers usa', showlakes=True
fig.update_layout(
    title_text='Estimated Holdings by State',
    geo={
        'scope': 'usa',
        'lakecolor': 'rgb(255, 255, 255)',
    },
)
fig.show()

In [None]:
def getSumCount(df, group_col, agg_col):
    """Summarise ``agg_col`` per group as a sum and a row count.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to aggregate.
    group_col : label or list of labels
        Column(s) passed to ``groupby``.
    agg_col : str
        Column whose values are summed and counted within each group.

    Returns
    -------
    pandas.DataFrame
        The grouping key(s) as ordinary columns, followed by
        ``"<agg_col> sum"`` and ``"<agg_col> count"``.
    """
    summary = df.groupby(group_col).agg({agg_col: ["sum", "count"]})
    summary = summary.reset_index()

    # Every column label is now a (name, statistic) tuple. The key column has
    # an empty statistic, so the joined label is stripped of its trailing space.
    flat_labels = []
    for label_parts in summary.columns.values:
        flat_labels.append(" ".join(label_parts).strip())
    summary.columns = flat_labels

    return summary

In [None]:
# Pie chart of each sector's share of the total estimated holdings.
#
# NOTE(review): `sector_holdings` is not defined in any cell visible here. The
# columns read below ("Sector", "Estimated Holdings sum") match what
# getSumCount(df, "Sector", "Estimated Holdings") would return -- confirm, and
# define it explicitly so the notebook survives Restart & Run All.
scale = 1e5
pie_sector_data = sector_holdings["Estimated Holdings sum"]/scale
pie_sector_data /= pie_sector_data.sum()
pie_sector_data *= 100  # share of the grand total, in percent
indices = pie_sector_data>2

# Fold the sectors at or below 2% into a single "Others" wedge instead of
# dropping them. plt.pie renormalises whatever it is given, so plotting only
# the large sectors would print percentages that overstate every one of them.
pie_values = pie_sector_data[indices].tolist()
pie_labels = sector_holdings["Sector"][indices].tolist()
others_share = pie_sector_data[~indices].sum()
if others_share > 0:
    pie_values.append(others_share)
    pie_labels.append("Others")

with plt.style.context('ggplot'):
    plt.pie(pie_values, labels=pie_labels,
            wedgeprops={"edgecolor": "black"}, autopct="%1.2f%%",
            shadow=True)
    plt.title("Pie Chart of Investment Amount by Sector \n(Others = sectors at or below 2% each)")
    # The original wrote `plt.tight_layout` without parentheses, which never ran.
    plt.tight_layout()
    plt.show()
# https://scc.ms.unimelb.edu.au/resources/data-visualisation-and-exploration/no_pie-charts

In [None]:
# Compare how many rows of `df` each senator has, split by party.
#
# NOTE(review): "Count" is the number of rows per senator. It equals the number
# of different companies only if `df` holds one row per senator/company pair --
# confirm, or count distinct values of the company column instead.

# One (Senator, Party) row per senator. Without drop_duplicates the left merge
# below repeats every senator once per row of `df`, which weights the
# histograms by trade count instead of counting each senator once.
df_senator_party = df[["Senator", "Party"]].drop_duplicates()
df_senator_count_total = df.groupby("Senator").agg({"Party": "count"}).reset_index()
df_senator_count_total.columns = ["Senator", "Count"]
df_senator_count_total = pd.merge(df_senator_count_total, df_senator_party, on="Senator", how="left")

counts_r = df_senator_count_total.loc[df_senator_count_total["Party"] == "R", "Count"]
counts_d = df_senator_count_total.loc[df_senator_count_total["Party"] == "D", "Count"]

# Use the same 25 bin edges for both parties. Passing bins=25 to each call
# separately derives different edges per party, so the overlaid bars would not
# cover the same intervals.
shared_bins = np.histogram_bin_edges(pd.concat([counts_r, counts_d]), bins=25)

plt.hist(counts_r, histtype="bar", alpha=0.8, color="red", label="R", bins=shared_bins)
plt.hist(counts_d, histtype="bar", alpha=0.5, color="blue", label="D", bins=shared_bins)
plt.legend()

plt.title("Investment Diversity R vs D")
plt.xlabel("Number of different companies")
plt.ylabel('Count')
plt.show()