In [3]:
import streamlit as st
import pandas as pd
import plotly.express as px
from pathlib import Path

# ---------- Configuration ----------z
# If the script is executed via "streamlit run" __file__ is defined,
# but when tested in notebooks it is not.  Fall back to the CWD.
DATA_DIR = Path(__file__).parent if "__file__" in globals() else Path.cwd()

st.set_page_config(
    page_title="Cannabis Market Portfolio App",
    page_icon="🌿",
    layout="wide",
)

# ---------- Data loaders ----------
@st.cache_data
def load_wa():
    wa = pd.read_csv(DATA_DIR / "wa_cannabis_sales - Sheet1.csv")
    wa["Sales Last Month"] = (
        wa["Sales Last Month"].replace(r"[\$,]", "", regex=True).astype(float)
    )
    wa["Period"] = pd.to_datetime(wa["Period"], format="mixed")
    return wa


@st.cache_data
def load_price():
    ppd = pd.read_csv(DATA_DIR / "price_per_ounce_mass_2025.csv")
    ppd["CCCLastUpdated"] = pd.to_datetime(ppd["CCCLastUpdated"], format="%m/%d/%Y")
    melted = ppd.melt(
        id_vars=["CCCLastUpdated", "SOLDDATE"],
        value_vars=["CONSUMER_OZ", "PATIENT_OZ", "GRANDTOTAL_OZ"],
        var_name="Category",
        value_name="Price_per_oz",
    )
    melted["SOLDDATE"] = pd.to_datetime(melted["SOLDDATE"], format="%m/%Y")
    return melted


@st.cache_data
def load_state():
    raw = pd.read_csv(
        DATA_DIR
        / "Colorado Marijuana_Sales_2014_To_2024_Report.xlsx - State Report.csv",
        skiprows=2,
        header=None,
        dtype=str,
    )
    # promote first row → header
    raw.columns = raw.iloc[0].str.replace("\n", " ").str.strip()
    df = raw.iloc[1:].copy()
    df = df.rename(
        columns={
            "Total Medical  Marijuana Sales ¹": "Medical_Sales",
            "Total Retail  Marijuana Sales ²": "Retail_Sales",
            "Total  Marijuana Sales": "Total_Sales",
        }
    )
    numeric_ok = (
        pd.to_numeric(df["Month"], errors="coerce").notnull()
        & pd.to_numeric(df["Year"], errors="coerce").notnull()
    )
    df = df[numeric_ok].copy()
    df["Month"] = df["Month"].astype(int)
    df["Year"] = df["Year"].astype(int)
    for col in ["Medical_Sales", "Retail_Sales", "Total_Sales"]:
        df[col] = df[col].replace(r"[\$,]", "", regex=True).astype(float)
    df["Date"] = pd.to_datetime({"year": df["Year"], "month": df["Month"], "day": 1})
    return df


@st.cache_data
def load_licenses():
    def _cat(name):
        return pd.read_csv(DATA_DIR / name, parse_dates=["Date Updated"])

    prod = pd.concat(
        [_cat("Product Manufacturers - Retail.csv"), _cat("Product Manufacturers - Medical.csv")]
    )
    stores = pd.concat(
        [_cat("Colorado_Stores - Retail.csv"), _cat("Colorado_Stores - Medical.csv")]
    )
    cultiv = pd.concat(
        [_cat("Colorado_Cultivations - Retail.csv"), _cat("Colorado_Cultivations - Medical.csv")]
    )
    return prod, stores, cultiv


# ---------- Page builders ----------

def colorado_sales_dashboard(state):
    st.header("Colorado Total Cannabis Sales (2014 – 2024)")
    fig = px.bar(state, x="Date", y="Total_Sales", labels={"Total_Sales": "Sales ($)"})
    st.plotly_chart(fig, use_container_width=True)


def price_dashboard(price_long):
    st.header("Average Price per Ounce Over Time")
    cats = price_long["Category"].unique().tolist()
    chosen = st.multiselect("Category", cats, default=cats)
    df = price_long[price_long["Category"].isin(chosen)]
    fig = px.line(df, x="SOLDDATE", y="Price_per_oz", color="Category")
    st.plotly_chart(fig, use_container_width=True)


def washington_dashboard(wa):
    st.header("Washington State – Sales Last Month")
    county = st.selectbox("County", sorted(wa["County"].dropna().unique()))
    df = wa[wa["County"] == county]
    fig = px.bar(
        df,
        x="Business Name",
        y="Sales Last Month",
        labels={"Sales Last Month": "Sales ($)"},
        title=f"{county} County",
    )
    st.plotly_chart(fig, use_container_width=True)
    if st.checkbox("Show raw data"):
        st.dataframe(df)


def license_dashboard(prod, stores, cultiv):
    st.header("Colorado Licenses Snapshot")
    tab1, tab2, tab3 = st.tabs(["Manufacturers", "Stores", "Cultivations"])

    with tab1:
        st.metric("Total manufacturers", f"{len(prod):,}")
        st.dataframe(prod[["Business Name", "County", "Date Updated"]])
    with tab2:
        st.metric("Total stores", f"{len(stores):,}")
        st.dataframe(stores[["Business Name", "County", "Date Updated"]])
    with tab3:
        st.metric("Total cultivations", f"{len(cultiv):,}")
        st.dataframe(cultiv[["Business Name", "County", "Date Updated"]])


# ---------- Main ----------

def main():
    wa = load_wa()
    price_long = load_price()
    state = load_state()
    prod, stores, cultiv = load_licenses()

    page = st.sidebar.radio(
        "Choose a dashboard",
        (
            "Colorado Sales Trends",
            "Price per Ounce",
            "Washington County Sales",
            "Colorado Licenses Overview",
        ),
    )

    if page == "Colorado Sales Trends":
        colorado_sales_dashboard(state)
    elif page == "Price per Ounce":
        price_dashboard(price_long)
    elif page == "Washington County Sales":
        washington_dashboard(wa)
    elif page == "Colorado Licenses Overview":
        license_dashboard(prod, stores, cultiv)


if __name__ == "__main__":
    main()


2025-05-18 15:45:22.967 No runtime found, using MemoryCacheStorageManager
2025-05-18 15:45:22.969 No runtime found, using MemoryCacheStorageManager
2025-05-18 15:45:22.970 No runtime found, using MemoryCacheStorageManager
2025-05-18 15:45:22.972 No runtime found, using MemoryCacheStorageManager


In [5]:
import requests
import pandas as pd
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pgeocode
import geopandas as gpd
import folium
from pathlib import Path
from flask import Flask
import streamlit as st  # not used, just keep dependencies consistent

# ——— 1) Load & clean WA sales data ———
wa_df = pd.read_csv('wa_cannabis_sales - Sheet1.csv')
wa_df['Sales'] = wa_df['Sales Last Month'].replace(r'[\$,]', '', regex=True).astype(float)
wa_df['Date']    = pd.to_datetime(wa_df['Period'], errors='coerce')
wa_df['Year']    = wa_df['Date'].dt.year
wa_df['Quarter'] = wa_df['Date'].dt.quarter
wa_df['County_clean'] = (
    wa_df['County']
         .str.replace(r'(?i)\s*county,\s*wa$', '', regex=True)
         .str.lower()
)
wa_name2fips = {
    'adams': '53001', 'asotin': '53003', 'benton': '53005',
    'chelan': '53007', 'clallam': '53009', 'clark': '53011',
    'columbia': '53013', 'cowlitz': '53015', 'douglas': '53017',
    'ferry': '53019', 'franklin': '53021', 'garfield': '53023',
    'grant': '53025', 'graysharbor': '53027','island': '53029',
    'jefferson':'53031', 'king': '53033', 'kitsap':'53035',
    'kittitas':'53037','klickitat':'53039','lewis':'53041',
    'lincoln':'53043','mason':'53045','okanogan':'53047',
    'pacific':'53049','pendoreille':'53051','pierce':'53053',
    'sanjuan':'53055','skagit':'53057','skamania':'53059',
    'snohomish':'53061','spokane':'53063','stevens':'53065',
    'thurston':'53067','wahkiakum':'53069','wallawalla':'53071',
    'whatcom':'53073','whitman':'53075','yakima':'53077'
}
wa_df['FIPS'] = wa_df['County_clean'].map(wa_name2fips)
wa_df = wa_df.dropna(subset=['FIPS'])

us_counties = requests.get(
    "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
).json()
wa_geo = {
    "type": "FeatureCollection",
    "features": [f for f in us_counties['features'] if f['id'].startswith('53')]
}

def generate_wa_map(year, quarter):
    dff = wa_df[(wa_df.Year == year) & (wa_df.Quarter == quarter)]
    map_df = dff.groupby('FIPS', as_index=False)['Sales'].sum()
    fig_map = px.choropleth_mapbox(
        map_df, geojson=wa_geo,
        locations='FIPS', featureidkey='id',
        color='Sales', color_continuous_scale='Greens',
        mapbox_style='carto-positron', zoom=5,
        center={'lat':47.5,'lon':-120.7},
        opacity=0.7, labels={'Sales':'$ Sales'}
    )
    fig_map.update_layout(margin={'r':0,'t':40,'l':0,'b':0})
    return fig_map

# ——— 2) Load CO license data & map ———
prod = pd.read_csv('Product Manufacturers - Retail.csv')
prod['license_type'] = 'Recreational'
med  = pd.read_csv('Product Manufacturers - Medical.csv')
med ['license_type'] = 'Medical'
co_df = pd.concat([prod, med], ignore_index=True)
co_df['ZIP Code'] = co_df['ZIP Code'].astype(str).str.zfill(5)
nomi = pgeocode.Nominatim('us')
co_df['county_name'] = co_df['ZIP Code'].apply(lambda z: nomi.query_postal_code(z).county_name)
counts = co_df.groupby(['county_name','license_type']).size().unstack(fill_value=0)
counts['dominant'] = counts.apply(lambda r: 'Recreational' if r['Recreational'] > r['Medical'] else 'Medical', axis=1)
counts = counts.reset_index()[['county_name','dominant']]
geo = gpd.read_file(
    'https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip'
)
co_geo = geo[geo['STATEFP']=='08']
co_geo = co_geo.merge(counts, left_on='NAME', right_on='county_name', how='left')

m = folium.Map(location=[39.0, -105.5], zoom_start=6, tiles='cartodbpositron')
folium.GeoJson(
    co_geo,
    style_function=lambda feat: {
        'fillColor': 'green' if feat['properties']['dominant']=='Recreational' else 'yellow',
        'color':'black','weight':1,'fillOpacity':0.7
    },
    tooltip=folium.GeoJsonTooltip(fields=['NAME','dominant'], aliases=['County','Dominant'])
).add_to(m)
map_html = m.get_root().render()

# ——— 3) Dash app combining both ———
app = dash.Dash(__name__, suppress_callback_exceptions=True)
app.layout = html.Div([
    dcc.Tabs(id='tabs', value='wa', children=[
        dcc.Tab(label='WA Cannabis Sales', value='wa'),
        dcc.Tab(label='CO License Map', value='co'),
    ]),
    html.Div(id='tabs-content')
])

wa_controls = html.Div([
    html.Div([html.Label('Year'), dcc.Dropdown(id='year', options=[{'label':y,'value':y} for y in sorted(wa_df.Year.unique())], value=sorted(wa_df.Year.unique())[-1], clearable=False)]),
    html.Div([html.Label('Quarter'), dcc.Dropdown(id='quarter', options=[{'label':f'Q{q}','value':q} for q in [1,2,3,4]], value=1, clearable=False)]),
    html.Div([html.Label('Top N'), dcc.Slider(id='top_n', min=5, max=50, step=1, value=20, marks={5:'5',20:'20',50:'50'}, tooltip={'placement':'bottom'})])
], style={'display':'flex','gap':'2rem','padding':'1rem'})

wa_layout = html.Div([wa_controls, dcc.Graph(id='wa-map'), dcc.Graph(id='lic-chart'), dcc.Graph(id='biz-chart')])

co_layout = html.Div([html.H2('Colorado: Dominant License Type by County'), html.Iframe(srcDoc=map_html, style={'width':'100%','height':'75vh'})])

@app.callback(Output('tabs-content','children'), Input('tabs','value'))
def render_tab(tab):
    if tab=='wa': return wa_layout
    return co_layout

@app.callback(
    [Output('wa-map','figure'), Output('lic-chart','figure'), Output('biz-chart','figure')],
    [Input('year','value'), Input('quarter','value'), Input('top_n','value')]
)
def update_wa(year, quarter, top_n):
    dff = wa_df[(wa_df.Year==year)&(wa_df.Quarter==quarter)]
    # map
    map_fig = generate_wa_map(year, quarter)
    # license types
    lic = dff.groupby('Business Type', as_index=False)['Sales'].sum().sort_values('Sales', ascending=False)
    lic_fig = px.bar(lic, x='Business Type', y='Sales', title=f'License Types — Q{quarter} {year}')
    # top businesses
    biz = dff.groupby('Business Name', as_index=False)['Sales'].sum().sort_values('Sales', ascending=False).head(top_n)
    biz_fig = px.bar(biz, x='Business Name', y='Sales', title=f'Top {top_n} Businesses — Q{quarter} {year}')
    return map_fig, lic_fig, biz_fig

if __name__=='__main__':
    app.run(debug=True, port=8050)



Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[5], line 128, in update_wa(year=2025, quarter=1, top_n=20)
    126 map_fig = generate_wa_map(year, quarter)
    127 # license types
--> 128 lic = dff.groupby('Business Type', as_index=False)['Sales'].sum().sort_values('Sales', ascending=False)
        dff =             Business Name                    License Type            City  \
0         Forbidden Farms                       Processor          Tacoma   
1          Edgemont Group                       Processor  East Wenatchee   
2         Lifted Cannabis  Tier None Producer & Processor          Tacoma   
3                    Ncmx     Tier 3 Producer & Processor          Tacoma   
4            Ttl Holdings  Tier None Producer & Processor         Seattle   
...                   ...                             ...             ...   
2562         Bulldog Weed                  