# MGMT 467 — Week 5 Companion: Plotly Visuals + Looker Studio Guide
_Generated: 2025-09-23_

Plotly versions of the five visuals plus a Looker Studio checklist with calculated fields.


In [ ]:
%%capture
!pip install pandas numpy plotly -q
import pandas as pd, numpy as np
import plotly.express as px
np.random.seed(467)
print('✅ Plotly environment ready')

In [ ]:
# Reuse `df_flights` when an earlier cell already defined it; otherwise build a
# synthetic flight-level dataset (one row per simulated flight).
#
# Every random draw comes from NumPy's global RNG, so the ORDER of the
# `np.random.*` calls below determines the generated data for a given seed.
# Keep that order intact when editing.
try:
    df_flights
except NameError:
    print('⚙️ Creating synthetic df_flights...')
    dates = pd.date_range('2024-01-01', '2024-06-30', freq='D')
    carriers = ['AA', 'DL', 'UA', 'WN', 'AS']
    origins = ['ATL', 'DFW', 'DEN', 'ORD', 'LAX', 'SEA', 'SFO', 'PHX', 'CLT', 'MCO']
    dests = ['JFK', 'LGA', 'BOS', 'SFO', 'LAX', 'SEA', 'MIA', 'IAH', 'IAD', 'PHL']
    rows = []
    for flight_date in dates:
        for carrier_code in carriers:
            # Number of candidate flights for this carrier on this day.
            flights_today = np.random.randint(80, 140)
            for _ in range(flights_today):
                origin_code = np.random.choice(origins)
                dest_code = np.random.choice(dests)
                # Some airports appear in both lists; drop same-airport pairs.
                if origin_code == dest_code:
                    continue

                distance = np.random.randint(300, 2800)
                seat_count = np.random.choice([90, 120, 150, 180, 210])

                # Load factor drawn from a normal and clipped to [0.55, 0.98].
                load = float(np.clip(np.random.normal(0.82, 0.07), 0.55, 0.98))
                pax_count = int(seat_count * load)

                # Arrival delay is derived from the UNCLAMPED departure delay;
                # both are floored only afterwards.
                dep_delay = int(np.random.normal(8, 15))
                arr_delay = dep_delay + int(np.random.normal(0, 12))
                dep_delay = max(dep_delay, -5)
                arr_delay = max(arr_delay, -10)

                is_cancelled = np.random.rand() < 0.025
                is_diverted = np.random.rand() < 0.005

                revenue = pax_count * np.random.uniform(120, 420)
                unit_cost = np.random.uniform(0.08, 0.16)
                cost = unit_cost * seat_count * distance
                fuel = distance * np.random.uniform(3.0, 4.5)
                nps_score = np.random.normal(35, 15)

                rows.append({
                    'date': flight_date,
                    'carrier': carrier_code,
                    'origin': origin_code,
                    'dest': dest_code,
                    'distance_miles': distance,
                    'seats': seat_count,
                    'passengers': pax_count,
                    'dep_delay_min': dep_delay,
                    'arr_delay_min': arr_delay,
                    'cancelled': int(is_cancelled),
                    'diverted': int(is_diverted),
                    'revenue_usd': round(revenue, 2),
                    'cost_usd': round(cost, 2),
                    'fuel_burn_gal': round(fuel, 1),
                    'nps': round(nps_score, 1),
                })

    df_flights = pd.DataFrame(rows)

    # Derived KPI columns. `casm` depends on `asm`, so the order matters.
    df_flights['on_time'] = df_flights['arr_delay_min'].le(15).astype(int)
    df_flights['load_factor'] = df_flights['passengers'].div(df_flights['seats']).round(3)
    df_flights['asm'] = df_flights['seats'].mul(df_flights['distance_miles'])
    df_flights['casm'] = df_flights['cost_usd'].div(df_flights['asm']).round(4)
    df_flights['month'] = pd.to_datetime(df_flights['date']).dt.to_period('M').astype(str)
    df_flights['route'] = df_flights['origin'] + '-' + df_flights['dest']
    print('✅ df_flights created:', df_flights.shape)
else:
    print('ℹ️ Using existing df_flights')

## Plotly Visualizations

In [ ]:
# Visual 1: on-time rate per carrier, best carrier first.
# The rate is the mean of `on_time` within each carrier (a 0/1 flag in the
# synthetic dataset, so the mean reads as a share of flights).
rate = df_flights.groupby('carrier', as_index=False)['on_time'].mean()
rate = rate.rename(columns={'on_time': 'on_time_rate'})
rate = rate.sort_values('on_time_rate', ascending=False)

fig = px.bar(
    rate,
    x='carrier',
    y='on_time_rate',
    text='on_time_rate',
    labels={'on_time_rate': 'On-Time Rate'},
    title='On-Time Rate by Carrier',
)
# Show bar labels and the y axis as percentages.
fig.update_traces(texttemplate='%{text:.1%}', textposition='outside')
fig.update_layout(yaxis_tickformat='.0%')
fig.show()

In [ ]:
# Visual 2: mean arrival delay per day, overlaid with a 7-row rolling mean.
daily = df_flights.groupby('date', as_index=False)['arr_delay_min'].mean()
daily = daily.rename(columns={'arr_delay_min': 'avg_arr_delay'})

# `min_periods=1` lets the first rows average over the partial window instead
# of producing NaN.
daily['roll7'] = daily['avg_arr_delay'].rolling(window=7, min_periods=1).mean()

fig = px.line(
    daily,
    x='date',
    y=['avg_arr_delay', 'roll7'],
    labels={'value': 'Minutes', 'variable': 'Series'},
    title='Average Arrival Delay (Daily) + 7d Rolling',
)
fig.show()

In [ ]:
# Visual 3: distribution of arrival delays across all flights.
fig = px.histogram(
    df_flights,
    x='arr_delay_min',
    nbins=60,
    title='Arrival Delay Distribution',
)
# Dashed vertical reference line at 0 minutes.
fig.add_vline(x=0, line_dash='dash')
fig.show()

In [ ]:
# Visual 4: the 15 routes with the highest mean load factor.
top = (
    df_flights.groupby('route', as_index=False)['load_factor'].mean()
    .sort_values('load_factor', ascending=False)
    .head(15)
)
fig = px.bar(
    top,
    y='route',
    x='load_factor',
    orientation='h',
    labels={'load_factor': 'Avg Load Factor', 'route': 'Route'},
    title='Top 15 Routes by Avg Load Factor',
)
# Plotly places categories on a horizontal bar chart from bottom to top in
# data order, which would put the best route at the BOTTOM of this
# descending-sorted frame. Reversing the y axis makes the chart read
# top-down from the highest load factor.
fig.update_layout(xaxis_tickformat='.0%', yaxis_autorange='reversed')
fig.show()

In [ ]:
# Visual 5: cost vs. revenue bubble chart (color = CASM, size = passengers).
# Plot at most 5000 flights. The cap on `n` matters when a pre-existing
# `df_flights` is smaller than 5000 rows: `DataFrame.sample` raises ValueError
# if asked for more rows than exist (without replacement).
sample_size = min(5000, len(df_flights))
scatter_sample = df_flights.sample(n=sample_size, random_state=1)

fig = px.scatter(
    scatter_sample,
    x='cost_usd',
    y='revenue_usd',
    color='casm',
    size='passengers',
    hover_data=['carrier', 'route', 'distance_miles'],
    labels={'casm': 'CASM (USD/ASM)'},
    title='Cost vs Revenue (color=CASM, size=passengers)',
)

# Dashed y = x reference line (revenue equals cost), drawn out to the largest
# cost in the full dataset rather than only the sampled rows.
max_cost = float(df_flights['cost_usd'].max())
fig.add_shape(type='line', x0=0, y0=0, x1=max_cost, y1=max_cost, line=dict(dash='dash'))
fig.update_layout(xaxis_title='Cost (USD)', yaxis_title='Revenue (USD)')
fig.show()

## Export CSV for Looker Studio
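Running the cell below writes `airline_kpis_synthetic.csv` to the runtime's working directory. Download it to upload to Looker Studio directly, or load it into BigQuery and connect Looker Studio to that table.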

In [ ]:
# Write the flight-level table to a CSV in the current working directory.
# `index=False` keeps the DataFrame index out of the file so it does not show
# up as an extra unnamed column.
csv_path = 'airline_kpis_synthetic.csv'
df_flights.to_csv(csv_path, index=False)
print('📁 Saved airline_kpis_synthetic.csv')

## Looker Studio Checklist
### Calculated Fields
- **On-Time Rate (%):** `100 * SUM(on_time) / COUNT(on_time)`
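- **Load Factor:** `passengers / seats` (a per-flight ratio — aggregate it with Average rather than Sum in charts, or use `SUM(passengers) / SUM(seats)` for a seat-weighted figure)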
- **ASM:** `seats * distance_miles`
- **CASM:** `cost_usd / (seats * distance_miles)`
- **Yield:** `revenue_usd / (passengers * distance_miles)`
- **Route:** `CONCAT(origin,'-',dest)`
- **Profit Margin (opt):** `(revenue_usd - cost_usd) / NULLIF(revenue_usd,0)`

### Charts
1) Bar — **On-Time Rate by carrier** (sort desc, show % labels)
2) Time series — **Avg Arrival Delay** (daily) + trendline
3) Histogram — **Arrival delay distribution** (40–60 bins, reference line at 0)
4) Bar (horizontal) — **Top 15 routes by Load Factor**
5) Bubble — **Cost vs Revenue** (size=passengers, color=CASM or Profit Margin)

### Filters & Controls
- Drop-downs for `carrier`, `month`, `Route`; optional date range.

### Layout Tips
- KPI scorecards on top (On-Time %, Avg Delay, Cancel %, Load Factor).
- Consistent color scales; concise legends; clear axes.
