In [6]:
# ==========================================
# UCF VALUE ANALYSIS
# Author: Bowen Liu
# ==========================================

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# 1. LOAD & PREPROCESS DATA
# ------------------------------------------
# Load only necessary columns
cols_to_keep = [
    'INSTNM', 'CITY', 'STABBR', 'ADM_RATE', 'SAT_AVG',
    'COSTT4_A', 'MD_EARN_WNE_P10', 'GRAD_DEBT_MDN_SUPP',
    'C150_4', 'UGDS', 'CONTROL',
    'UGDS_WHITE', 'UGDS_BLACK', 'UGDS_HISP', 'UGDS_ASIAN'
]

df = pd.read_csv('Most-Recent-Cohorts-Institution.csv', usecols=cols_to_keep, low_memory=False)

# Convert to numeric
numeric_cols = ['ADM_RATE', 'SAT_AVG', 'COSTT4_A', 'MD_EARN_WNE_P10',
                'GRAD_DEBT_MDN_SUPP', 'C150_4', 'UGDS']
for col in numeric_cols:
    df_fl[col] = pd.to_numeric(df_fl[col], errors='coerce')

# Clean data
df_clean = df_fl.dropna(subset=['MD_EARN_WNE_P10', 'COSTT4_A'])
ucf_data = df_clean[df_clean['INSTNM'] == 'University of Central Florida']

# Set Style
sns.set_style("whitegrid")
ucf_gold = '#FFC904' # UCF Official Gold
ucf_black = '#000000'

# ==========================================
# 2. GENERATE AND SAVE PLOTS
# ==========================================

# --- Plot 1: ROI (Cost vs Earnings) ---
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df_clean, x='COSTT4_A', y='MD_EARN_WNE_P10',
                alpha=0.6, color='grey', size='UGDS', sizes=(50, 400), legend=False)
plt.scatter(ucf_data['COSTT4_A'], ucf_data['MD_EARN_WNE_P10'],
            color=ucf_gold, s=300, edgecolor='black', zorder=5, label='UCF')
plt.title('ROI Analysis: Cost vs. 10-Year Earnings (FL Universities)', fontsize=12, fontweight='bold')
plt.xlabel('Annual Cost of Attendance ($)')
plt.ylabel('Median Earnings ($)')
plt.legend()
plt.tight_layout()
plt.savefig('1_ROI_Analysis.png', dpi=300) # Saves high-res PNG
plt.close()
print("Generated: 1_ROI_Analysis.png")

# --- Plot 2: Debt Distribution ---
plt.figure(figsize=(10, 6))
sns.histplot(df_clean['GRAD_DEBT_MDN_SUPP'], kde=True, color='skyblue', bins=20)
plt.axvline(ucf_data['GRAD_DEBT_MDN_SUPP'].values[0], color=ucf_gold,
            linestyle='--', linewidth=3, label='UCF Median Debt')
plt.title('Student Debt Distribution (FL Universities)', fontsize=12, fontweight='bold')
plt.xlabel('Median Graduate Debt ($)')
plt.legend()
plt.tight_layout()
plt.savefig('2_Debt_Distribution.png', dpi=300)
plt.close()
print("Generated: 2_Debt_Distribution.png")

# --- Plot 3: Selectivity Matrix ---
plt.figure(figsize=(10, 6))
df_selectivity = df_clean.dropna(subset=['ADM_RATE', 'SAT_AVG'])
sns.scatterplot(data=df_selectivity, x='ADM_RATE', y='SAT_AVG', alpha=0.6, color='grey', s=100)
plt.scatter(ucf_data['ADM_RATE'], ucf_data['SAT_AVG'],
            color=ucf_gold, s=300, edgecolor='black', zorder=5, label='UCF')
plt.title('Selectivity: Admission Rate vs. SAT Score', fontsize=12, fontweight='bold')
plt.xlabel('Admission Rate (Lower is More Selective)')
plt.ylabel('Average SAT Score')
plt.gca().invert_xaxis() # Invert so "Selective" is top-right visually
plt.legend()
plt.tight_layout()
plt.savefig('3_Selectivity_Matrix.png', dpi=300)
plt.close()
print("Generated: 3_Selectivity_Matrix.png")

# --- Plot 4: Completion Rates (Top Publics) ---
plt.figure(figsize=(10, 6))
# Filter for largest public unis (>15k students)
large_publics = df_clean[(df_clean['CONTROL'] == 1) & (df_clean['UGDS'] > 15000)].sort_values('C150_4', ascending=False)
colors = [ucf_gold if x == 'University of Central Florida' else 'lightgrey' for x in large_publics['INSTNM']]
sns.barplot(data=large_publics, x='C150_4', y='INSTNM', palette=colors, edgecolor='black')
plt.title('4-Year Completion Rates (Largest FL Public Unis)', fontsize=12, fontweight='bold')
plt.xlabel('Completion Rate (1.0 = 100%)')
plt.ylabel('')
plt.tight_layout()
plt.savefig('4_Completion_Rates.png', dpi=300)
plt.close()
print("Generated: 4_Completion_Rates.png")

# --- Plot 5: Earnings by Type ---
plt.figure(figsize=(10, 6))
sns.boxplot(data=df_clean, x='CONTROL', y='MD_EARN_WNE_P10', palette='Set2')
plt.title('Earnings Potential by Institution Type', fontsize=12, fontweight='bold')
plt.xticks([0, 1, 2], ['Public', 'Private Nonprofit', 'Private For-Profit'])
plt.ylabel('Median Earnings ($)')
plt.tight_layout()
plt.savefig('5_Earnings_Type.png', dpi=300)
plt.close()
print("Generated: 5_Earnings_Type.png")

# --- Plot 6: Diversity Snapshot ---
plt.figure(figsize=(10, 6))
fl_avg_diversity = df_clean[['UGDS_WHITE', 'UGDS_BLACK', 'UGDS_HISP', 'UGDS_ASIAN']].mean()
ucf_div = ucf_data[['UGDS_WHITE', 'UGDS_BLACK', 'UGDS_HISP', 'UGDS_ASIAN']].iloc[0]

div_df = pd.DataFrame([ucf_div, fl_avg_diversity], index=['UCF', 'FL Avg'])
div_df.plot(kind='bar', stacked=True, color=['#e6e6e6', ucf_gold, 'black', 'grey'],
            figsize=(10,6), edgecolor='black')
plt.title('Student Diversity: UCF vs FL Average', fontsize=12, fontweight='bold')
plt.ylabel('Proportion of Student Body')
plt.xticks(rotation=0)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.savefig('6_Diversity_Snapshot.png', dpi=300)
plt.close()
print("Generated: 6_Diversity_Snapshot.png")

print("\nSuccess! All 6 images have been saved to the file system.")

Generated: 1_ROI_Analysis.png
Generated: 2_Debt_Distribution.png
Generated: 3_Selectivity_Matrix.png



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=large_publics, x='C150_4', y='INSTNM', palette=colors, edgecolor='black')


Generated: 4_Completion_Rates.png



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=df_clean, x='CONTROL', y='MD_EARN_WNE_P10', palette='Set2')


Generated: 5_Earnings_Type.png
Generated: 6_Diversity_Snapshot.png

Success! All 6 images have been saved to the file system.


<Figure size 1000x600 with 0 Axes>