In [1]:
#Python Code: Simple Bubble Detection (AI vs Housing)
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Example: Detecting bubble-like growth
years = np.array([1,2,3,4,5,6,7,8,9,10])
housing_index = np.array([100,110,120,140,160,200,250,310,380,450])
ai_investment = np.array([50,60,70,100,150,220,330,500,750,1100])

df = pd.DataFrame({
    "years": years,
    "housing": housing_index,
    "ai_capital": ai_investment
})

# Fit linear model to detect if growth is exponential
model = LinearRegression()
model.fit(df[["years"]], np.log(df["ai_capital"]))

predicted = np.exp(model.predict(df[["years"]]))

df["predicted_ai"] = predicted
df["bubble_signal"] = df["ai_capital"] / df["predicted_ai"]

print(df)

# If bubble_signal >> 1, potential bubble behavior.


   years  housing  ai_capital  predicted_ai  bubble_signal
0      1      100          50     39.369147       1.270030
1      2      110          60     56.392992       1.063962
2      3      120          70     80.778219       0.866570
3      4      140         100    115.708006       0.864244
4      5      160         150    165.741989       0.905021
5      6      200         220    237.411461       0.926661
6      7      250         330    340.071953       0.970383
7      8      310         500    487.124475       1.026432
8      9      380         750    697.764846       1.074861
9     10      450        1100    999.489464       1.100562


In [2]:
#Python Code: Simple ‚ÄúTeam Behavior Risk Model‚Äù
import pandas as pd

# Example teammate behavior log
behavior = pd.DataFrame({
    "task": ["A","B","C","D","E","F"],
    "on_time": [0,1,0,0,1,0],     # 1=on time, 0=late
    "quality": [2,4,1,1,3,2],    # subjective scoring
    "communication": [1,3,1,0,2,1] # quality of communication
})

# Weighted score
weights = {
    "on_time": 0.4,
    "quality": 0.4,
    "communication": 0.2
}

behavior["score"] = (
    behavior["on_time"]*weights["on_time"] +
    behavior["quality"]*weights["quality"]/5 +
    behavior["communication"]*weights["communication"]/3
)

avg_score = behavior["score"].mean()

if avg_score < 0.4:
    risk = "High Risk - Reallocate immediately"
elif avg_score < 0.6:
    risk = "Moderate Risk - Document and monitor closely"
else:
    risk = "Low Risk - Reliable teammate"

print("Average Score:", avg_score)
print("Teammate Risk Level:", risk)


Average Score: 0.3955555555555556
Teammate Risk Level: High Risk - Reallocate immediately


In [3]:
#Python Code: Simple ‚ÄúCobalt Supply Risk Model‚Äù
import pandas as pd

# Simple supply-risk model
df = pd.DataFrame({
    "country": ["DRC", "Australia", "Cuba", "Philippines"],
    "global_share_pct": [70, 4, 2.5, 2],
    "political_risk": [0.9, 0.2, 0.3, 0.4],  # 0=low, 1=high
    "ethical_risk": [0.85, 0.1, 0.2, 0.25]
})

# Weighted Risk Score
df["risk_score"] = (
    0.6 * df["political_risk"] +
    0.4 * df["ethical_risk"]
)

df["weighted_supply_risk"] = df["risk_score"] * df["global_share_pct"]

total_risk = df["weighted_supply_risk"].sum()

print(df)
print("Total Cobalt Supply Chain Risk:", total_risk)


       country  global_share_pct  political_risk  ethical_risk  risk_score  \
0          DRC              70.0             0.9          0.85        0.88   
1    Australia               4.0             0.2          0.10        0.16   
2         Cuba               2.5             0.3          0.20        0.26   
3  Philippines               2.0             0.4          0.25        0.34   

   weighted_supply_risk  
0                 61.60  
1                  0.64  
2                  0.65  
3                  0.68  
Total Cobalt Supply Chain Risk: 63.57000000000001


In [8]:
""" Lavender Roses Flower Shop Full Financial Statements with Calculation Breakdown """

import pandas as pd

#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# 1. INPUT ASSUMPTIONS
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Revenue
revenue = 12_500

# COGS breakdown
flowers = 3_200
ribbons_vases = 900
packaging = 300
delivery_materials = 400
cogs = flowers + ribbons_vases + packaging + delivery_materials

# Operating expenses breakdown
rent = 2_400
wages = 1_800
utilities = 300
shopify_website = 150
marketing = 400
misc = 250
operating_expenses = rent + wages + utilities + shopify_website + marketing + misc

# Interest and tax
loan_balance = 3_500
monthly_interest_rate = 0.0342857  # chosen to approximate 120 interest
interest_expense = round(loan_balance * monthly_interest_rate, 2)  # ‚âà 120
tax_rate = 0.20

# Cash flow assumptions
beginning_cash = 3_200
cash_from_investing = -600
cash_from_financing = 1_000
# We'll derive cash_from_operations from operating income later

# Balance sheet assumptions (assets & liabilities)
inventory = 3_200
equipment = 7_800
# We'll derive cash from cash flow and equity from A = L + E
loan_payable = loan_balance
accounts_payable = 1_000

#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# 2. INCOME STATEMENT
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

gross_profit = revenue - cogs
operating_income = gross_profit - operating_expenses
ebt = operating_income - interest_expense  # Earnings Before Tax
tax_expense = round(ebt * tax_rate, 2)
net_income = round(ebt - tax_expense, 2)

income_statement = pd.DataFrame([
    ["Revenue", revenue, "Total sales from bouquets, events, subscriptions"],
    ["Cost of Goods Sold (COGS)", cogs,
     f"Flowers ({flowers}) + Ribbons/Vases ({ribbons_vases}) + "
     f"Packaging ({packaging}) + Delivery Materials ({delivery_materials})"],
    ["Gross Profit", gross_profit, "Revenue ‚Äì COGS"],
    ["Operating Expenses", operating_expenses,
     f"Rent ({rent}) + Wages ({wages}) + Utilities ({utilities}) + "
     f"Shopify/Website ({shopify_website}) + Marketing ({marketing}) + Misc ({misc})"],
    ["Operating Income", operating_income, "Gross Profit ‚Äì Operating Expenses"],
    ["Interest Expense", interest_expense, f"Loan Balance ({loan_balance}) √ó {monthly_interest_rate:.6f}"],
    ["Earnings Before Tax (EBT)", ebt, "Operating Income ‚Äì Interest Expense"],
    ["Tax Expense", tax_expense, f"EBT √ó Tax Rate ({tax_rate:.0%})"],
    ["Net Income", net_income, "EBT ‚Äì Tax Expense"]
], columns=["Line Item", "Amount", "Calculation"])

print("----------------------------------------------------------------------------------------------------------------------------------")
print("\nINCOME STATEMENT\n")
print("----------------------------------------------------------------------------------------------------------------------------------")
print(income_statement.to_string(index=False))
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# 3. CASH FLOW STATEMENT
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

# Assume 80% of operating income converts to cash from operations
cash_from_operations = round(operating_income * 0.80, 2)

ending_cash = beginning_cash + cash_from_operations + cash_from_investing + cash_from_financing

cash_flow_statement = pd.DataFrame([
    ["Beginning Cash", beginning_cash, "Cash at start of month"],
    ["Cash Flow From Operations", cash_from_operations,
     "Estimated: Operating Income √ó 80% cash conversion"],
    ["Cash Flow From Investing", cash_from_investing,
     "Purchase of equipment (e.g., new floral fridge)"],
    ["Cash Flow From Financing", cash_from_financing,
     "New loan or capital contribution"],
    ["Ending Cash", ending_cash,
     "Beginning Cash + CFO + CFI + CFF"]
], columns=["Category", "Amount", "Calculation"])

print("----------------------------------------------------------------------------------------------------------------------------------")
print("\nCASH FLOW STATEMENT\n")
print("----------------------------------------------------------------------------------------------------------------------------------")
print(cash_flow_statement.to_string(index=False))

#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# 4. BALANCE SHEET
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

cash = ending_cash
total_assets = cash + inventory + equipment

total_liabilities = loan_payable + accounts_payable
equity = total_assets - total_liabilities  # A = L + E ‚Üí E = A ‚Äì L

assets_section = pd.DataFrame([
    ["Cash", cash, "From Ending Cash in Cash Flow Statement"],
    ["Inventory", inventory, "Unsold flowers & materials at cost"],
    ["Equipment", equipment, "Cooler, shelves, POS, tools"],
    ["Total Assets", total_assets, "Cash + Inventory + Equipment"]
], columns=["Asset Category", "Amount", "Calculation"])

liabilities_section = pd.DataFrame([
    ["Loan Payable", loan_payable, "Outstanding balance of business loan"],
    ["Accounts Payable", accounts_payable, "Unpaid supplier invoices"],
    ["Total Liabilities", total_liabilities, "Loan Payable + Accounts Payable"]
], columns=["Liability Category", "Amount", "Calculation"])

equity_section = pd.DataFrame([
    ["Owner's Equity", equity, "Total Assets ‚Äì Total Liabilities"],
    ["Total Liabilities + Equity", total_liilities_plus_equity := total_liabilities + equity,
     "Must equal Total Assets"]
], columns=["Equity Category", "Amount", "Calculation"])

print("----------------------------------------------------------------------------------------------------------------------------------")
print("\nBALANCE SHEET\n")
print("----------------------------------------------------------------------------------------------------------------------------------")
print("Assets:\n")
print(assets_section.to_string(index=False))
print("\nLiabilties:\n")
print(liabilities_section.to_string(index=False))
print("\nEquity:\n")
print(equity_section.to_string(index=False))
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# 5. Summary Check
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

print("----------------------------------------------------------------------------------------------------------------------------------")
print("\nSummary Checks")
print("----------------------------------------------------------------------------------------------------------------------------------")
print(f"Net Income (should be ~1824): {net_income}")
print(f"Total Assets: {total_assets}")
print(f"Total Liabilities: {total_liabilities}")
print(f"Equity: {equity}")
print(f"Assets == Liabilities + Equity ? {total_assets == total_liabilities + equity}")
print(f"Ending Cash: {ending_cash}")



----------------------------------------------------------------------------------------------------------------------------------

INCOME STATEMENT

----------------------------------------------------------------------------------------------------------------------------------
                Line Item  Amount                                                                                         Calculation
                  Revenue 12500.0                                                    Total sales from bouquets, events, subscriptions
Cost of Goods Sold (COGS)  4800.0                   Flowers (3200) + Ribbons/Vases (900) + Packaging (300) + Delivery Materials (400)
             Gross Profit  7700.0                                                                                      Revenue ‚Äì COGS
       Operating Expenses  5300.0 Rent (2400) + Wages (1800) + Utilities (300) + Shopify/Website (150) + Marketing (400) + Misc (250)
         Operating Income  2400.0              

In [5]:
#Python Code: ‚ÄúPressure Index‚Äù ‚Äî Modeling Diamond vs. Rock Behavior
import pandas as pd
import numpy as np

# Example dataset of "pressure response" characteristics
data = pd.DataFrame({
    "material": ["rock", "rock", "diamond", "diamond"],
    "stress_tolerance": [0.2, 0.3, 0.95, 0.98],  # how much pressure it can withstand
    "structure_cohesion": [0.1, 0.25, 0.97, 0.99],  # internal integrity
    "clarity_factor": [0.05, 0.10, 0.90, 0.92]  # figurative "performance clarity"
})

# Weighted score to determine "value under pressure"
weights = {
    "stress_tolerance": 0.5,
    "structure_cohesion": 0.3,
    "clarity_factor": 0.2
}

data["pressure_index"] = (
    data["stress_tolerance"] * weights["stress_tolerance"] +
    data["structure_cohesion"] * weights["structure_cohesion"] +
    data["clarity_factor"] * weights["clarity_factor"]
)

# Classification
data["classification"] = np.where(
    data["pressure_index"] >= 0.8,
    "Diamond Behavior",
    "Rock Behavior"
)

print(data)


  material  stress_tolerance  structure_cohesion  clarity_factor  \
0     rock              0.20                0.10            0.05   
1     rock              0.30                0.25            0.10   
2  diamond              0.95                0.97            0.90   
3  diamond              0.98                0.99            0.92   

   pressure_index    classification  
0           0.140     Rock Behavior  
1           0.245     Rock Behavior  
2           0.946  Diamond Behavior  
3           0.971  Diamond Behavior  


In [6]:
#Python Code: ML Model to Detect ‚ÄúMixed Signals‚Äù
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Example dataset (1 = consistent, 0 = mixed signals)
data = pd.DataFrame({
    "text_frequency": [10, 3, 15, 2, 8, 1, 12, 0],   # texts per day
    "response_time": [2, 48, 1, 72, 5, 96, 3, 120], # hours to respond
    "follow_through": [1, 0, 1, 0, 1, 0, 1, 0],     # does he keep plans?
    "initiates_contact": [1, 0, 1, 0, 1, 0, 1, 0],  # initiates?
    "emotion_consistency": [1, 0, 1, 0, 1, 0, 1, 0],# consistent tone?
    "label": [1,0,1,0,1,0,1,0]                     # 1=consistent,0=mixed
})

X = data.drop("label", axis=1)
y = data["label"]

model = RandomForestClassifier()
model.fit(X, y)

# Example evaluation of a new "guy"
test = pd.DataFrame({
    "text_frequency": [4],
    "response_time": [36],
    "follow_through": [0],
    "initiates_contact": [0],
    "emotion_consistency": [0]
})

prediction = model.predict(test)[0]
result = "Consistent King ‚ù§Ô∏è" if prediction == 1 else "Mixed Signal Menace üö©"

print("Prediction:", result)


#the code
# ==========================================
# MIXED SIGNALS vs GREEN-FLAG BF ML MODEL
# PCA, "RCA", Classification, CV, Metrics
# ==========================================

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    precision_score,
    recall_score,
    accuracy_score
)

import matplotlib.pyplot as plt
import seaborn as sns

np.random.seed(617)

# ------------------------------------------
# 1. SYNTHETIC DATASET: BF BEHAVIOR FEATURES
# ------------------------------------------
# 0 = mixed signals (red flag)
# 1 = green flag bf

n_samples = 400

# Features:
# - text_frequency: avg messages per day
# - response_time: avg hours to respond
# - follow_through: ratio of plans kept
# - initiates_contact: ratio of convos he starts
# - emotional_consistency: 0‚Äì1 scale
# - future_orientation: talks about future with you (0‚Äì1)
# - boundary_respect: respects your time/space (0‚Äì1)
# - accountability: owns mistakes (0‚Äì1)

# Green-flag bfs
n_green = n_samples // 2
green = pd.DataFrame({
    "text_frequency": np.random.normal(loc=10, scale=2, size=n_green).clip(3, 20),
    "response_time": np.random.normal(loc=1.5, scale=0.7, size=n_green).clip(0.1, 4),
    "follow_through": np.random.normal(loc=0.9, scale=0.05, size=n_green).clip(0.7, 1),
    "initiates_contact": np.random.normal(loc=0.7, scale=0.1, size=n_green).clip(0.3, 1),
    "emotional_consistency": np.random.normal(loc=0.9, scale=0.05, size=n_green).clip(0.7, 1),
    "future_orientation": np.random.normal(loc=0.8, scale=0.1, size=n_green).clip(0.4, 1),
    "boundary_respect": np.random.normal(loc=0.9, scale=0.05, size=n_green).clip(0.7, 1),
    "accountability": np.random.normal(loc=0.85, scale=0.07, size=n_green).clip(0.5, 1),
    "label": 1
})

# Mixed-signal guys
n_mixed = n_samples - n_green
mixed = pd.DataFrame({
    "text_frequency": np.random.normal(loc=4, scale=2, size=n_mixed).clip(0, 15),
    "response_time": np.random.normal(loc=24, scale=16, size=n_mixed).clip(1, 72),
    "follow_through": np.random.normal(loc=0.4, scale=0.15, size=n_mixed).clip(0, 0.8),
    "initiates_contact": np.random.normal(loc=0.3, scale=0.15, size=n_mixed).clip(0, 0.8),
    "emotional_consistency": np.random.normal(loc=0.5, scale=0.2, size=n_mixed).clip(0, 1),
    "future_orientation": np.random.normal(loc=0.3, scale=0.2, size=n_mixed).clip(0, 0.9),
    "boundary_respect": np.random.normal(loc=0.6, scale=0.2, size=n_mixed).clip(0, 1),
    "accountability": np.random.normal(loc=0.4, scale=0.2, size=n_mixed).clip(0, 1),
    "label": 0
})

df = pd.concat([green, mixed], ignore_index=True)

print("Sample of the dataset:")
print(df.head())

# ------------------------------------------
# 2. TRAIN / TEST SPLIT
# ------------------------------------------
X = df.drop("label", axis=1)
y = df["label"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=617, stratify=y
)

# ------------------------------------------
# 3. STANDARDIZATION
# ------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------
# 4. PCA (Standard)
# ------------------------------------------
pca = PCA(n_components=2, random_state=617)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

print("\nPCA explained variance ratio:", pca.explained_variance_ratio_)

# ------------------------------------------
# 5. "RCA" ‚Äì Randomized PCA Variant
#    (Here: PCA with randomized SVD solver)
# ------------------------------------------
rca = PCA(n_components=2, svd_solver="randomized", random_state=617)
X_train_rca = rca.fit_transform(X_train_scaled)
X_test_rca = rca.transform(X_test_scaled)

print("\n'RCA' explained variance ratio:", rca.explained_variance_ratio_)

# ------------------------------------------
# 6. CLASSIFICATION MODEL
#    Random Forest = flexible, good baseline
# ------------------------------------------
clf = RandomForestClassifier(
    n_estimators=300,
    random_state=617,
    max_depth=6,
    class_weight="balanced"
)

clf.fit(X_train_scaled, y_train)

# ------------------------------------------
# 7. CROSS-VALIDATION (Green Flag BF Detection)
# ------------------------------------------
cv_scores = cross_val_score(clf, X_train_scaled, y_train, cv=5, scoring="accuracy")
print("\nCross-Validation Accuracy Scores:", cv_scores)
print("Mean CV Accuracy:", cv_scores.mean())

# ------------------------------------------
# 8. EVALUATION ON TEST SET
# ------------------------------------------
y_pred = clf.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)   # precision for class 1 (green flag)
recall = recall_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("\n=== TEST SET METRICS ===")
print("Accuracy :", round(accuracy, 3))
print("Precision:", round(precision, 3))
print("Recall   :", round(recall, 3))

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["Mixed Signals (0)", "Green Flag (1)"]))

# ------------------------------------------
# 9. CONFUSION MATRIX HEATMAP
# ------------------------------------------
plt.figure(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=["Pred: Mixed", "Pred: Green"],
            yticklabels=["True: Mixed", "True: Green"])
plt.title("Confusion Matrix ‚Äì Mixed Signals vs Green Flag")
plt.ylabel("True Label")
plt.xlabel("Predicted Label")
plt.tight_layout()
plt.show()

# ------------------------------------------
# 10. PCA SCATTER PLOT (VISUALIZING BEHAVIOR SPACE)
# ------------------------------------------
plt.figure(figsize=(6, 5))
scatter = plt.scatter(
    X_train_pca[:, 0],
    X_train_pca[:, 1],
    c=y_train,
    alpha=0.7
)
plt.title("PCA of BF Behavior Space")
plt.xlabel("PC 1")
plt.ylabel("PC 2")
plt.legend(handles=scatter.legend_elements()[0],
           labels=["Mixed Signals (0)", "Green Flag (1)"],
           title="Class")
plt.tight_layout()
plt.show()

# ------------------------------------------
# 11. FEATURE IMPORTANCE BARPLOT
# ------------------------------------------
importances = clf.feature_importances_
feature_importance = pd.Series(importances, index=X.columns).sort_values(ascending=False)

plt.figure(figsize=(7, 4))
sns.barplot(x=feature_importance.values, y=feature_importance.index)
plt.title("Feature Importance ‚Äì What Predicts a Green-Flag BF?")
plt.xlabel("Importance")
plt.ylabel("Behavior Feature")
plt.tight_layout()
plt.show()

############################################
# ==========================================
# MIXED SIGNALS vs GREEN-FLAG BF ML MODEL
# + ROC & AUC
# ==========================================

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    precision_score,
    recall_score,
    accuracy_score,
    roc_curve,
    roc_auc_score
)

import matplotlib.pyplot as plt
import seaborn as sns

np.random.seed(617)

# ------------------------------------------
# 1. SYNTHETIC DATASET
# ------------------------------------------
n_samples = 400

n_green = n_samples // 2
green = pd.DataFrame({
    "text_frequency": np.random.normal(loc=10, scale=2, size=n_green).clip(3, 20),
    "response_time": np.random.normal(loc=1.5, scale=0.7, size=n_green).clip(0.1, 4),
    "follow_through": np.random.normal(loc=0.9, scale=0.05, size=n_green).clip(0.7, 1),
    "initiates_contact": np.random.normal(loc=0.7, scale=0.1, size=n_green).clip(0.3, 1),
    "emotional_consistency": np.random.normal(loc=0.9, scale=0.05, size=n_green).clip(0.7, 1),
    "future_orientation": np.random.normal(loc=0.8, scale=0.1, size=n_green).clip(0.4, 1),
    "boundary_respect": np.random.normal(loc=0.9, scale=0.05, size=n_green).clip(0.7, 1),
    "accountability": np.random.normal(loc=0.85, scale=0.07, size=n_green).clip(0.5, 1),
    "label": 1
})

n_mixed = n_samples - n_green
mixed = pd.DataFrame({
    "text_frequency": np.random.normal(loc=4, scale=2, size=n_mixed).clip(0, 15),
    "response_time": np.random.normal(loc=24, scale=16, size=n_mixed).clip(1, 72),
    "follow_through": np.random.normal(loc=0.4, scale=0.15, size=n_mixed).clip(0, 0.8),
    "initiates_contact": np.random.normal(loc=0.3, scale=0.15, size=n_mixed).clip(0, 0.8),
    "emotional_consistency": np.random.normal(loc=0.5, scale=0.2, size=n_mixed).clip(0, 1),
    "future_orientation": np.random.normal(loc=0.3, scale=0.2, size=n_mixed).clip(0, 0.9),
    "boundary_respect": np.random.normal(loc=0.6, scale=0.2, size=n_mixed).clip(0, 1),
    "accountability": np.random.normal(loc=0.4, scale=0.2, size=n_mixed).clip(0, 1),
    "label": 0
})

df = pd.concat([green, mixed], ignore_index=True)

print("Sample of the dataset:")
print(df.head())

X = df.drop("label", axis=1)
y = df["label"]

# ------------------------------------------
# 2. TRAIN / TEST SPLIT + SCALING
# ------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=617, stratify=y
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------
# 3. PCA + "RCA"
# ------------------------------------------
pca = PCA(n_components=2, random_state=617)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

print("\nPCA explained variance ratio:", pca.explained_variance_ratio_)

rca = PCA(n_components=2, svd_solver="randomized", random_state=617)
X_train_rca = rca.fit_transform(X_train_scaled)
X_test_rca = rca.transform(X_test_scaled)

print("\n'RCA' explained variance ratio:", rca.explained_variance_ratio_)

# ------------------------------------------
# 4. CLASSIFIER
# ------------------------------------------
clf = RandomForestClassifier(
    n_estimators=300,
    random_state=617,
    max_depth=6,
    class_weight="balanced"
)

clf.fit(X_train_scaled, y_train)

# ------------------------------------------
# 5. CROSS-VALIDATION
# ------------------------------------------
cv_scores = cross_val_score(clf, X_train_scaled, y_train, cv=5, scoring="accuracy")
print("\nCross-Validation Accuracy Scores:", cv_scores)
print("Mean CV Accuracy:", cv_scores.mean())

# ------------------------------------------
# 6. METRICS ON TEST SET
# ------------------------------------------
y_pred = clf.predict(X_test_scaled)
y_proba = clf.predict_proba(X_test_scaled)[:, 1]  # probability of green flag (1)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print("\n=== TEST SET METRICS ===")
print("Accuracy :", round(accuracy, 3))
print("Precision:", round(precision, 3))
print("Recall   :", round(recall, 3))

print("\nClassification Report:")
print(classification_report(y_test, y_pred,
                            target_names=["Mixed Signals (0)", "Green Flag (1)"]))

# ------------------------------------------
# 7. CONFUSION MATRIX HEATMAP
# ------------------------------------------
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d",
            xticklabels=["Pred: Mixed", "Pred: Green"],
            yticklabels=["True: Mixed", "True: Green"])
plt.title("Confusion Matrix ‚Äì Mixed Signals vs Green Flag")
plt.ylabel("True Label")
plt.xlabel("Predicted Label")
plt.tight_layout()
plt.show()

# ------------------------------------------
# 8. ROC CURVE & AUC
# ------------------------------------------
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
auc_score = roc_auc_score(y_test, y_proba)

plt.figure(figsize=(5, 4))
plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {auc_score:.3f})")
plt.plot([0, 1], [0, 1], linestyle="--", label="Random Guess")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve ‚Äì Green Flag BF Classifier")
plt.legend()
plt.tight_layout()
plt.show()

# ------------------------------------------
# 9. PCA SCATTER
# ------------------------------------------
plt.figure(figsize=(6, 5))
scatter = plt.scatter(X_train_pca[:, 0], X_train_pca[:, 1],
                      c=y_train, alpha=0.7)
plt.title("PCA of BF Behavior Space")
plt.xlabel("PC 1")
plt.ylabel("PC 2")
plt.legend(handles=scatter.legend_elements()[0],
           labels=["Mixed Signals (0)", "Green Flag (1)"],
           title="Class")
plt.tight_layout()
plt.show()

# ------------------------------------------
# 10. FEATURE IMPORTANCE
# ------------------------------------------
importances = clf.feature_importances_
feature_importance = pd.Series(importances, index=X.columns).sort_values(ascending=False)

plt.figure(figsize=(7, 4))
sns.barplot(x=feature_importance.values, y=feature_importance.index)
plt.title("Feature Importance ‚Äì What Predicts a Green-Flag BF?")
plt.xlabel("Importance")
plt.ylabel("Behavior Feature")
plt.tight_layout()
plt.show()


##########################################################
streamlit run dating_dashboard.py




# ==========================================
# Streamlit Dating Analytics Dashboard
# Mixed Signals vs Green-Flag BF
# ==========================================
pip install streamlit

import numpy as np
import pandas as pd
import streamlit as st

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    confusion_matrix, accuracy_score,
    precision_score, recall_score, roc_auc_score, roc_curve
)

import matplotlib.pyplot as plt
import seaborn as sns

np.random.seed(617)

# ---------- 1. Generate Synthetic Data (same logic as notebook) ----------

def generate_data(n_samples=400):
    n_green = n_samples // 2
    green = pd.DataFrame({
        "text_frequency": np.random.normal(loc=10, scale=2, size=n_green).clip(3, 20),
        "response_time": np.random.normal(loc=1.5, scale=0.7, size=n_green).clip(0.1, 4),
        "follow_through": np.random.normal(loc=0.9, scale=0.05, size=n_green).clip(0.7, 1),
        "initiates_contact": np.random.normal(loc=0.7, scale=0.1, size=n_green).clip(0.3, 1),
        "emotional_consistency": np.random.normal(loc=0.9, scale=0.05, size=n_green).clip(0.7, 1),
        "future_orientation": np.random.normal(loc=0.8, scale=0.1, size=n_green).clip(0.4, 1),
        "boundary_respect": np.random.normal(loc=0.9, scale=0.05, size=n_green).clip(0.7, 1),
        "accountability": np.random.normal(loc=0.85, scale=0.07, size=n_green).clip(0.5, 1),
        "label": 1
    })

    n_mixed = n_samples - n_green
    mixed = pd.DataFrame({
        "text_frequency": np.random.normal(loc=4, scale=2, size=n_mixed).clip(0, 15),
        "response_time": np.random.normal(loc=24, scale=16, size=n_mixed).clip(1, 72),
        "follow_through": np.random.normal(loc=0.4, scale=0.15, size=n_mixed).clip(0, 0.8),
        "initiates_contact": np.random.normal(loc=0.3, scale=0.15, size=n_mixed).clip(0, 0.8),
        "emotional_consistency": np.random.normal(loc=0.5, scale=0.2, size=n_mixed).clip(0, 1),
        "future_orientation": np.random.normal(loc=0.3, scale=0.2, size=n_mixed).clip(0, 0.9),
        "boundary_respect": np.random.normal(loc=0.6, scale=0.2, size=n_mixed).clip(0, 1),
        "accountability": np.random.normal(loc=0.4, scale=0.2, size=n_mixed).clip(0, 1),
        "label": 0
    })

    df = pd.concat([green, mixed], ignore_index=True)
    return df

df = generate_data()

X = df.drop("label", axis=1)
y = df["label"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=617, stratify=y
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

pca = PCA(n_components=2, random_state=617)
X_train_pca = pca.fit_transform(X_train_scaled)

clf = RandomForestClassifier(
    n_estimators=300,
    random_state=617,
    max_depth=6,
    class_weight="balanced"
)
clf.fit(X_train_scaled, y_train)

y_pred = clf.predict(X_test_scaled)
y_proba = clf.predict_proba(X_test_scaled)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
auc_score = roc_auc_score(y_test, y_proba)
cm = confusion_matrix(y_test, y_pred)

# ---------- 2. Streamlit UI ----------

st.set_page_config(page_title="Dating Analytics Dashboard", layout="wide")

st.title("üíö Dating Analytics Dashboard: Mixed Signals vs Green-Flag BF")
st.write("Because your heart deserves **data-driven** decisions.")

st.sidebar.header("Describe His Behavior")

text_frequency = st.sidebar.slider(
    "Texts per day (average)", min_value=0.0, max_value=20.0, value=6.0, step=0.5
)
response_time = st.sidebar.slider(
    "Average response time (hours)", min_value=0.1, max_value=72.0, value=12.0, step=0.5
)
follow_through = st.sidebar.slider(
    "Follow-through on plans (0‚Äì1)", min_value=0.0, max_value=1.0, value=0.6, step=0.05
)
initiates_contact = st.sidebar.slider(
    "Initiates contact ratio (0‚Äì1)", min_value=0.0, max_value=1.0, value=0.5, step=0.05
)
emotional_consistency = st.sidebar.slider(
    "Emotional consistency (0‚Äì1)", min_value=0.0, max_value=1.0, value=0.6, step=0.05
)
future_orientation = st.sidebar.slider(
    "Talks about future with you (0‚Äì1)", min_value=0.0, max_value=1.0, value=0.4, step=0.05
)
boundary_respect = st.sidebar.slider(
    "Respects your boundaries (0‚Äì1)", min_value=0.0, max_value=1.0, value=0.7, step=0.05
)
accountability = st.sidebar.slider(
    "Accountability (owns mistakes) (0‚Äì1)", min_value=0.0, max_value=1.0, value=0.5, step=0.05
)

user_input = pd.DataFrame({
    "text_frequency": [text_frequency],
    "response_time": [response_time],
    "follow_through": [follow_through],
    "initiates_contact": [initiates_contact],
    "emotional_consistency": [emotional_consistency],
    "future_orientation": [future_orientation],
    "boundary_respect": [boundary_respect],
    "accountability": [accountability]
})

user_scaled = scaler.transform(user_input)
user_proba = clf.predict_proba(user_scaled)[0, 1]
user_pred = clf.predict(user_scaled)[0]

st.subheader("üìä Prediction for This Guy")

if user_pred == 1:
    st.success(f"‚úÖ **Green-Flag BF Detected** ‚Äî Probability: {user_proba:.2%}")
else:
    st.error(f"üö© **Mixed Signal Menace** ‚Äî Probability of Green Flag: {user_proba:.2%}")

st.caption("Note: This is a playful model using synthetic data, not actual psychometrics.")

# ---------- 3. Metrics and Confusion Matrix ----------

col1, col2 = st.columns(2)

with col1:
    st.subheader("Model Performance (Test Set)")
    st.metric("Accuracy", f"{accuracy:.3f}")
    st.metric("Precision (Green Flag)", f"{precision:.3f}")
    st.metric("Recall (Green Flag)", f"{recall:.3f}")
    st.metric("AUC", f"{auc_score:.3f}")

with col2:
    st.subheader("Confusion Matrix")
    fig_cm, ax_cm = plt.subplots(figsize=(4, 3))
    sns.heatmap(cm, annot=True, fmt="d", ax=ax_cm,
                xticklabels=["Pred: Mixed", "Pred: Green"],
                yticklabels=["True: Mixed", "True: Green"])
    ax_cm.set_ylabel("True Label")
    ax_cm.set_xlabel("Predicted Label")
    st.pyplot(fig_cm)

# ---------- 4. ROC Curve ----------

st.subheader("ROC Curve ‚Äì Green Flag BF Classifier")
fpr, tpr, _ = roc_curve(y_test, y_proba)
fig_roc, ax_roc = plt.subplots(figsize=(4, 3))
ax_roc.plot(fpr, tpr, label=f"AUC = {auc_score:.3f}")
ax_roc.plot([0, 1], [0, 1], linestyle="--")
ax_roc.set_xlabel("False Positive Rate")
ax_roc.set_ylabel("True Positive Rate")
ax_roc.legend()
st.pyplot(fig_roc)

# ---------- 5. PCA Plot ----------

st.subheader("PCA Visualization of BF Behavior Space")
fig_pca, ax_pca = plt.subplots(figsize=(4, 3))
scatter = ax_pca.scatter(X_train_pca[:, 0], X_train_pca[:, 1],
                         c=y_train, alpha=0.7)
ax_pca.set_xlabel("PC 1")
ax_pca.set_ylabel("PC 2")
legend_labels = ["Mixed Signals (0)", "Green Flag (1)"]
ax_pca.legend(handles=scatter.legend_elements()[0],
              labels=legend_labels,
              title="Class")
st.pyplot(fig_pca)

# ---------- 6. Feature Importance ----------

st.subheader("Which Behaviors Matter Most?")
importances = clf.feature_importances_
feature_importance = pd.Series(importances, index=X.columns).sort_values(ascending=False)

fig_imp, ax_imp = plt.subplots(figsize=(5, 3))
sns.barplot(x=feature_importance.values, y=feature_importance.index, ax=ax_imp)
ax_imp.set_xlabel("Importance")
ax_imp.set_ylabel("Behavior Feature")
st.pyplot(fig_imp)

st.caption("Use this to decide where your standards stay non-negotiable üíÖ.")




SyntaxError: invalid syntax (23531093.py, line 426)