In [8]:
import gradio as gr
import pandas as pd
import joblib

# 1. 📦 SMOTE Logistic Regression .pkl файлуудыг ачаалах
model = joblib.load("startup_model_logreg_smote.pkl")
scaler = joblib.load("scaler_logreg_smote.pkl")
label_encoders = joblib.load("label_encoders_logreg_smote.pkl")
feature_order = joblib.load("feature_order_logreg_smote.pkl")

# 2. 🧠 Таамаг гаргах функц
def predict(state, category, funding, relationships, milestones, participants, vc, angel, top500):
    features = {col: 0 for col in feature_order}

    # Муж
    if state in label_encoders['state_code'].classes_:
        features['state_code'] = label_encoders['state_code'].transform([state])[0]
    features[f"is_{state}" if f"is_{state}" in feature_order else "is_otherstate"] = 1

    # Салбар
    if category in label_encoders['category_code'].classes_:
        features['category_code'] = label_encoders['category_code'].transform([category])[0]
    features[f"is_{category}" if f"is_{category}" in feature_order else "is_othercategory"] = 1

    # Тоон болон логик талбарууд
    features['funding_total_usd'] = funding * 1_000_000
    features['relationships'] = relationships
    features['milestones'] = milestones
    features['avg_participants'] = participants
    features['has_VC'] = int(vc)
    features['has_angel'] = int(angel)
    features['is_top500'] = int(top500)

    # Default-required
    features['closed_at'] = 0
    features['status'] = 0

    df = pd.DataFrame([features])[feature_order]
    df_scaled = scaler.transform(df)
    prob = model.predict_proba(df_scaled)[0]
    pred = model.predict(df_scaled)[0]

    # ✅ Зөвлөмжтэй хариу буцаах
    if pred == 1:
        msg = f"✅ Танай стартап амжилттай болох магадлал: {prob[1]:.2%}.\n\n🚀 Зөвлөмж: Илүү milestone хэрэгжүүлж, хөрөнгө оруулалтаа тогтвортой нэмэгдүүлээрэй."
    else:
        msg = f"❌ Танай стартап амжилтгүй болох магадлал: {prob[0]:.2%}.\n\n📉 Зөвлөмж: VC эсвэл angel хөрөнгө оруулагч татах, багийн бүтцийг бэхжүүлэх шаардлагатай байж магадгүй."

    return msg

# 3. 🌐 Gradio chatbot-style interface
with gr.Blocks(title="Startup Chatbot (LogReg + SMOTE)") as chatbot:
    gr.Markdown("## 🤖 Стартап Амжилтын Таамаглагч Чатбот (Logistic Regression + SMOTE)")
    gr.Markdown("Та асуултад хариулаад ML загварын таамаг + зөвлөмжөө аваарай.")

    with gr.Row():
        state = gr.Dropdown(["CA", "NY", "TX", "MA", "otherstate"], label="1. Аль мужид байрладаг вэ?")
        category = gr.Dropdown(["biotech", "software", "web", "othercategory"], label="2. Ямар салбарт ажилладаг вэ?")

    funding = gr.Slider(0, 100, step=1, label="3. Хөрөнгө (сая USD)", value=1)
    relationships = gr.Slider(0, 20, step=1, label="4. Co-founder болон харилцааны тоо", value=3)
    milestones = gr.Slider(0, 10, step=1, label="5. Milestone-ийн тоо", value=2)
    participants = gr.Slider(0, 10, step=1, label="6. Дундаж хөрөнгө оруулагчийн тоо", value=4)

    vc = gr.Checkbox(label="7. VC хөрөнгө оруулагчтай юу?")
    angel = gr.Checkbox(label="8. Angel хөрөнгө оруулагчтай юу?")
    top500 = gr.Checkbox(label="9. Top 500-д багтсан уу?")

    btn = gr.Button("📊 Таамаг гаргах")
    result = gr.Textbox(label="🧠 Таамаг ба Зөвлөмж")

    btn.click(fn=predict, inputs=[
        state, category, funding, relationships,
        milestones, participants, vc, angel, top500
    ], outputs=result)

chatbot.launch(share=False)


Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.




In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from imblearn.over_sampling import SMOTE
import joblib

# 1. 📥 CSV датаг унших
df = pd.read_csv("C:/Users/Dell/Downloads/Ecn325 data (1).csv")  # ← Замыг өөрийн компьютерт тааруулна уу

# 2. 🎯 Target хөрвүүлэх ('acquired'=1, 'closed'=0)
df = df[df['status'].isin(['acquired', 'closed'])].copy()
df['status'] = df['status'].map({'acquired': 1, 'closed': 0})

# 3. 🗑️ Хэрэггүй багануудыг устгах
columns_to_drop = [
    'Unnamed: 0', 'Unnamed: 6', 'id', 'name',
    'object_id', 'state_code.1'
]
df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# 4. 🏷️ Категори багануудыг Label Encode хийх
label_encoders = {}
categorical_cols = df.select_dtypes(include='object').columns
for col in categorical_cols:
    df[col] = df[col].fillna("unknown")
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# 5. 📊 Тоон утгуудын хоосон утгыг дунджаар нөхөх
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())

# 6. 🧮 Features ба Target-г салгах
X = df.drop(columns=["status"])
y = df["status"]

# 7. ⚖️ Стандартад оруулах
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 8. 🧪 SMOTE ашиглаж balance хийх
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

# 9. ✂️ Train-test split хийх
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, stratify=y_resampled, random_state=42
)

# 10. 🔁 Logistic Regression сургалт
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

# 11. 📈 Үр дүн
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f"✅ Accuracy: {acc:.2%}")
print(f"📊 F1-score: {f1:.2%}")

# 12. 💾 Файлуудыг хадгалах
joblib.dump(model, "startup_model_logreg_smote.pkl")
joblib.dump(scaler, "scaler_logreg_smote.pkl")
joblib.dump(label_encoders, "label_encoders_logreg_smote.pkl")
joblib.dump(X.columns.tolist(), "feature_order_logreg_smote.pkl")
print("✅ Бүх .pkl файлууд SMOTE загвараар үүсгэгдлээ.")


✅ Accuracy: 77.82%
📊 F1-score: 77.25%
✅ Бүх .pkl файлууд SMOTE загвараар үүсгэгдлээ.




In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
import joblib

# 1. CSV датаг унших
df = pd.read_csv("C:/Users/Dell/Downloads/Ecn325 data (1).csv")  # ← замаа тааруулна уу

# 2. Target хөрвүүлэх ('acquired'=1, 'closed'=0)
df = df[df['status'].isin(['acquired', 'closed'])].copy()
df['status'] = df['status'].map({'acquired': 1, 'closed': 0})

# 3. Хэрэггүй багануудыг хасах
columns_to_drop = [
    'Unnamed: 0', 'Unnamed: 6', 'id', 'name',
    'object_id', 'state_code.1'
]
df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# 4. Категори талбаруудыг Label Encode
label_encoders = {}
categorical_cols = df.select_dtypes(include='object').columns
for col in categorical_cols:
    df[col] = df[col].fillna("unknown")
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# 5. Тоон багануудын хоосон утгыг дунджаар нөхөх
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())

# 6. Features болон Target-г салгах
X = df.drop(columns=["status"])
y = df["status"]

# 7. Стандартад оруулах
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 8. Train-Test split хийх
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, stratify=y, random_state=42)

# 9. Logistic Regression model сургалт
model = LogisticRegression(
    class_weight='balanced',  # Imbalance-г засна
    max_iter=1000,
    random_state=42
)
model.fit(X_train, y_train)

# 10. Үр дүн
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f"🎯 Accuracy: {acc:.2%}")
print(f"📊 F1-score: {f1:.2%}")

# 11. .pkl файлуудыг хадгалах
joblib.dump(model, "startup_model_logreg.pkl")
joblib.dump(scaler, "scaler_logreg.pkl")
joblib.dump(label_encoders, "label_encoders_logreg.pkl")
joblib.dump(X.columns.tolist(), "feature_order_logreg.pkl")
print("✅ Бүх .pkl файлууд Logistic Regression хувилбараар үүсгэгдлээ.")


🎯 Accuracy: 68.65%
📊 F1-score: 72.90%
✅ Бүх .pkl файлууд Logistic Regression хувилбараар үүсгэгдлээ.


In [11]:
import gradio as gr
import pandas as pd
import joblib

# 1. 📦 Logistic загварын .pkl файлуудыг ачаалах
model = joblib.load("startup_model_logreg.pkl")
scaler = joblib.load("scaler_logreg.pkl")
label_encoders = joblib.load("label_encoders_logreg.pkl")
feature_order = joblib.load("feature_order_logreg.pkl")

# 2. 🧠 Таамаг гаргах функц
def predict(state, category, funding, relationships, milestones, participants, vc, angel, top500):
    features = {col: 0 for col in feature_order}

    # Муж
    if state in label_encoders['state_code'].classes_:
        features['state_code'] = label_encoders['state_code'].transform([state])[0]
    features[f"is_{state}" if f"is_{state}" in feature_order else "is_otherstate"] = 1

    # Салбар
    if category in label_encoders['category_code'].classes_:
        features['category_code'] = label_encoders['category_code'].transform([category])[0]
    features[f"is_{category}" if f"is_{category}" in feature_order else "is_othercategory"] = 1

    # Тоон болон логик талбарууд
    features['funding_total_usd'] = funding * 1_000_000
    features['relationships'] = relationships
    features['milestones'] = milestones
    features['avg_participants'] = participants
    features['has_VC'] = int(vc)
    features['has_angel'] = int(angel)
    features['is_top500'] = int(top500)

    # Default-required
    features['closed_at'] = 0
    features['status'] = 0

    df = pd.DataFrame([features])[feature_order]
    df_scaled = scaler.transform(df)
    prob = model.predict_proba(df_scaled)[0]
    pred = model.predict(df_scaled)[0]

    # ✅ Зөвлөмжтэй хариу буцаах
    if pred == 1:
        msg = f"✅ Танай стартап амжилттай болох магадлал: {prob[1]:.2%}.\n\n🚀 Зөвлөмж: Илүү milestone хэрэгжүүлж, хөрөнгө оруулалтаа тогтвортой нэмэгдүүлээрэй."
    else:
        msg = f"❌ Танай стартап амжилтгүй болох магадлал: {prob[0]:.2%}.\n\n📉 Зөвлөмж: VC эсвэл angel хөрөнгө оруулагч татах, багийн бүтцийг бэхжүүлэх шаардлагатай байж магадгүй."

    return msg

# 3. 🌐 Gradio chatbot-style interface
with gr.Blocks(title="Startup Chatbot (LogReg)") as chatbot:
    gr.Markdown("## 🤖 Стартап Амжилтын Таамаглагч")
    gr.Markdown("Та асуултад хариулаад ML загварын таамаг + зөвлөмжөө аваарай.")

    with gr.Row():
        state = gr.Dropdown(["CA", "NY", "TX", "MA", "otherstate"], label="1. Аль мужид байрладаг вэ?")
        category = gr.Dropdown(["biotech", "software", "web", "othercategory"], label="2. Ямар салбарт ажилладаг вэ?")

    funding = gr.Slider(0, 100, step=1, label="3. Хөрөнгө (сая USD)", value=1)
    relationships = gr.Slider(0, 20, step=1, label="4. Co-founder болон харилцааны тоо", value=3)
    milestones = gr.Slider(0, 10, step=1, label="5. Milestone-ийн тоо", value=2)
    participants = gr.Slider(0, 10, step=1, label="6. Дундаж хөрөнгө оруулагчийн тоо", value=4)

    vc = gr.Checkbox(label="7. VC хөрөнгө оруулагчтай юу?")
    angel = gr.Checkbox(label="8. Angel хөрөнгө оруулагчтай юу?")
    top500 = gr.Checkbox(label="9. Top 500-д багтсан уу?")

    btn = gr.Button("📊 Таамаг гаргах")
    result = gr.Textbox(label="🧠 Таамаг ба Зөвлөмж")

    btn.click(fn=predict, inputs=[
        state, category, funding, relationships,
        milestones, participants, vc, angel, top500
    ], outputs=result)

chatbot.launch(share=False)


Running on local URL:  http://127.0.0.1:7865

Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB

To create a public link, set `share=True` in `launch()`.




In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import joblib

# 1. 📥 CSV унших
df = pd.read_csv("C:/Users/Dell/Downloads/Ecn325 data (1).csv")

# 2. 🎯 Target: 'status' -> acquired: 1, closed: 0
df = df[df['status'].isin(['acquired', 'closed'])].copy()
df['status'] = df['status'].map({'acquired': 1, 'closed': 0})

# 3. 🗑️ Хэрэггүй багануудыг хасах
columns_to_drop = [
    'Unnamed: 0', 'Unnamed: 6', 'id', 'name',
    'object_id', 'state_code.1', 'labels', 'closed_at'
]
df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# 4. 🏷️ Категори багануудыг Label Encode хийх
label_encoders = {}
categorical_cols = df.select_dtypes(include='object').columns
for col in categorical_cols:
    df[col] = df[col].fillna("unknown")
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# 5. 📊 Тоон багануудын null-уудыг дунджаар нөхөх
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())

# 6. 🔀 Features болон target-г салгах
X = df.drop(columns=["status"])
y = df["status"]

# 7. ⚖️ Стандартад оруулах
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 8. ✂️ Train-Test split (stratified)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y)

# 9. 🌲 Загвар сургалт (илүү сайн тохиргоотой RandomForest)
model = RandomForestClassifier(
    n_estimators=200, max_depth=10, class_weight='balanced', random_state=42
)
model.fit(X_train, y_train)

# 10. 📈 Үр дүн шалгах
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"✅ Загварын Accuracy: {acc:.2%}")
print(f"📊 F1-score: {f1:.2%}")

# 11. 💾 Бүх .pkl файлуудыг хадгалах
joblib.dump(model, "startup_model.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(label_encoders, "label_encoders.pkl")
joblib.dump(X.columns.tolist(), "feature_order.pkl")
print("✅ Бүх .pkl файлууд амжилттай хадгалагдлаа.")


✅ Загварын Accuracy: 78.38%
📊 F1-score: 84.13%
✅ Бүх .pkl файлууд амжилттай хадгалагдлаа.


In [15]:
import gradio as gr
import pandas as pd
import joblib
import re
import traceback

# 1. Загвар болон туслах файлуудыг ачаалах
model = joblib.load("startup_model.pkl")
scaler = joblib.load("scaler.pkl")
label_encoders = joblib.load("label_encoders.pkl")
feature_order = joblib.load("feature_order.pkl")

# 2. Feature гаргах өргөтгөсөн функц
def extract_features(text):
    features = {col: 0 for col in feature_order}

    # --- Байршил ---
    if re.search(r'\b(CA|California)\b', text, re.I):
        if 'state_code' in label_encoders and 'CA' in label_encoders['state_code'].classes_:
            features['state_code'] = label_encoders['state_code'].transform(['CA'])[0]
            features['is_CA'] = 1
    elif re.search(r'\b(NY|New York)\b', text, re.I):
        if 'state_code' in label_encoders and 'NY' in label_encoders['state_code'].classes_:
            features['state_code'] = label_encoders['state_code'].transform(['NY'])[0]
            features['is_NY'] = 1
    else:
        features['is_otherstate'] = 1

    # --- Салбар ---
    if 'category_code' in label_encoders:
        if re.search(r'biotech', text, re.I):
            features['category_code'] = label_encoders['category_code'].transform(['biotech'])[0]
            features['is_biotech'] = 1
        elif re.search(r'software', text, re.I):
            features['category_code'] = label_encoders['category_code'].transform(['software'])[0]
            features['is_software'] = 1
        elif re.search(r'web', text, re.I):
            features['category_code'] = label_encoders['category_code'].transform(['web'])[0]
            features['is_web'] = 1
        else:
            features['is_othercategory'] = 1

    # --- Хөрөнгө оруулалт ---
    match = re.search(r'(\d+(?:\.\d+)?)\s*(сая|million|m)', text, re.I)
    if match:
        features['funding_total_usd'] = float(match.group(1)) * 1_000_000
    else:
        features['funding_total_usd'] = 1_000_000  # default

    # --- Хөрөнгө оруулагчид ---
    if "angel" in text.lower():
        features['has_angel'] = 1
    if "vc" in text.lower() or "venture" in text.lower():
        features['has_VC'] = 1

    # --- Харилцаа, milestone, оролцогчид, top500 ---
    match = re.search(r'(\d+)\s+(хамтрагч|co[- ]?founder|partner)', text, re.I)
    if match:
        features['relationships'] = int(match.group(1))
    else:
        features['relationships'] = 3

    match = re.search(r'(\d+)\s+(milestone|шат)', text, re.I)
    if match:
        features['milestones'] = int(match.group(1))
    else:
        features['milestones'] = 2

    match = re.search(r'(\d+)\s+(investor|хөрөнгө оруулагч)', text, re.I)
    if match:
        features['avg_participants'] = int(match.group(1))
    else:
        features['avg_participants'] = 4

    if re.search(r'top\s*500', text, re.I):
        features['is_top500'] = 1

    # Зайлшгүй талбарууд
    features['closed_at'] = 0
    features['status'] = 0

    return features

# 3. Таамаг гаргах үндсэн функц
def smart_predict(text):
    try:
        input_data = extract_features(text)
        df = pd.DataFrame([input_data])
        df = df[feature_order]  # зөв дарааллыг баримтална
        df_scaled = scaler.transform(df)

        prob = model.predict_proba(df_scaled)[0]
        pred = model.predict(df_scaled)[0]

        if pred == 1:
            return f"✅ Амжилттай болох магадлал: {prob[1]:.2%}"
        else:
            return f"❌ Амжилтгүй болох магадлал: {prob[0]:.2%}"
    except Exception as e:
        traceback.print_exc()
        return "⚠️ Алдаа гарлаа. Та терминалаас шалгана уу."

# 4. Gradio интерфэйс
iface = gr.Interface(
    fn=smart_predict,
    inputs=gr.Textbox(lines=4, label="Стартапынхаа мэдээллийг бичнэ үү"),
    outputs="text",
    title="Стартап Амжилтын Таамаглагч",
    description="Жишээ: 'Манай стартап CA-д байрладаг, biotech салбарт ажилладаг, 5 сая долларын хөрөнгө авсан'",
    examples=[
        ["Манай стартап CA-д байрладаг, biotech салбарт ажилладаг, 5 сая долларын хөрөнгө авсан, VC оролцсон"],
        ["Software хийдэг, angel хөрөнгө оруулагчтай"],
        ["2 milestone-той, 5 хөрөнгө оруулагчтай, top 500-д багтсан"],
        ["New York-based startup with 3 co-founders and 2 rounds of funding"]
    ]
)

iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7871
Running on public URL: https://5abeeaa70477407867.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


