In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import joblib

# 1. 📥 CSV файл унших
df = pd.read_csv("C:/Users/Dell/Downloads/Ecn325 data (1).csv")  # ← замаа тааруулна уу

# 2. 🎯 Зорилтот баганыг боловсруулах ('acquired'=1, 'closed'=0)
df = df[df['status'].isin(['acquired', 'closed'])].copy()
df['status'] = df['status'].map({'acquired': 1, 'closed': 0})

# 3. 🗑️ Хэрэггүй багануудыг устгах
columns_to_drop = [
    'Unnamed: 0', 'Unnamed: 6', 'id', 'name',
    'object_id', 'state_code.1'  # 'labels', 'closed_at' үлдээнэ
]
df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# 4. 🏷️ Категори багануудыг Label Encode хийх
label_encoders = {}
categorical_cols = df.select_dtypes(include='object').columns
for col in categorical_cols:
    df[col] = df[col].fillna("unknown")
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# 5. 📊 Тоон утгуудын null-уудыг дунджаар нөхөх
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())

# 6. 🧮 Features ба Target-г салгах
X = df.drop(columns=["status"])
y = df["status"]

# 7. ⚖️ StandardScaler ашиглах
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 8. ✂️ Train/Test Split хийх (Stratify ашиглан)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, stratify=y, random_state=42
)

# 9. 🌲 RandomForest сургалт хийх
model = RandomForestClassifier(
    n_estimators=300,
    max_depth=12,
    class_weight='balanced',
    random_state=42
)
model.fit(X_train, y_train)

# 10. 📈 Үр дүн хэвлэх
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f"🎯 Accuracy: {acc:.2%}")
print(f"📊 F1-score: {f1:.2%}")

# 11. 💾 .pkl файлуудыг хадгалах
joblib.dump(model, "startup_model_pro.pkl")
joblib.dump(scaler, "scaler_pro.pkl")
joblib.dump(label_encoders, "label_encoders_pro.pkl")
joblib.dump(X.columns.tolist(), "feature_order_pro.pkl")
print("✅ Бүх .pkl файлууд амжилттай үүсгэгдлээ.")


🎯 Accuracy: 78.92%
📊 F1-score: 84.58%
✅ Бүх .pkl файлууд амжилттай үүсгэгдлээ.


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from imblearn.over_sampling import SMOTE
import joblib

# 1. 📥 CSV датаг унших
df = pd.read_csv("C:/Users/Dell/Downloads/Ecn325 data (1).csv")  # ← Замыг өөрийн компьютерт тааруулна уу

# 2. 🎯 Target хөрвүүлэх ('acquired'=1, 'closed'=0)
df = df[df['status'].isin(['acquired', 'closed'])].copy()
df['status'] = df['status'].map({'acquired': 1, 'closed': 0})

# 3. 🗑️ Хэрэггүй багануудыг устгах
columns_to_drop = [
    'Unnamed: 0', 'Unnamed: 6', 'id', 'name',
    'object_id', 'state_code.1'
]
df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# 4. 🏷️ Категори багануудыг Label Encode хийх
label_encoders = {}
categorical_cols = df.select_dtypes(include='object').columns
for col in categorical_cols:
    df[col] = df[col].fillna("unknown")
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# 5. 📊 Тоон утгуудын хоосон утгыг дунджаар нөхөх
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())

# 6. 🧮 Features ба Target-г салгах
X = df.drop(columns=["status"])
y = df["status"]

# 7. ⚖️ Стандартад оруулах
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 8. 🧪 SMOTE ашиглаж balance хийх
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

# 9. ✂️ Train-test split хийх
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, stratify=y_resampled, random_state=42
)

# 10. 🔁 Logistic Regression сургалт
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

# 11. 📈 Үр дүн
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f"✅ Accuracy: {acc:.2%}")
print(f"📊 F1-score: {f1:.2%}")

# 12. 💾 Файлуудыг хадгалах
joblib.dump(model, "startup_model_logreg_smote.pkl")
joblib.dump(scaler, "scaler_logreg_smote.pkl")
joblib.dump(label_encoders, "label_encoders_logreg_smote.pkl")
joblib.dump(X.columns.tolist(), "feature_order_logreg_smote.pkl")
print("✅ Бүх .pkl файлууд SMOTE загвараар үүсгэгдлээ.")


✅ Accuracy: 77.82%
📊 F1-score: 77.25%
✅ Бүх .pkl файлууд SMOTE загвараар үүсгэгдлээ.




In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import joblib

# 1. 📥 CSV датаг унших
df = pd.read_csv("C:/Users/Dell/Downloads/Ecn325 data (1).csv")  # ← замаа өөрийн компьютерт тааруулна уу

# 2. 🎯 Target-г хөрвүүлэх
df = df[df['status'].isin(['acquired', 'closed'])].copy()
df['status'] = df['status'].map({'acquired': 1, 'closed': 0})

# 3. 🗑️ Хэрэггүй багануудыг хасах
columns_to_drop = ['Unnamed: 0', 'Unnamed: 6', 'id', 'name', 'object_id', 'state_code.1']
df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# 4. 🏷️ Категори талбаруудыг Label Encode
label_encoders = {}
categorical_cols = df.select_dtypes(include='object').columns
for col in categorical_cols:
    df[col] = df[col].fillna("unknown")
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# 5. 📊 Тоон талбаруудын null-уудыг дунджаар нөхөх
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())

# 6. 🧮 Features болон Target-г салгах
X = df.drop(columns=["status"])
y = df["status"]

# 7. ⚖️ Стандартад оруулах
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 8. ✂️ Train/Test split хийх
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, stratify=y, random_state=42
)

# 9. 🌲 RandomForestClassifier ашиглан сургалт
model = RandomForestClassifier(n_estimators=300, max_depth=12, class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

# 10. 📈 Үр дүн шалгах
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print(f"🎯 Accuracy: {acc:.2%}")
print(f"📊 F1-score: {f1:.2%}")

# 11. 💾 Бүх .pkl файлуудыг хадгалах
joblib.dump(model, "startup_model_initial.pkl")
joblib.dump(scaler, "scaler_initial.pkl")
joblib.dump(label_encoders, "label_encoders_initial.pkl")
joblib.dump(X.columns.tolist(), "feature_order_initial.pkl")
print("✅ Эхний сургалтын бүх .pkl файлууд үүсгэгдлээ.")


🎯 Accuracy: 78.92%
📊 F1-score: 84.58%
✅ Эхний сургалтын бүх .pkl файлууд үүсгэгдлээ.
