## **1. Import Libraries**

In [106]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler

### **2. Load Data**

In [112]:
df = pd.read_csv("cleaned_dataset_new.csv")

### **3. Features and Target**

In [118]:
df.columns

Index(['status_encoded', 'relationships', 'funding_per_milestone',
       'mean_funding_by_country', 'funding_relative_to_country',
       'rounds_relative_to_category', 'founded_year', 'founded_month',
       'milestone_duration', 'category_code_advertising',
       'category_code_consulting', 'category_code_ecommerce',
       'category_code_enterprise', 'category_code_games_video',
       'category_code_public_relations', 'category_code_software',
       'category_code_web', 'country_code_CAN', 'country_code_FRA',
       'country_code_GBR', 'country_code_IND', 'state_code_FL',
       'state_code_IL', 'state_code_MA', 'state_code_NJ', 'state_code_TX',
       'state_code_WA'],
      dtype='object')

In [120]:
df['status_encoded']

0        3
1        3
2        3
3        3
4        3
        ..
47613    3
47614    3
47615    3
47616    3
47617    3
Name: status_encoded, Length: 47618, dtype: int64

In [124]:
X = df.drop("status_encoded", axis=1)  
y = df["status_encoded"]   

In [126]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

### **4. Train-Test Split**

In [130]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

### **5. Train SVM with One-vs-Rest**

In [133]:
svm_model = OneVsRestClassifier(
    SVC(kernel="linear", probability=True, random_state=42)
)

In [135]:
svm_model.fit(X_train, y_train)

### **6. Predictions and Evaluation**

In [138]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.9044519109617808

Classification Report:
               precision    recall  f1-score   support

    acquired       0.12      0.19      0.15       387
      closed       1.00      1.00      1.00       199
         ipo       0.00      0.00      0.00        27
   operating       0.96      0.94      0.95      8911

    accuracy                           0.90      9524
   macro avg       0.52      0.53      0.52      9524
weighted avg       0.93      0.90      0.91      9524


Confusion Matrix:
 [[  73    0    9  305]
 [   0  199    0    0]
 [   3    0    0   24]
 [ 531    0   38 8342]]


In [140]:
import numpy as np
unique, counts = np.unique(y, return_counts=True)
print(dict(zip(unique, counts)))

{0: 1936, 1: 994, 2: 134, 3: 44554}


### **7. Train SVM with class_weight**

In [143]:
svm_model = OneVsRestClassifier(
    SVC(kernel="linear", class_weight="balanced", probability=True, random_state=42)
)
svm_model.fit(X_train, y_train)

In [144]:
### **8. Predictions and Evaluation**

In [145]:
y_pred = svm_model.predict(X_test)

In [146]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.629462410751785

Classification Report:
               precision    recall  f1-score   support

    acquired       0.11      0.45      0.18       387
      closed       0.09      0.77      0.16       199
         ipo       0.03      0.41      0.05        27
   operating       0.97      0.63      0.77      8911

    accuracy                           0.63      9524
   macro avg       0.30      0.57      0.29      9524
weighted avg       0.92      0.63      0.73      9524


Confusion Matrix:
 [[ 174   48   44  121]
 [  10  153    0   36]
 [  15    1   11    0]
 [1361 1518  375 5657]]
