#Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

#Load the dataset

In [2]:
# Path of the startup CSV; note the file name has a space before ".csv".
csv_path = "/content/startup data .csv"
df = pd.read_csv(csv_path)


#Clean the dataset

In [3]:
# Target encoding: 1 = acquired, 0 = closed.
status_codes = {"acquired": 1, "closed": 0}

# Rows without a status cannot be labelled, so they are removed first.
df = df.dropna(subset=["status"])

raw_status = df["status"]
encoded_status = raw_status.map(status_codes)

# Labels that are already 0/1 (this cell was run before) are kept as they are.
# A plain .map() turns them into NaN, and the fillna(0) further down would then
# silently relabel every row as "closed".
already_encoded = raw_status.isin(list(status_codes.values()))
encoded_status = encoded_status.where(~already_encoded, raw_status)

# Any status that is neither a known string nor an existing 0/1 label is still
# NaN here. Drop those rows explicitly instead of letting fillna(0) turn them
# into the "closed" class.
unrecognized = encoded_status.isna()
if unrecognized.any():
    print(
        f"Dropping {int(unrecognized.sum())} row(s) with unrecognized status:",
        sorted(raw_status[unrecognized].astype(str).unique()),
    )
df = df.loc[~unrecognized].copy()
# Positional assignment: the mask above keeps both sides the same length/order.
df["status"] = encoded_status[~unrecognized].astype(int).to_numpy()

# Identifier, location and date columns that are removed before modelling.
drop_cols = [
    'name', 'id', 'object_id', 'city', 'state_code', 'zip_code', 'Unnamed: 6',
    'founded_at', 'closed_at', 'first_funding_at', 'last_funding_at',
    'state_code.1', 'labels'
]
# errors="ignore" skips names that are absent (e.g. already dropped on a re-run).
df = df.drop(columns=drop_cols, errors="ignore")

# Remaining missing values, in any column, are replaced with 0.
df = df.fillna(0)

#Select key features

In [4]:
# Predictor columns used by the model.
selected_features = [
    'funding_total_usd',
    'milestones',
    'funding_rounds',
    'relationships',
]

# Keep only the predictors plus the target column, in that order.
model_columns = [*selected_features, 'status']
df = df.loc[:, model_columns]

#Split data

In [5]:
# Target vector and feature matrix (every column except the target).
y = df["status"]
X = df.drop(columns=["status"])

# 80/20 split with a fixed seed; stratify=y keeps the class ratio in both parts.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

#Train the model

In [6]:
# Random forest settings: 100 trees, fixed seed so repeated runs give the same model.
forest_params = {"n_estimators": 100, "random_state": 42}
rf_model = RandomForestClassifier(**forest_params)
rf_model.fit(X_train, y_train)

#Evaluate

In [7]:
# Score the held-out test split.
y_pred = rf_model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("✅ Model Accuracy:", test_accuracy)
print("\n📊 Classification Report:\n", report_text)

✅ Model Accuracy: 0.7675675675675676

📊 Classification Report:
               precision    recall  f1-score   support

           0       0.68      0.63      0.66        65
           1       0.81      0.84      0.82       120

    accuracy                           0.77       185
   macro avg       0.75      0.74      0.74       185
weighted avg       0.76      0.77      0.77       185



#Ask the user to enter the feature values of a new startup for prediction

In [8]:
def _prompt_number(prompt, cast):
    """Ask for a value until the reply is a finite, non-negative number.

    Parameters
    ----------
    prompt : str
        Text shown to the user by ``input``.
    cast : callable
        Converter applied to the reply, e.g. ``int`` or ``float``.

    Returns
    -------
    The converted value, of whatever type ``cast`` returns.
    """
    while True:
        reply = input(prompt)
        try:
            value = cast(reply)
        except ValueError:
            # Malformed reply (e.g. text, or "3.5" for an int): ask again
            # instead of aborting the cell with a traceback.
            print(f"Invalid input {reply!r}: expected a value of type {cast.__name__}.")
            continue
        # The chained comparison is False for negatives, inf and NaN alike.
        if 0 <= value < float("inf"):
            return value
        print(f"Invalid input {reply!r}: value must be finite and non-negative.")


print("\n📝 Enter details of a new startup to predict its status:")
funding_total_usd = _prompt_number("Total Funding (USD): ", float)
milestones = _prompt_number("Number of Milestones: ", int)
funding_rounds = _prompt_number("Number of Funding Rounds: ", int)
relationships = _prompt_number("Number of Relationships: ", int)


📝 Enter details of a new startup to predict its status:
Total Funding (USD): 35000000
Number of Milestones: 5
Number of Funding Rounds: 3
Number of Relationships: 12


#Prepare the input dictionary and convert it to a DataFrame with matching columns

In [9]:
# One record keyed by feature name, holding the values entered by the user.
input_data = dict(
    funding_total_usd=funding_total_usd,
    milestones=milestones,
    funding_rounds=funding_rounds,
    relationships=relationships,
)

# columns=selected_features fixes the column order to the one the model was fitted on.
input_df = pd.DataFrame(data=[input_data], columns=selected_features)

#Predict and show result

In [10]:
# The frame holds a single row, so the first element is the only prediction.
prediction = rf_model.predict(input_df)[0]

# Label 1 is reported as acquired; any other label as closed.
outcome_message = (
    "\n✅ Prediction: Startup will be Acquired (Success)"
    if prediction == 1
    else "\n❌ Prediction: Startup is likely to Close (Failure)"
)
print(outcome_message)


✅ Prediction: Startup will be Acquired (Success)


#Save the trained model

In [11]:
import joblib

# Objects to persist, each paired with its output file: the fitted model and
# the list of feature column names used for prediction.
artifacts = (
    (rf_model, "model.pkl"),
    (selected_features, "features.pkl"),
)
for artifact, filename in artifacts:
    joblib.dump(artifact, filename)

print("\n✅ model.pkl and features.pkl files have been saved.")



✅ model.pkl and features.pkl files have been saved.
