In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np

# Sample data for startups
# Size of the synthetic catalogue and the RNG seed. The seed is fixed so that
# Restart Kernel -> Run All rebuilds exactly the same table.
TOTAL_STARTUPS = 750
RANDOM_SEED = 42

startup_names = ['Gigtern', 'Level UP', 'SCALE UP', 'deposit2profit', 'TEAM IZET', 'Cosmic Innovators',
                 'Insight Innovators', 'Siddhant Soni & Company', 'LinkedList', 'Ecozyme', 'Divyam', 'Rad',
                 'Park Me', 'Mauryas', 'nexSUS', 'Elon Musk would worry', 'S-Tech Forge', 'E-gods', 'Panda',
                 'Farmally', 'Waraq', 'Megamerchant', 'SWABUN', 'Dropouts', 'CodeX', 'Trailblazers', 'A to Z',
                 'Hermony', 'Team Inaayat', 'Life Link', 'Healthify', 'NothingMuch', 'Dragons', 'TraBii',
                 'YOUnic', 'Home Eaze', 'Wellness Wizards', 'PUP Geeks', 'TerraTech', 'Project Lithium',
                 'Biotarians', 'Wet Wisdom', 'Titans', 'Ideators', 'BlockTrak', 'Team MV', 'Stylin']

# Pad the real names with 'Startup<N>' placeholders so the catalogue has
# exactly TOTAL_STARTUPS rows. range() excludes its stop value, hence the
# "+ 1"; without it the table ends up one row short.
num_placeholders = max(0, TOTAL_STARTUPS - len(startup_names))
additional_names = ['Startup' + str(i) for i in range(1, num_placeholders + 1)]

# Concatenate the provided names with additional names
startup_names = startup_names + additional_names

# Generate random data for each parameter from a locally seeded generator
# (the global NumPy random state is left untouched).
rng = np.random.default_rng(RANDOM_SEED)
n_startups = len(startup_names)
investment_required = rng.integers(100000, 500000, size=n_startups)  # upper bound is exclusive
industry = rng.choice(['Tech', 'Healthcare', 'Finance'], size=n_startups)
equity_offered = np.around(rng.uniform(1, 15, size=n_startups), decimals=1)  # rounded to one decimal
stage = rng.choice(['Early', 'Mid', 'Late'], size=n_startups)

# One row per startup; the columns are the attributes the recommender reads.
startup_df = pd.DataFrame({
    'Startup': startup_names,
    'Investment_Required': investment_required,
    'Industry': industry,
    'Equity_Offered': equity_offered,
    'Stage': stage
})

# User input for investor preferences
def _prompt_float(prompt):
    """Ask with `prompt` until the reply parses as a float, then return it."""
    while True:
        reply = input(prompt).strip()
        try:
            return float(reply)
        except ValueError:
            print(f"'{reply}' is not a number, please try again.")


def _prompt_choice(prompt, options):
    """Ask with `prompt` until the reply matches one of `options`.

    Matching ignores case and surrounding whitespace; the canonical
    spelling from `options` is returned so later equality filters on the
    startup table behave as expected.
    """
    canonical = {option.lower(): option for option in options}
    while True:
        reply = input(prompt).strip().lower()
        if reply in canonical:
            return canonical[reply]
        print(f"Please enter one of: {'/'.join(options)}")


# These must match the category labels used when the startup table is built;
# a reply outside them could only ever produce an empty recommendation list.
_INDUSTRY_OPTIONS = ('Tech', 'Healthcare', 'Finance')
_STAGE_OPTIONS = ('Early', 'Mid', 'Late')

print("Enter your preferences:")
user_investment_required = _prompt_float("Investment required (in USD): ")
user_industry = _prompt_choice("Industry (Tech/Healthcare/Finance): ", _INDUSTRY_OPTIONS)
user_equity_offered = _prompt_float("Equity offered: ")
user_stage = _prompt_choice("Stage (Early/Mid/Late): ", _STAGE_OPTIONS)

# Create DataFrame for user preferences
# Single-row frame holding only the numeric preferences, under the same
# column names as the startup table's numeric features.
user_df = pd.DataFrame({
    'Investment_Required': [user_investment_required],
    'Equity_Offered': [user_equity_offered]
})

# Standardize numerical features
# The scaler is fitted on the startup table only; the investor's single row is
# then transformed with those same fitted statistics.
numeric_columns = ['Investment_Required', 'Equity_Offered']
startup_features = startup_df[numeric_columns]
scaler = StandardScaler()
startup_features_scaled = scaler.fit(startup_features).transform(startup_features)
user_features_scaled = scaler.transform(user_df)

# Calculate cosine similarity
# Every startup row is scored against the one investor row.
# NOTE(review): cosine similarity on standardized features compares direction
# relative to the column means, not distance to the investor's values, so a
# startup far from the requested amounts can still score near 1. Confirm this
# is the intended notion of "similar".
similarity_scores = cosine_similarity(X=startup_features_scaled, Y=user_features_scaled)

# Add similarity scores to the DataFrame
# Collapse the score matrix to one value per startup before storing it.
startup_df['Similarity'] = similarity_scores.ravel()

# Filter and sort recommended startups
# Industry and stage must match the investor's choice exactly; the survivors
# are ordered from highest to lowest similarity.
industry_match = startup_df['Industry'].eq(user_industry)
stage_match = startup_df['Stage'].eq(user_stage)
recommended_startups = startup_df.loc[industry_match & stage_match].sort_values(
    by='Similarity', ascending=False
)

# Largest allowed gap (either side) between a startup's equity offer and the
# investor's requested equity.
max_deviation = 4

# Filter recommended startups based on the deviation of equity offered
# Both ends of the window are inclusive.
equity_floor = user_equity_offered - max_deviation
equity_ceiling = user_equity_offered + max_deviation
within_equity_window = recommended_startups['Equity_Offered'].between(equity_floor, equity_ceiling)
filtered_startups = recommended_startups[within_equity_window]

print(f"\n Total Results:{len(filtered_startups)}  of start Recommended Startups:")
# Bare last expression so the notebook renders the table.
filtered_startups

Enter your preferences:
Investment required (in USD): 125000
Industry (Tech/Healthcare/Finance): Healthcare
Equity offered: 15
Stage (Early/Mid/Late): Early

 Total Results:32  of start Recommended Startups:


Unnamed: 0,Startup,Investment_Required,Industry,Equity_Offered,Stage,Similarity
378,Startup332,134341,Healthcare,14.6,Early,0.999998
258,Startup212,131056,Healthcare,14.9,Early,0.999947
27,Hermony,153217,Healthcare,13.6,Early,0.999736
167,Startup121,111914,Healthcare,15.0,Early,0.999362
270,Startup224,140790,Healthcare,14.9,Early,0.99923
140,Startup94,170127,Healthcare,12.2,Early,0.994543
72,Startup26,233386,Healthcare,12.1,Early,0.979991
709,Startup663,235024,Healthcare,12.1,Early,0.977859
655,Startup609,108025,Healthcare,12.6,Early,0.96872
328,Startup282,222989,Healthcare,14.2,Early,0.953833


In [2]:
# Display the startup table. The 'Similarity' column shown in the output is
# added by the recommendation cell, so that cell must have run first.
# NOTE(review): the execution counts are out of order (In [3] above, In [2]
# here), so this recorded output may come from an earlier run with different
# investor inputs - re-run with Restart Kernel -> Run All before relying on it.
startup_df

Unnamed: 0,Startup,Investment_Required,Industry,Equity_Offered,Stage,Similarity
0,Gigtern,163335,Tech,1.5,Mid,0.679654
1,Level UP,402042,Healthcare,13.2,Late,-0.603089
2,SCALE UP,400493,Healthcare,5.8,Late,-0.817840
3,deposit2profit,395748,Tech,14.1,Early,-0.524469
4,TEAM IZET,254974,Healthcare,11.8,Early,0.329032
...,...,...,...,...,...,...
744,Startup698,468460,Tech,5.6,Mid,-0.903632
745,Startup699,268876,Healthcare,14.4,Late,0.110361
746,Startup700,227781,Healthcare,11.9,Late,0.485832
747,Startup701,422996,Healthcare,13.0,Late,-0.686967
