In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors


In [4]:

# Read the project table from the CSV that sits next to this notebook
DATA_FILE = "final.csv"
df = pd.read_csv(DATA_FILE)


In [5]:

# Combine the matching features into one text field per project.
# Missing cells are rendered as "" rather than the literal string "nan" that
# astype(str) would produce; otherwise "nan" becomes a TF-IDF token shared by
# every row with a missing value and makes unrelated projects look similar.
def _text_or_blank(column):
    """Return df[column] as strings, with missing values replaced by ''."""
    values = df[column]
    return values.astype(str).where(values.notna(), "")


df["combined"] = (
    _text_or_blank("Preferred_technologies") + " " +
    _text_or_blank("Experience_level") + " " +
    _text_or_blank("Domain") + " " +
    _text_or_blank("budget")
)


In [6]:

# Learn the TF-IDF vocabulary from the combined text and encode every project
# row; X has one row per project, in the same order as df.
corpus = df["combined"]
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)


In [7]:

# Nearest-neighbour index over the TF-IDF rows: cosine distance, three matches
# per query by default.
nn = NearestNeighbors(metric="cosine", n_neighbors=3)
nn.fit(X)


0,1,2
,n_neighbors,3
,radius,1.0
,algorithm,'auto'
,leaf_size,30
,metric,'cosine'
,p,2
,metric_params,
,n_jobs,


In [15]:
# List every distinct tag found in the comma-separated Domain column
# (missing cells are skipped, surrounding whitespace is trimmed).
domain_names = {
    tag.strip()
    for cell in df["Domain"].dropna()
    for tag in cell.split(",")
}

print(sorted(domain_names))

['AI', 'AR', 'Adventure', 'Analytics', 'App', 'Audio', 'Automation', 'Boutique', 'Builder', 'CRM', 'Checkout', 'Climate', 'Clinic', 'Coding', 'Community', 'Crafts', 'Crypto', 'Data', 'Drone', 'Education', 'Energy', 'Farming', 'Finance', 'Gaming', 'Green', 'Group', 'Health', 'Investment', 'IoT', 'Irrigation', 'Language', 'Lending', 'Literacy', 'Marketplace', 'Media', 'Music', 'Niche', 'Nomad', 'Nutrients', 'Online', 'Outdoor', 'Planner', 'Portal', 'Prototyping', 'Quest', 'Remote', 'Retail', 'Safety', 'Security', 'Seniors', 'Solo', 'Tasks', 'Telehealth', 'Tools', 'Tracker', 'Trends', 'Triage', 'Tutor', 'Urban', 'Video', 'Wallet', 'Waste', 'Websites', 'Wellness', 'Workout', 'Youth']


In [14]:
# Display all unique names in the Preferred_technologies column.
# Missing cells are dropped first: a NaN is a float, so calling .split on it
# would raise AttributeError. This mirrors how the Domain column is listed.
preferred_tech_names = {
    tech.strip()
    for techs in df["Preferred_technologies"].dropna()
    for tech in techs.split(",")
}

print(sorted(preferred_tech_names))

['AI', 'API Design', 'API Integration', 'APIs', 'AR/VR', 'AR/VR Development', 'Blockchain', 'Chatbot Development', 'Cloud Computing', 'Cloud Infrastructure', 'Content Development', 'Content Management', 'Cryptography', 'Cybersecurity', 'Data Analytics', 'Data Science', 'Data Security', 'Database Management', 'Drone', 'E-commerce', 'Fitness', 'GIS', 'GPS', 'Gamification', 'IoT', 'Machine Learning', 'Mobile App', 'Mobile Development', 'NLP', 'No-Code', 'Payment Systems', 'Payments', 'SEO', 'Smart Contracts', 'Streaming', 'Sustainability', 'UI/UX Design', 'UX', 'Web', 'Web Development']


In [9]:
# Preview the Preferred_technologies column; pandas abbreviates a long Series
# when printing, so only the first and last rows are shown.
tech_column = df["Preferred_technologies"]
print(tech_column)

0                    AI, NLP, Data Science, UI/UX Design
1              Web, APIs, Cloud Computing, Cybersecurity
2      UX, Payments, Mobile Development, API Integration
3           No-Code, UX, Database Management, API Design
4      Blockchain, Smart Contracts, Cryptography, Mob...
                             ...                        
324         Drone, Data Analytics, GIS, Machine Learning
325     E-commerce, UX, Web Development, Payment Systems
326    Streaming, AI, Data Science, Cloud Infrastructure
327    Fitness, Mobile App, UI/UX Design, Data Analytics
328        AI, NLP, Machine Learning, Content Management
Name: Preferred_technologies, Length: 329, dtype: object


In [11]:
# Peek at the first five rows, including the derived "combined" column
df.head(n=5)

Unnamed: 0,Project_name,industry,Preferred_technologies,budget,Experience_level,description,Domain,combined
0,K-12 Adaptive Learning Platform,EdTech,"AI, NLP, Data Science, UI/UX Design",Low,Beginner,An AI-driven platform leveraging NLP to create...,"AI, App","AI, NLP, Data Science, UI/UX Design Beginner A..."
1,Remote Patient Monitoring System,HealthTech,"Web, APIs, Cloud Computing, Cybersecurity",Medium,Intermediate,A web-based health platform using APIs to enab...,"Health, Remote","Web, APIs, Cloud Computing, Cybersecurity Inte..."
2,TripSync Travel Assistant,TravelTech,"UX, Payments, Mobile Development, API Integration",High,Advanced,A sophisticated travel assistant app integrati...,"Nomad, Marketplace","UX, Payments, Mobile Development, API Integrat..."
3,ScaleEasy SaaS Builder,SaaS,"No-Code, UX, Database Management, API Design",Medium,Intermediate,A no-code SaaS platform with intuitive UX to e...,"Builder, Tools","No-Code, UX, Database Management, API Design I..."
4,Freelance Secure Finance,FinTech,"Blockchain, Smart Contracts, Cryptography, Mob...",Low,Beginner,A blockchain-based financial app using smart c...,"Crypto, Security","Blockchain, Smart Contracts, Cryptography, Mob..."


In [6]:

def recommend_project(tech, level, domain, budget, top_n=None):
    """Return the projects whose combined text is closest to the given preferences.

    The four preference values are joined into one query string in the same
    order used to build ``df["combined"]``, encoded with the fitted
    ``vectorizer``, and looked up in the fitted ``nn`` index.

    Parameters
    ----------
    tech, level, domain, budget
        Preference values; each is formatted into the query with ``str()``.
    top_n : int, optional
        Number of projects to return. ``None`` (the default) keeps the
        ``n_neighbors`` value the ``nn`` model was constructed with.

    Returns
    -------
    list of dict
        One dict per recommended project with the keys ``"Project_name"``,
        ``"description"`` and ``"industry"``, in the order returned by the
        neighbour search.
    """
    query = f"{tech} {level} {domain} {budget}"
    query_vec = vectorizer.transform([query])
    # Only the row positions are needed, so skip returning the distances.
    neighbor_rows = nn.kneighbors(
        query_vec, n_neighbors=top_n, return_distance=False
    )[0]
    # kneighbors yields positional offsets into the fitted matrix -> use iloc.
    matches = df.iloc[neighbor_rows][["Project_name", "description", "industry"]]
    return matches.to_dict(orient="records")


In [14]:
# Sample preferences to run through the recommender
tech = "AI, NLP, Data Science, UI/UX Design"
level = "Beginner"
domain = "Crypto, Security"
budget = "Low"

recommendations = recommend_project(tech, level, domain, budget)

# Print each match as a numbered block: heading first, then one line per field
report_fields = (
    ("Project Name", "Project_name"),
    ("Industry", "industry"),
    ("Description", "description"),
)
for rank, project in enumerate(recommendations, start=1):
    print(f"\nRecommendation {rank}:")
    for label, key in report_fields:
        print(f"{label}: {project[key]}")



Recommendation 1:
Project Name: K-12 Adaptive Learning Platform
Industry: EdTech
Description: An AI-driven platform leveraging NLP to create tailored learning paths for K-12 students, designed for beginner developers with a low budget.

Recommendation 2:
Project Name: KidsSpeak Language Tutor
Industry: EdTech
Description: An AI-powered platform using NLP to offer interactive language learning for kids, created by beginners with a low budget.

Recommendation 3:
Project Name: MarketStream Fintech App
Industry: FinTech
Description: A fintech app with AI-driven streaming for real-time market analysis, designed for advanced innovators with a low budget.


In [None]:
# Self-retrieval check: query the index with each project's own combined text
# and count how often the project appears among its own nearest neighbours.
# All rows are encoded and queried in one batch instead of one call per row.
query_matrix = vectorizer.transform(df["combined"])
neighbor_rows = nn.kneighbors(query_matrix, return_distance=False)

total = len(df)
# kneighbors returns positional row offsets, so compare against each row's
# position (enumerate) rather than its index label, which only coincides with
# the position when df has a default RangeIndex.
correct = sum(
    position in row_neighbors
    for position, row_neighbors in enumerate(neighbor_rows)
)

# Guard the division so an empty frame reports 0% instead of raising.
accuracy = correct / total * 100 if total else 0.0
print(f"Reconstruction Accuracy: {accuracy:.2f}%")

Reconstruction Accuracy: 97.26%
