In [59]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from faker import Faker
import random

In [60]:
# Utility functions for feature extraction and similarity calculation

def extract_features(projects):
    """Build a TF-IDF matrix from each project's textual fields.

    For every project the 'description', 'category' and 'location' values
    are trimmed, lower-cased and joined with single spaces into one document.

    Args:
        projects: iterable of dicts, each holding string values under the
            keys 'description', 'category' and 'location'.

    Returns:
        A ``(tfidf_matrix, vectorizer)`` tuple: the result of
        ``fit_transform`` on the combined documents, and the
        ``TfidfVectorizer`` (English stop words) that produced it.
    """
    text_fields = ('description', 'category', 'location')
    # One normalised document per project, fields in a fixed order.
    documents = [
        ' '.join(entry[field].strip().lower() for field in text_fields)
        for entry in projects
    ]
    tfidf = TfidfVectorizer(stop_words='english')
    return tfidf.fit_transform(documents), tfidf

def calculate_similarity(tfidf_matrix, user_vector):
    """Return the cosine similarity between project rows and the user vector.

    Args:
        tfidf_matrix: feature matrix with one row per project.
        user_vector: feature matrix holding the user's preference row(s).

    Returns:
        The array produced by ``cosine_similarity(tfidf_matrix, user_vector)``:
        one row per row of ``tfidf_matrix`` and one column per row of
        ``user_vector``.
    """
    scores = cosine_similarity(tfidf_matrix, user_vector)
    return scores

def create_user_vector(user_preferences, vectorizer):
    """Turn a user's preferred categories and countries into a feature vector.

    Args:
        user_preferences: dict with 'preferred_categories' and
            'preferred_countries', each an iterable of strings.
        vectorizer: object exposing ``transform(list_of_texts)``.

    Returns:
        Whatever ``vectorizer.transform`` returns for a one-element list
        containing the space-joined categories followed by the space-joined
        countries.
    """
    preference_keys = ('preferred_categories', 'preferred_countries')
    # Categories first, then countries, all separated by single spaces.
    query_text = ' '.join(
        ' '.join(user_preferences[key]) for key in preference_keys
    )
    return vectorizer.transform([query_text])


In [61]:
# set up categories and countries
# Fixed seed so that Restart Kernel -> Run All regenerates the same synthetic
# profiles and projects (and therefore the same similarity scores below).
SEED = 42
fake = Faker()
Faker.seed(SEED)
random.seed(SEED)

test_categories = [
    {'name': 'Technology'}, {'name': 'Healthcare'}, 
]

test_countries = [
    {'name': 'Cambodia'}, {'name': 'Indonesia'},
]

users = [{'username': f'testuser{i}'} for i in range(5)]

# NOTE(review): this builds 25 profiles per user, so e-mail addresses and
# company names repeat across users — confirm that is intended for test data.
user_profiles = []
for user in users:
    for i in range(25):
        profile = {
            'user': user['username'],
            'email': f'user{i}@example.com',
            'company_name': f'Company {i}',
            'interests': f'Interest {i}',
            'preferred_categories': [random.choice(test_categories)['name']],
            'preferred_countries': [random.choice(test_countries)['name']]        
        }
        user_profiles.append(profile)

# 25 synthetic projects per user, each with a random category and location
# drawn from the lists above.
projects = []
for user in users:
    for j in range(25):
        title = f"Next-Gen {fake.catch_phrase()} Platform"
        description = f"This project aims to revolutionize {fake.bs()} using technologies like {fake.word(ext_word_list=['AI', 'Blockchain', 'IoT'])}."
                
        project = {
            'title': title,
            'description': description,
            'user': user['username'],
            'category': random.choice(test_categories)['name'],
            'location': random.choice(test_countries)['name'],
            # NOTE(review): multiplying by j makes the first project of every
            # user (j == 0) seek an investment of 0 — confirm intended.
            'investment_sought': random.randint(10000, 50000) * j
        }
        projects.append(project)

In [62]:
def get_recommendations(user_preferences, projects, top_n=5):
    """Recommend the projects most similar to a user's stated preferences.

    Projects are vectorised with TF-IDF, the user's preferences are projected
    into the same feature space, and projects are ranked by cosine similarity
    to the user vector (highest first). Diagnostic information is printed
    along the way.

    Args:
        user_preferences: dict with 'preferred_categories' and
            'preferred_countries' lists of strings.
        projects: sequence of project dicts with 'description', 'category'
            and 'location' string values.
        top_n: maximum number of projects to return (default 5).

    Returns:
        A list of up to ``top_n`` project dicts, ordered from most to least
        similar. An empty list is returned when ``projects`` is empty.
    """
    # Nothing to rank; also avoids fitting the vectorizer on an empty corpus.
    if len(projects) == 0:
        return []

    # Extract features from projects and create user vector
    tfidf_matrix, vectorizer = extract_features(projects)
    print(f"TF-IDF Matrix Shape: {tfidf_matrix.shape}")
    user_vector = create_user_vector(user_preferences, vectorizer)
    
    print(f"User Vector: {user_vector}")

    # Calculate similarity scores. The similarity matrix has one row per
    # project and a single column for the user, so flatten it to a 1-D array
    # of per-project scores before ranking. Sorting the 2-D array directly
    # would sort each one-element row and always yield index 0.
    similarity_scores = calculate_similarity(tfidf_matrix, user_vector)
    project_scores = similarity_scores.ravel()
    for i, score in enumerate(project_scores):
        print(f"Project {i} Similarity Score: {score}")

    # argsort is ascending; reverse for highest similarity first.
    ranked_project_indices = project_scores.argsort()[::-1].tolist()
    print(f"Ranked Project Indices: {ranked_project_indices}")

    # Select top project indices
    top_project_indices = ranked_project_indices[:top_n]
    print("Top Project Indices:", top_project_indices)

    # Fetch recommended projects by indices
    recommended_projects = [projects[index] for index in top_project_indices]

    return recommended_projects

In [63]:
# Test the recommendation function with a sample user's preferences

# The keys are the ones create_user_vector reads; the values are taken from
# the category and country names used to generate the synthetic projects.
sample_user_preferences = {
    'preferred_categories': ['Technology'],
    'preferred_countries': ['Cambodia']
}

# Getting recommendations for a sample user
# (get_recommendations also prints its diagnostic output: matrix shape,
# user vector, per-project scores and the ranked indices).
recommended_projects = get_recommendations(sample_user_preferences, projects)

# Display recommended projects, one project dict per line
for project in recommended_projects:
    print(project)

TF-IDF Matrix Shape: (125, 163)
User Vector:   (0, 143)	0.6813531383583599
  (0, 20)	0.7319548489143395
Project 0 Similarity Score: 0.2841460647169597
Project 1 Similarity Score: 0.1250449361896886
Project 2 Similarity Score: 0.24887564494541137
Project 3 Similarity Score: 0.23745314278364732
Project 4 Similarity Score: 0.23474772653211348
Project 5 Similarity Score: 0.0
Project 6 Similarity Score: 0.0
Project 7 Similarity Score: 0.26099169858700627
Project 8 Similarity Score: 0.10884612032012417
Project 9 Similarity Score: 0.0
Project 10 Similarity Score: 0.26099169858700627
Project 11 Similarity Score: 0.2597361312521664
Project 12 Similarity Score: 0.13115975503394994
Project 13 Similarity Score: 0.13083634051101364
Project 14 Similarity Score: 0.14145912568605926
Project 15 Similarity Score: 0.25702736141029586
Project 16 Similarity Score: 0.25748785276572156
Project 17 Similarity Score: 0.0
Project 18 Similarity Score: 0.10355660566123526
Project 19 Similarity Score: 0.0
Project 2