In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
# Skill tags required by each project; entry i is reported later as "Project i+1".
project_tags = [
    ["html", "css", "javascript"],
    ["graphic design", "logo design"],
    ["content writing", "technology", "research"],
]

# Candidate pool, written as (name, comma-separated skills, rating) rows and
# expanded into one dict per freelancer with those three keys.
_FREELANCER_FIELDS = ("name", "skills", "rating")
_FREELANCER_ROWS = [
    ("John", "HTML, CSS, JavaScript", 4.5),
    ("doe", "HTML, CSS, TypeScript", 4.8),
    ("Emma", "Graphic Design, Adobe Illustrator", 4.1),
    ("Stone", "Graphic Design, logo design", 4.2),
    ("Michael", "Content Writing, Technology, Research", 4.8),
    ("Jackson", "Content Writing, technology", 4.3),
]
freelancers = [dict(zip(_FREELANCER_FIELDS, row)) for row in _FREELANCER_ROWS]

In [8]:
# Flatten each project's tag list into one space-separated string. Tag
# boundaries are not preserved: a multi-word tag such as "graphic design"
# becomes two ordinary words in the joined text.
project_texts = list(map(' '.join, project_tags))
# Freelancer skills are already a single string each; use them as-is.
freelancer_texts = [person["skills"] for person in freelancers]

In [15]:
# Inspect the raw skills strings that are passed to the vectorizer.
freelancer_texts

['HTML, CSS, JavaScript',
 'HTML, CSS, TypeScript',
 'Graphic Design, Adobe Illustrator',
 'Graphic Design, logo design',
 'Content Writing, Technology, Research',
 'Content Writing, technology']

In [9]:
# Fit the TF-IDF vocabulary and weights on the project texts only, then
# project both corpora into that same feature space. Words that appear only
# in freelancer skills (never in a project) are not part of the vocabulary
# and therefore contribute nothing to the freelancer vectors.
vectorizer = TfidfVectorizer().fit(project_texts)
project_vectors = vectorizer.transform(project_texts)
freelancer_vectors = vectorizer.transform(freelancer_texts)

In [17]:
# Print the sparse TF-IDF matrix; each output line is
# (project row, vocabulary column) followed by the stored weight.
print(project_vectors)

  (0, 5)	0.5773502691896257
  (0, 1)	0.5773502691896257
  (0, 4)	0.5773502691896257
  (1, 6)	0.4082482904638631
  (1, 2)	0.8164965809277261
  (1, 3)	0.4082482904638631
  (2, 7)	0.5
  (2, 8)	0.5
  (2, 9)	0.5
  (2, 0)	0.5


In [10]:
# Pairwise cosine similarity between every project and every freelancer:
# similarities[p][f] scores project p against freelancer f.
similarities = cosine_similarity(project_vectors, freelancer_vectors)

In [13]:
# Inspect the full similarity matrix (one row per project, one column per freelancer).
similarities

array([[1.        , 0.81649658, 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.8660254 , 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 1.        ,
        0.8660254 ]])

In [11]:
def get_freelancers_for_project(project_index, min_similarity=0.75):
    """Return the freelancers that fit a project, best-rated first.

    Reads the module-level ``similarities`` matrix, indexed as
    ``similarities[project_index][freelancer_index]``, and the module-level
    ``freelancers`` list to look up each candidate's rating.

    Args:
        project_index: Row of ``similarities`` to evaluate.
        min_similarity: Exclusive lower bound on the similarity score a
            freelancer must exceed to be included. Defaults to 0.75.

    Returns:
        A list of dicts with keys ``"index"`` (position in ``freelancers``),
        ``"similarity"`` and ``"rating"``, sorted by rating in descending
        order. Freelancers with equal ratings are ordered by similarity,
        highest first.
    """
    fit_freelancers = [
        {
            "index": freelancer_index,
            "similarity": similarity_score,
            "rating": freelancers[freelancer_index]["rating"],
        }
        for freelancer_index, similarity_score in enumerate(similarities[project_index])
        if similarity_score > min_similarity
    ]

    # Rating is the primary sort key; similarity breaks ties so that, among
    # equally rated freelancers, the closer skill match is listed first
    # instead of whoever happens to appear earlier in `freelancers`.
    fit_freelancers.sort(
        key=lambda match: (match["rating"], match["similarity"]),
        reverse=True,
    )
    return fit_freelancers

In [12]:
# Print a short report per project: its tags, followed by one line for each
# freelancer returned by get_freelancers_for_project, in the order returned.
for project_idx, tag_list in enumerate(project_tags):
    heading = f"Project {project_idx+1}"
    print(f"Project: {heading}")
    print(f"Project Tags: {', '.join(tag_list)}")
    for match in get_freelancers_for_project(project_idx):
        # "index" points back into the freelancers list for the name lookup.
        matched_name = freelancers[match["index"]]["name"]
        print(f"- Freelancer: {matched_name}, Rating: {match['rating']}, Similarity Score: {match['similarity']:.2f}")
    # Blank line between projects.
    print()

Project: Project 1
Project Tags: html, css, javascript
- Freelancer: doe, Rating: 4.8, Similarity Score: 0.82
- Freelancer: John, Rating: 4.5, Similarity Score: 1.00

Project: Project 2
Project Tags: graphic design, logo design
- Freelancer: Stone, Rating: 4.2, Similarity Score: 1.00
- Freelancer: Emma, Rating: 4.1, Similarity Score: 0.87

Project: Project 3
Project Tags: content writing, technology, research
- Freelancer: Michael, Rating: 4.8, Similarity Score: 1.00
- Freelancer: Jackson, Rating: 4.3, Similarity Score: 0.87

