In [1]:
import pandas as pd
import random
from faker import Faker

# Initialize Faker to generate random data
fake = Faker()

# Seed both random sources so that re-running this cell writes identical CSVs.
# `random` drives the choice()/randint()/uniform() calls below; Faker keeps its
# own generator for name(), city(), sentence() and date_time_this_year().
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Constants
NUM_CLIENT_REQUESTS = 10000  # rows written to client_requests.csv and search_queries.csv
NUM_FREELANCERS = 1000       # rows written to freelancer_profiles.csv
INDUSTRIES = ['Technology', 'Marketing', 'Finance', 'Healthcare', 'Education']
# Each inner list is one skill bundle; a profile or request gets one whole bundle.
SKILLS = [
    ['HTML', 'CSS', 'JavaScript', 'React'],
    ['Python', 'Django', 'Flask'],
    ['Java', 'Spring', 'Hibernate'],
    ['Adobe Illustrator', 'Photoshop', 'UI/UX Design'],
    ['SEO', 'Content Writing', 'Social Media Marketing'],
]

# Generate Freelancer Profiles
# One record per ID in 1..NUM_FREELANCERS. The dict values are evaluated top to
# bottom, so the order of random draws per profile is fixed by the key order.
freelancer_profiles = [
    {
        'Freelancer ID': profile_number,
        'Name': fake.name(),
        'Location': fake.city(),
        'Industry': random.choice(INDUSTRIES),
        'Skills': random.choice(SKILLS),
        'Experience Level': random.choice(['Junior', 'Mid-Level', 'Senior']),
        'Rating': round(random.uniform(3.0, 5.0), 1),
        'Hourly Rate': random.randint(20, 150),
    }
    for profile_number in range(1, NUM_FREELANCERS + 1)
]

# Generate Client Requests
def _build_client_request(request_id):
    """Return one synthetic client request record for the given ID.

    The industry is drawn first and the three keyword fragments next, before
    the remaining fields, so the sequence of random draws is fixed.
    """
    industry = random.choice(INDUSTRIES)
    opener = random.choice(['I need', 'Looking for', 'Require'])
    adjective = random.choice(['talented', 'experienced', 'creative'])
    role = random.choice(['developer', 'designer', 'writer', 'marketer'])
    return {
        'Request ID': request_id,
        'Keywords': f"{opener} {adjective} {role}",
        'Industry': industry,
        'Description': fake.sentence(nb_words=10),
        'Budget': random.randint(100, 5000),
        'Location': fake.city(),
        'Skills Required': random.choice(SKILLS),
        'Urgency': random.choice(['Immediate', 'Within a week', 'Flexible']),
    }


client_requests = [
    _build_client_request(request_number)
    for request_number in range(1, NUM_CLIENT_REQUESTS + 1)
]

# Generate Search Queries
# Query i reuses the keywords of client request i (list position i - 1).
search_queries = []
for query_number in range(1, NUM_CLIENT_REQUESTS + 1):
    client_id = random.randint(1, NUM_CLIENT_REQUESTS)  # Random Client ID
    hit_count = random.randint(1, 5)  # between 1 and 5 freelancer IDs per query
    returned_ids = [str(random.randint(1, NUM_FREELANCERS)) for _ in range(hit_count)]
    search_queries.append({
        'Query ID': query_number,
        'Client ID': client_id,
        'Search Terms': client_requests[query_number - 1]['Keywords'],
        'Results Returned': ','.join(returned_ids),
        'Timestamp': fake.date_time_this_year(),
    })

# Convert to DataFrame
freelancer_df = pd.DataFrame(data=freelancer_profiles)
client_requests_df = pd.DataFrame(data=client_requests)
search_queries_df = pd.DataFrame(data=search_queries)

# Save to CSV (the row index is not written)
csv_targets = (
    (freelancer_df, 'freelancer_profiles.csv'),
    (client_requests_df, 'client_requests.csv'),
    (search_queries_df, 'search_queries.csv'),
)
for frame, csv_name in csv_targets:
    frame.to_csv(csv_name, index=False)

print("Dataset generated successfully!")


ModuleNotFoundError: No module named 'faker'

In [4]:

pip install pandas numpy scikit-learn tensorflow keras fastapi uvicorn

Defaulting to user installation because normal site-packages is not writeable
Collecting fastapi
  Downloading fastapi-0.115.0-py3-none-any.whl (94 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.6/94.6 KB[0m [31m720.1 kB/s[0m eta [36m0:00:00[0m kB/s[0m eta [36m0:00:01[0m:01[0m
[?25hCollecting uvicorn
  Downloading uvicorn-0.31.0-py3-none-any.whl (63 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 KB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Collecting starlette<0.39.0,>=0.37.2
  Downloading starlette-0.38.6-py3-none-any.whl (71 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.5/71.5 KB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4
  Downloading pydantic-2.9.2-py3-none-any.whl (434 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m434.9/434.9 KB[0m [31m1.8 MB

In [None]:
import pandas as pd

# Load the datasets
freelancer_df, client_requests_df, search_queries_df = (
    pd.read_csv(csv_name)
    for csv_name in ('freelancer_profiles.csv', 'client_requests.csv', 'search_queries.csv')
)

# Display the first few rows of each dataset
for frame in (freelancer_df, client_requests_df, search_queries_df):
    print(frame.head())


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Merging client requests with freelancer profiles for training.
# how='cross' pairs every request with every freelancer.
# NOTE(review): with the sizes used by the generation cell (10,000 requests x
# 1,000 freelancers) that is 10 million rows, and the dense matrix built below
# will very likely not fit in memory - consider sampling pairs first.
merged_data = pd.merge(client_requests_df, freelancer_df, how='cross')

# Feature extraction
# Fit ONE vocabulary on both text columns of the client requests. A vocabulary
# learned from 'Description' alone only contains words that occur in the
# descriptions, so skill terms missing from it would be encoded as all zeros.
# A single vectorizer is kept because the API transforms both inputs with it.
vectorizer = TfidfVectorizer()
vectorizer.fit(
    pd.concat(
        [client_requests_df['Description'], client_requests_df['Skills Required']],
        ignore_index=True,
    ).astype(str)
)
X_requests = vectorizer.transform(merged_data['Description'].astype(str))   # Description from client requests
X_skills = vectorizer.transform(merged_data['Skills Required'].astype(str))  # Skills requested by the client

# Combine features (you might want to use different methods to combine)
X = np.hstack((X_requests.toarray(), X_skills.toarray()))

# Target variable: freelancer to predict, as a zero-based class index.
# The generation cell assigns Freelancer IDs 1..NUM_FREELANCERS, while
# sparse_categorical_crossentropy with a NUM_FREELANCERS-unit softmax needs
# labels in 0..NUM_FREELANCERS-1. Subtracting 1 also makes class i line up
# with row position i of freelancer_profiles.csv.
y = merged_data['Freelancer ID'].to_numpy() - 1

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
from keras.models import Sequential
from keras.layers import Dense

# Define the model: two hidden ReLU layers followed by a softmax with one
# output unit per freelancer (multi-class classification).
model = Sequential([
    Dense(128, input_dim=X_train.shape[1], activation='relu'),
    Dense(64, activation='relu'),
    Dense(NUM_FREELANCERS, activation='softmax'),
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train the model; 20% of the training rows are held out for validation.
fit_options = {'epochs': 10, 'batch_size': 32, 'validation_split': 0.2}
model.fit(X_train, y_train, **fit_options)


In [None]:
# Evaluate the model on the held-out test split.
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')


In [None]:
# API integration


In [None]:
# Save the Model
import joblib

model.save('freelancer_model.h5')

# The API cell loads 'tfidf_vectorizer.pkl' with joblib.load, so the fitted
# vectorizer has to be persisted alongside the model or the API cannot start.
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')


In [None]:
# API Code Using FastAPI
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import numpy as np
import tensorflow as tf
import joblib
import pandas as pd

# Initialize FastAPI app
app = FastAPI()

# Load the pre-trained model and other necessary components.
# These loads run once, when this code is executed, not on every request.
model = tf.keras.models.load_model('freelancer_model.h5')
# NOTE(review): joblib.load unpickles the file, so only load a
# 'tfidf_vectorizer.pkl' that comes from a trusted source.
# NOTE(review): the file must have been written by the training step
# (joblib.dump of the fitted TfidfVectorizer) - confirm it exists before
# starting the API.
vectorizer = joblib.load('tfidf_vectorizer.pkl')  # Assuming you saved the vectorizer

# Example freelancer profiles (this would normally be fetched from a database).
# The recommendation route looks rows up in this frame by position.
freelancer_df = pd.read_csv('freelancer_profiles.csv')

# Define the input structure for the API
from typing import List


class ClientRequest(BaseModel):
    """Request body accepted by POST /recommend_freelancers."""

    # Free-text project description; vectorized with the TF-IDF vectorizer.
    description: str
    # Skill names; declared as List[str] so a non-string entry is rejected as a
    # validation error instead of failing later when the skills are joined.
    skills_required: List[str]
    # Accepted in the body; the recommendation route does not read it.
    budget: float
    # Accepted in the body; the recommendation route does not read it.
    location: str

# API Home route
@app.get("/")
def read_root():
    """Return a static banner identifying the service."""
    banner = {"message": "Freelancer Recommendation API"}
    return banner

# Prediction route
@app.post("/recommend_freelancers")
def recommend_freelancers(request: ClientRequest):
    """Return the three freelancers the model scores highest for a request.

    Args:
        request: Parsed request body. Only ``description`` and
            ``skills_required`` are used to build the feature vector.

    Returns:
        ``{"recommended_freelancers": [...]}`` with up to three entries,
        ordered from highest to lowest predicted probability.

    Raises:
        HTTPException: 404 when none of the top-scoring class indices
            corresponds to a row of ``freelancer_df``.
    """
    # Preprocess input (similar to what was done during training):
    # description features followed by skills features, side by side.
    X_description = vectorizer.transform([request.description])
    # Join skills into a string; str() keeps the join from failing on
    # non-string entries.
    X_skills = vectorizer.transform([' '.join(map(str, request.skills_required))])
    X = np.hstack((X_description.toarray(), X_skills.toarray()))

    # Make a prediction using the model
    predictions = model.predict(X)

    # argsort sorts ascending, so the last three entries are the three largest
    # probabilities; reverse them so the best match comes first.
    top_freelancer_indices = np.argsort(predictions[0])[-3:][::-1]

    # NOTE(review): assumes class index i of the model corresponds to row
    # position i of freelancer_profiles.csv - confirm against the training labels.
    # Indices beyond the table are dropped instead of letting iloc raise.
    valid_positions = [int(i) for i in top_freelancer_indices if i < len(freelancer_df)]
    top_freelancers = freelancer_df.iloc[valid_positions]

    # If no freelancers are found, return a 404 error
    if top_freelancers.empty:
        raise HTTPException(status_code=404, detail="No matching freelancers found")

    # Construct the response; numeric fields are cast to built-in types so
    # they are always JSON-serializable.
    response = []
    for _, freelancer in top_freelancers.iterrows():
        response.append({
            "freelancer_id": int(freelancer['Freelancer ID']),
            "name": freelancer['Name'],
            "skills": freelancer['Skills'],
            "hourly_rate": int(freelancer['Hourly Rate']),
            "rating": float(freelancer['Rating']),
            "location": freelancer['Location']
        })

    return {"recommended_freelancers": response}


In [None]:
# Running the API
uvicorn main:app --reload
