# In [7]:
# !pip install pandas
# !pip install scikit-learn



# In [8]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline

# Step 1: Load Data
def load_data(job_file, user_file):
    """
    Read the job postings and the user profiles from CSV.

    Args:
        job_file: Path (or file-like object) of the jobs CSV.
        user_file: Path (or file-like object) of the users CSV.

    Returns:
        A ``(jobs, users)`` tuple of DataFrames, in that order.
    """
    # Jobs are read first, then users, each with pandas' default CSV settings.
    return tuple(pd.read_csv(source) for source in (job_file, user_file))

# Step 2: Enhanced Feature Engineering
def create_weighted_features(jobs_df):
    """
    Add a ``weighted_features`` text column to ``jobs_df``.

    Each source column is repeated according to its weight so that it counts
    more often in a bag-of-words representation: skills three times;
    location, company and job type twice; experience level once.

    Args:
        jobs_df: DataFrame with ``skills``, ``location``, ``company``,
            ``job_type`` and ``experience_level`` columns.

    Returns:
        The same DataFrame object, with ``weighted_features`` added
        (the frame is modified in place).

    Raises:
        KeyError: If one of the source columns is missing.
    """
    column_weights = (
        ("skills", 3),
        ("location", 2),
        ("company", 2),
        ("job_type", 2),
        ("experience_level", 1),
    )

    pieces = []
    for column, weight in column_weights:
        # Missing values become empty text; otherwise a single NaN would turn
        # the whole concatenated feature into NaN.
        text = jobs_df[column].fillna("").astype(str)
        pieces.extend([text] * weight)

    # Concatenate element-wise. ``str.join`` cannot be used here because the
    # pieces are Series, not plain strings.
    combined = pieces[0]
    for piece in pieces[1:]:
        combined = combined + " " + piece

    jobs_df["weighted_features"] = combined
    return jobs_df

# Step 3: Hybrid Recommendation System with TF-IDF and User Interaction Data
def _as_text(value):
    """Return a scalar profile value as text, mapping None/NaN/NA to ''."""
    if isinstance(value, str):
        return value
    if value is None or value is pd.NA:
        return ""
    if isinstance(value, float) and value != value:  # NaN is the only float != itself
        return ""
    return str(value)


def _collaborative_scores(jobs_df, user_interactions, user_id=None, max_components=20):
    """Return one collaborative score in [0, 1] per row of ``jobs_df``, or None if there is no usable signal."""
    required = ("user_id", "job_id", "interaction")
    missing = [column for column in required if column not in user_interactions.columns]
    if missing:
        raise KeyError(f"user_interactions is missing required column(s): {missing}")

    # pivot_table (unlike pivot) tolerates repeated (user, job) pairs by
    # averaging them; unseen pairs count as "no interaction".
    matrix = user_interactions.pivot_table(
        index="user_id", columns="job_id", values="interaction", aggfunc="mean"
    ).fillna(0)
    if matrix.empty:
        return None

    values = matrix.to_numpy(dtype=float)
    # TruncatedSVD needs fewer components than the matrix has rows/columns;
    # for a degenerate matrix fall back to the raw interactions.
    rank_cap = min(values.shape) - 1
    if rank_cap >= 1:
        svd = TruncatedSVD(n_components=min(max_components, rank_cap), random_state=42)
        # Low-rank reconstruction: predicted interaction for every (user, job).
        values = svd.fit_transform(values) @ svd.components_
    predicted = pd.DataFrame(values, index=matrix.index, columns=matrix.columns)

    if user_id is not None and user_id in predicted.index:
        per_job = predicted.loc[user_id]
    else:
        # Unknown user: use the average predicted interest across all users.
        per_job = predicted.mean(axis=0)

    # Jobs are matched through a ``job_id`` column when present, otherwise
    # through the frame's index.
    job_keys = jobs_df["job_id"] if "job_id" in jobs_df.columns else jobs_df.index
    scores = per_job.reindex(pd.Index(job_keys)).fillna(0.0).to_numpy(dtype=float)
    if scores.size == 0:
        return None

    low, high = scores.min(), scores.max()
    if not high > low:
        # Every job got the same score: nothing to add to the ranking.
        return None
    # Min-max scale so the score is comparable with cosine similarity.
    return (scores - low) / (high - low)


def recommend_jobs(jobs_df, user_skills, user_location, user_experience,
                   user_interactions=None, top_n=5, user_id=None):
    """
    Recommend the ``top_n`` jobs that best match a user.

    The content-based score is the cosine similarity between the TF-IDF
    vector of the user's profile text and each job's ``weighted_features``.
    When ``user_interactions`` is given, a collaborative score derived from a
    truncated-SVD reconstruction of the user x job interaction matrix is
    averaged with the content score.

    Args:
        jobs_df: Jobs with ``weighted_features``, ``job_title``, ``skills``
            and ``location`` columns. The frame is not modified.
        user_skills: The user's skills as text (weighted x3 in the profile).
        user_location: The user's preferred location (weighted x2).
        user_experience: The user's experience level.
        user_interactions: Optional DataFrame with ``user_id``, ``job_id``
            and numeric ``interaction`` columns.
        top_n: Number of recommendations to return.
        user_id: Optional id of the user within ``user_interactions``; when
            omitted or unknown, the collaborative score is the average over
            all users.

    Returns:
        DataFrame with ``job_title``, ``skills``, ``location``,
        ``similarity`` and ``final_similarity``, best match first.

    Raises:
        KeyError: If a required column is missing from ``jobs_df`` or
            ``user_interactions``.
    """
    result_columns = ["job_title", "skills", "location", "similarity", "final_similarity"]

    # Work on a copy so the caller's frame does not accumulate score columns.
    ranked = jobs_df.copy()
    if ranked.empty:
        ranked["similarity"] = pd.Series(dtype=float)
        ranked["final_similarity"] = pd.Series(dtype=float)
        return ranked[result_columns]

    # Repeating a field makes its terms count more often in the TF-IDF vector.
    user_profile = " ".join(
        [_as_text(user_skills)] * 3
        + [_as_text(user_location)] * 2
        + [_as_text(user_experience)]
    )

    # Fit on jobs and the user profile together so they share one vocabulary;
    # the profile is the last row.
    job_text = ranked["weighted_features"].fillna("").astype(str)
    vectors = TfidfVectorizer().fit_transform(list(job_text) + [user_profile])
    ranked["similarity"] = cosine_similarity(vectors[-1], vectors[:-1]).ravel()

    collaborative = None
    if user_interactions is not None:
        collaborative = _collaborative_scores(ranked, user_interactions, user_id)

    if collaborative is None:
        ranked["final_similarity"] = ranked["similarity"]
    else:
        ranked["collaborative_similarity"] = collaborative
        ranked["final_similarity"] = (
            ranked["similarity"] + ranked["collaborative_similarity"]
        ) / 2

    top = ranked.sort_values(by="final_similarity", ascending=False).head(top_n)
    return top[result_columns]

# Step 4: Model Training and Evaluation (for hybrid recommendation)
def train_and_evaluate(jobs_df, user_df):
    """
    Evaluate the hybrid recommender on a held-out split of ``user_df``.

    ``user_df`` is split 80/20. The training rows are used only as the
    interaction history handed to ``recommend_jobs``; no separate model is
    fitted. For every held-out row the mean ``final_similarity`` of its top-5
    recommendations is taken as the prediction and compared with that row's
    ``interaction`` value.

    Args:
        jobs_df: Jobs frame accepted by ``recommend_jobs``.
        user_df: DataFrame with ``skills``, ``preferred_location``,
            ``experience_level`` and ``interaction`` columns.

    Returns:
        The mean squared error between predictions and held-out interactions.

    Raises:
        KeyError: If ``user_df`` lacks one of the required columns.
    """
    required = ("skills", "preferred_location", "experience_level", "interaction")
    missing = [column for column in required if column not in user_df.columns]
    if missing:
        raise KeyError(f"user_df is missing required column(s): {missing}")

    train, test = train_test_split(user_df, test_size=0.2, random_state=42)

    # Collaborative filtering needs user/job identifiers; without them fall
    # back to content-based scoring only.
    cf_columns = {"user_id", "job_id", "interaction"}
    interactions = train if cf_columns.issubset(train.columns) else None

    # Predict for the *held-out* users so predictions line up one-to-one with
    # the targets below.
    y_pred = []
    for _, user in test.iterrows():
        recommendations = recommend_jobs(
            jobs_df,
            user["skills"],
            user["preferred_location"],
            user["experience_level"],
            user_interactions=interactions,
            top_n=5,
        )
        score = recommendations["final_similarity"].mean()
        # An empty recommendation list has no mean; score it as 0.
        y_pred.append(0.0 if pd.isna(score) else float(score))

    # NOTE(review): similarities lie in [0, 1]; if ``interaction`` is on a
    # different scale (e.g. 1-5 ratings) the MSE mostly reflects that gap.
    y_true = test["interaction"].to_numpy()
    mse = mean_squared_error(y_true, y_pred)
    print(f"Model Evaluation (MSE): {mse}")
    return mse

# Step 5: Main Execution Flow
def main(job_file="jobs.csv", user_file="users.csv"):
    """
    Run the demo: load data, recommend jobs for the first user, then evaluate.

    Args:
        job_file: Path of the jobs CSV (defaults to ``jobs.csv``).
        user_file: Path of the users CSV (defaults to ``users.csv``).

    Raises:
        FileNotFoundError: If either CSV file does not exist.
    """
    # Load data
    jobs_df, users_df = load_data(job_file, user_file)

    # Engineer features
    jobs_df = create_weighted_features(jobs_df)

    if users_df.empty:
        print("No users found; nothing to recommend.")
        return

    # Select a user (first user in this example)
    user = users_df.iloc[0]

    # Generate recommendations
    recommendations = recommend_jobs(
        jobs_df,
        user["skills"],
        user["preferred_location"],
        user["experience_level"],
        top_n=5,
    )

    # Display results
    print(f"Job Recommendations for {user['name']}:\n")
    print(recommendations.to_string(index=False))

    # Evaluation needs observed interactions; skip it when the users file
    # does not carry them instead of failing after the results were shown.
    if "interaction" in users_df.columns:
        train_and_evaluate(jobs_df, users_df)
    else:
        print("\nSkipping evaluation: users data has no 'interaction' column.")

# Execute the main function only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


# Output: FileNotFoundError: [Errno 2] No such file or directory: 'jobs.csv'