In [39]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
import joblib

In [40]:
# Load the cleaned job-change dataset from the working directory and preview
# its first five rows as the cell output.
df = pd.read_csv(
    filepath_or_buffer='data_science_job_cleaned.csv',
)
df.head(n=5)

Unnamed: 0,enrollee_id,city,city_development_index,gender,relevent_experience,enrolled_university,education_level,major_discipline,experience,company_size,company_type,training_hours,target,training_hours_range
0,8949,city_103,0.92,Male,Has relevent experience,no_enrollment,Graduate,STEM,20.0,,,36.0,1.0,0-50
1,29725,city_40,0.776,Male,No relevent experience,no_enrollment,Graduate,STEM,15.0,S,Pvt Ltd,47.0,0.0,0-50
2,11561,city_21,0.624,Not specified,No relevent experience,Full time course,Graduate,STEM,5.0,,,83.0,0.0,51-100
3,33241,city_115,0.789,Not specified,No relevent experience,,Graduate,Business Degree,0.0,,Pvt Ltd,52.0,1.0,51-100
4,666,city_162,0.767,Male,Has relevent experience,no_enrollment,Masters,STEM,20.0,S,Funded Startup,8.0,0.0,0-50


In [41]:
# Keep only rows whose gender is 'Male' or 'Female' and whose company size and
# training hours are present, narrow the frame to the modelling columns, then
# drop any row that still has a missing value in one of those columns.
model_columns = ['gender', 'training_hours', 'company_size', 'target']
has_binary_gender = df['gender'].isin(['Male', 'Female'])
has_company_size = df['company_size'].notna()
has_training_hours = df['training_hours'].notna()
df = df.loc[
    has_binary_gender & has_company_size & has_training_hours,
    model_columns,
].dropna()
df.head()

Unnamed: 0,gender,training_hours,company_size,target
1,Male,47.0,S,0.0
4,Male,8.0,S,0.0
6,Male,24.0,S,0.0
7,Male,18.0,S,1.0
8,Male,46.0,S,1.0


In [42]:
# Encode
# The encoders are fitted on the raw string columns and the encoded values are
# written into a separate feature frame, leaving `df` untouched. Overwriting
# the `df` columns in place made this cell non-idempotent: a second run would
# fit the encoders on already-encoded integers and save encoders that reject
# the string labels the /predict endpoint passes to `transform`.
le_gender = LabelEncoder()
le_size = LabelEncoder()
X = df[['gender', 'training_hours', 'company_size']].assign(
    gender=le_gender.fit_transform(df['gender']),
    company_size=le_size.fit_transform(df['company_size']),
)
y = df['target']

# Train
# A fixed random_state makes the fitted forest (and therefore the saved model)
# reproducible across kernel restarts.
# NOTE(review): the model is fitted on every row, so no held-out score is
# available from this cell; `train_test_split` is imported but not used here.
model = RandomForestClassifier(class_weight='balanced', random_state=42)
model.fit(X, y)

# Save model and encoders
joblib.dump(model, 'job_seeking_model.pkl')
joblib.dump(le_gender, 'gender_encoder.pkl')
joblib.dump(le_size, 'size_encoder.pkl')

['size_encoder.pkl']

In [43]:
from flask import Flask, request, jsonify
import joblib
import math
import threading

import pandas as pd

app = Flask(__name__)

# NOTE: joblib.load unpickles arbitrary Python objects. Only load artifacts
# that were written by this notebook's training cell.
model = joblib.load('job_seeking_model.pkl')
gender_enc = joblib.load('gender_encoder.pkl')
size_enc = joblib.load('size_encoder.pkl')

# Column names and order used when the model was fitted in the training cell.
FEATURE_COLUMNS = ['gender', 'training_hours', 'company_size']


def _bad_request(message):
    """Return a JSON error payload with HTTP status 400."""
    return jsonify({"error": message}), 400


def _encode_label(encoder, field, value):
    """Encode one categorical value with a fitted LabelEncoder.

    Raises:
        ValueError: if ``value`` is not a string or is not one of the labels
            the encoder was fitted on. The message lists the accepted labels.
    """
    if isinstance(value, str):
        try:
            return encoder.transform([value])[0]
        except ValueError:
            pass  # unseen label: fall through to the shared error below
    allowed = [str(label) for label in encoder.classes_]
    raise ValueError(f"{field} must be one of {allowed}.")


@app.route("/predict", methods=["POST"])
def predict():
    """Predict whether a candidate is seeking a job change.

    Expects a JSON object with the keys ``gender``, ``training_hours`` and
    ``company_size``. Returns ``{"seeking": <bool>}`` on success, or
    ``{"error": <message>}`` with status 400 when the body is not a JSON
    object, a key is missing, a categorical value is unknown to its encoder,
    or ``training_hours`` is not a finite number.
    """
    # silent=True yields None instead of raising on a missing/invalid body,
    # so every malformed request is reported through the same 400 payload.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return _bad_request("Request body must be a JSON object.")

    missing = [key for key in FEATURE_COLUMNS if key not in data]
    if missing:
        return _bad_request(f"Missing required field(s): {missing}.")

    try:
        gender = _encode_label(gender_enc, "gender", data["gender"])
        size = _encode_label(size_enc, "company_size", data["company_size"])
    except ValueError as exc:
        return _bad_request(str(exc))

    try:
        hours = float(data["training_hours"])
    except (TypeError, ValueError):
        return _bad_request("training_hours must be a number.")
    if not math.isfinite(hours):
        return _bad_request("training_hours must be a finite number.")

    # Pass a named frame so the input carries the same feature names the
    # model was fitted with.
    features = pd.DataFrame([[gender, hours, size]], columns=FEATURE_COLUMNS)
    prediction = model.predict(features)[0]
    return jsonify({"seeking": bool(prediction)})


def run_flask():
    """Run the Flask development server; blocks the calling thread."""
    app.run()


if __name__ == "__main__":
    # Only run once: re-executing this cell while the previous server thread
    # is still alive would start a second server on the same port and fail
    # with "Address already in use". Note that the running server keeps
    # serving the `app` object it was started with; restart the kernel to
    # pick up route changes.
    _previous_thread = globals().get("flask_thread")
    if _previous_thread is None or not _previous_thread.is_alive():
        flask_thread = threading.Thread(target=run_flask)
        flask_thread.start()

 * Serving Flask app '__main__'
 * Debug mode: off


Address already in use
Port 5000 is in use by another program. Either identify and stop that program, or start the server with a different port.
On macOS, try disabling the 'AirPlay Receiver' service from System Preferences -> Sharing.


In [44]:
import requests

# Call the hosted prediction endpoint with a sample profile.
# requests applies no timeout by default, so without one a server that never
# answers would block this cell indefinitely. The limit is generous on the
# assumption that a hosted service may be slow to respond; tune as needed.
r = requests.post(
    "https://job-predictor-api.onrender.com/predict",
    json={
        "gender": "Female",
        "training_hours": 50,
        "company_size": "M"  # or 'S' or 'L'
    },
    timeout=60,
)
print(r.status_code)
print(r.json())


200
{'seeking': True}


In [45]:
# Try a job-seeker-like profile against the locally running Flask server.
# An explicit timeout keeps the cell from hanging forever if the local server
# is not responding (requests has no default timeout).
r = requests.post(
    "http://127.0.0.1:5000/predict",
    json={
        "gender": "Male",
        "training_hours": 150,
        "company_size": "S"
    },
    timeout=10,
)
print(r.json())


127.0.0.1 - - [23/May/2025 11:22:37] "POST /predict HTTP/1.1" 200 -


{'seeking': True}
