In [1]:
!pip install streamlit pyngrok joblib --quiet


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m44.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [8]:
# ----------------------------
# Install dependencies
# ----------------------------
!pip install streamlit pyngrok joblib --quiet

# ----------------------------
# Imports
# ----------------------------
import streamlit as st
import pandas as pd
import joblib
from pyngrok import ngrok
import numpy as np

# ----------------------------
# Load dataset
# ----------------------------
df = pd.read_csv("/content/all_upwork_jobs.csv", encoding="latin1", on_bad_lines="skip")

# Preprocess salaries
for c in ["hourly_low","hourly_high","budget"]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")

df["avg_hourly"] = df[["hourly_low","hourly_high"]].mean(axis=1)
df["hourly_monthly"] = df["avg_hourly"] * 160
df["effective_salary"] = df.apply(
    lambda r: r["hourly_monthly"] if pd.notna(r.get("hourly_monthly")) and r.get("is_hourly", False)
    else r.get("budget", 0),
    axis=1
)
df["text"] = df["title"].fillna("") + " " + df["country"].fillna("")

# ----------------------------
# Load models
# ----------------------------
vectorizer = joblib.load("/content/models/tfidf_vectorizer.pkl")
svd = joblib.load("/content/models/svd_model.pkl")
nn_model = joblib.load("/content/models/nn_model.pkl")

X_tfidf = vectorizer.transform(df["text"])
X_reduced = svd.transform(X_tfidf)

# ----------------------------
# Streamlit app
# ----------------------------
st.set_page_config(page_title="Job Recommendation App", layout="wide")
st.title("📌 Job Recommendation System")

job_index = st.number_input("Job Index:", min_value=0, max_value=len(df)-1, value=0)
num_rec = st.number_input("Number of Recommendations:", min_value=1, max_value=20, value=5)

if st.button("Get Recommendations"):
    distances, indices = nn_model.kneighbors([X_reduced[job_index]], n_neighbors=num_rec+1)
    recs = []
    for idx in indices[0]:
        if idx != job_index:
            salary = df.iloc[idx]["effective_salary"]
            recs.append({
                "Title": df.iloc[idx]["title"],
                "Country": df.iloc[idx]["country"],
                "Effective Salary": float(salary) if salary > 0 else "N/A"
            })
    st.table(pd.DataFrame(recs))

# ----------------------------
# Run Streamlit via ngrok
# ----------------------------
import os
import threading

def start_streamlit():
    os.system("streamlit run /content/streamlit_app.py")

# Save this code as a .py file
with open("/content/streamlit_app.py", "w") as f:
    f.write("""
import streamlit as st
import pandas as pd
import joblib
import numpy as np

df = pd.read_csv("/content/all_upwork_jobs.csv", encoding="latin1", on_bad_lines="skip")

for c in ["hourly_low","hourly_high","budget"]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")

df["avg_hourly"] = df[["hourly_low","hourly_high"]].mean(axis=1)
df["hourly_monthly"] = df["avg_hourly"] * 160
df["effective_salary"] = df.apply(
    lambda r: r["hourly_monthly"] if pd.notna(r.get("hourly_monthly")) and r.get("is_hourly", False)
    else r.get("budget", 0),
    axis=1
)
df["text"] = df["title"].fillna("") + " " + df["country"].fillna("")

vectorizer = joblib.load("/content/models/tfidf_vectorizer.pkl")
svd = joblib.load("/content/models/svd_model.pkl")
nn_model = joblib.load("/content/models/nn_model.pkl")
X_tfidf = vectorizer.transform(df["text"])
X_reduced = svd.transform(X_tfidf)

st.title("📌 Job Recommendation System")
job_index = st.number_input("Job Index:", min_value=0, max_value=len(df)-1, value=0)
num_rec = st.number_input("Number of Recommendations:", min_value=1, max_value=20, value=5)

if st.button("Get Recommendations"):
    distances, indices = nn_model.kneighbors([X_reduced[job_index]], n_neighbors=num_rec+1)
    recs = []
    for idx in indices[0]:
        if idx != job_index:
            salary = df.iloc[idx]["effective_salary"]
            recs.append({
                "Title": df.iloc[idx]["title"],
                "Country": df.iloc[idx]["country"],
                "Effective Salary": float(salary) if salary > 0 else "N/A"
            })
    st.table(pd.DataFrame(recs))
""")

# Start ngrok tunnel
NGROK_AUTH_TOKEN = "30WMYAsA1jiKQqsxhjzDBd6oamD_3jXCRjDbLja2dravFoxsY"
from pyngrok import ngrok
ngrok.set_auth_token(NGROK_AUTH_TOKEN)
public_url = ngrok.connect(8501)
print(" * Streamlit public URL:", public_url)

# Run Streamlit
threading.Thread(target=lambda: os.system("streamlit run /content/streamlit_app.py")).start()


2025-09-07 23:02:26.519 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-09-07 23:02:26.528 Session state does not function when running a script without `streamlit run`


 * Streamlit public URL: NgrokTunnel: "https://3e74fd491cec.ngrok-free.app" -> "http://localhost:8501"


In [9]:
# Use official Python base image
FROM python:3.12-slim

# Set working directory
WORKDIR /app

# Copy all necessary files
COPY streamlit_app.py ./
COPY models ./models
COPY all_upwork_jobs.csv ./all_upwork_jobs.csv

# Install dependencies
RUN pip install --no-cache-dir streamlit pandas scikit-learn joblib

# Expose Streamlit default port
EXPOSE 8501

# Command to run Streamlit
CMD ["streamlit", "run", "streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--browser.gatherUsageStats=false"]


SyntaxError: invalid syntax (ipython-input-2661329504.py, line 2)