In [None]:
!pip install dotenv
!pip install sqlalchemy
!pip install pandas


In [1]:

import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# Load credentials from a local .env file into the process environment.
load_dotenv()

# Database connection info (each value is None when the variable is unset).
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")

# Fail fast with a readable message instead of silently building a URL that
# contains the literal text "None" and only breaks later at connect time.
_missing_settings = [
    name
    for name, value in (
        ("DB_HOST", DB_HOST),
        ("DB_NAME", DB_NAME),
        ("DB_USER", DB_USER),
    )
    if not value
]
if _missing_settings:
    raise EnvironmentError(
        "Missing database settings: "
        + ", ".join(_missing_settings)
        + ". Define them in the environment or in a .env file."
    )

# Create SQLAlchemy engine.
# URL.create() escapes each component, so credentials containing characters
# such as '@', ':', '/' or '#' no longer corrupt the connection URL the way
# plain f-string interpolation does.
engine = create_engine(
    URL.create(
        drivername="postgresql+psycopg2",
        username=DB_USER,
        password=DB_PASSWORD,
        host=DB_HOST,
        # An unset port falls back to the driver's default.
        port=int(DB_PORT) if DB_PORT else None,
        database=DB_NAME,
    )
)




##### Remote Jobs + Indeed Jobs Analysis Notebook

##### ======================================================
##### 📊 Remote Jobs - Descriptive Query
##### ======================================================

#### Business Question:
#### What are the average salary ranges grouped by job level?





In [11]:
# Descriptive query: for every job level, count the postings that have both
# salary bounds filled in and average the lower and upper bound (rounded).
# Levels are listed from the highest average upper bound downwards.
sql_query = """
WITH salary_data AS (
    SELECT 
        job_level,
        annual_salary_min,
        annual_salary_max
    FROM remote_jobs
    WHERE annual_salary_min IS NOT NULL AND annual_salary_max IS NOT NULL
)
SELECT 
    job_level,
    COUNT(*) AS job_count,
    ROUND(AVG(annual_salary_min)) AS avg_salary_min,
    ROUND(AVG(annual_salary_max)) AS avg_salary_max
FROM salary_data
GROUP BY job_level
ORDER BY avg_salary_max DESC;
"""

# Lift pandas' row-display limit so result tables are rendered in full.
# This is a global option and stays in effect for the cells that follow.
pd.set_option("display.max_rows", None)

remote_salary_df = pd.read_sql(sql=sql_query, con=engine)
remote_salary_df


Unnamed: 0,job_level,job_count,avg_salary_min,avg_salary_max
0,Senior,1,74000.0,125000.0
1,Any,4,65000.0,78750.0



### Insight:
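#### Senior-level jobs have a higher average salary range (about 74k–125k) than jobs open to any level (about 65k–79k).
#### The result contains no entry-level group and only five postings with salary data, so treat this comparison as indicative rather than conclusive.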

### Recommendation:
#### Focus job scraping on higher-level positions to target premium salary ranges.

### Prediction:
#### As remote work trends grow, mid-level roles may see upward salary adjustments.





#### ======================================================
#### 🕵️ Remote Jobs - Diagnostic Query
#### ======================================================

#### Business Question:
#### Which companies are posting the most high-paying jobs?


In [12]:

# Diagnostic query: count, per company, the postings whose maximum annual
# salary exceeds 100,000 and list the companies from most to fewest.
#
# The earlier version numbered each company's postings with ROW_NUMBER() and
# kept only rows numbered <= 5, which capped every company's count at 5 and
# made it impossible to tell apart companies with more than five such
# postings. The cap is removed so the count reflects all qualifying postings.
# company_name is added as a tie-breaker so equal counts sort deterministically.
sql_query = '''
WITH high_salary_jobs AS (
    SELECT
        company_name
    FROM remote_jobs
    WHERE annual_salary_max > 100000
)
SELECT
    company_name,
    COUNT(*) AS high_paying_jobs
FROM high_salary_jobs
GROUP BY company_name
ORDER BY high_paying_jobs DESC, company_name;
'''

remote_highpay_df = pd.read_sql(sql_query, con=engine)
remote_highpay_df


Unnamed: 0,company_name,high_paying_jobs
0,CoinTracker,1
1,Luxury Presence,1
2,Salesloft,1



#### Insight:
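#### Three companies (CoinTracker, Luxury Presence, Salesloft) each have one posting with a maximum salary above 100k; the sample is too small to say that any company posts such jobs consistently.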

#### Recommendation:
#### Consider building partnerships with those companies or tracking their listings.

#### Prediction:
#### Their hiring trends could be indicators of high-growth sectors.
