### Import Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Import Dataset

In [40]:
df = pd.read_csv("jobs_raw.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27482 entries, 0 to 27481
Data columns (total 29 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   job_title              27482 non-null  object 
 1   job_reference          27482 non-null  object 
 2   employer               27482 non-null  object 
 3   employer_postcode      27482 non-null  object 
 4   department             23851 non-null  object 
 5   location               27482 non-null  object 
 6   salary                 27478 non-null  object 
 7   job_type               27482 non-null  object 
 8   working_pattern        23040 non-null  object 
 9   pay_scheme             27482 non-null  object 
 10  pay_band               15840 non-null  object 
 11  staff_group            27482 non-null  object 
 12  specialty              22554 non-null  object 
 13  full_description       27482 non-null  object 
 14  json_salary_max        23455 non-null  float64
 15  js

### FR1: Highest and Lowest Paid Jobs

**Highest paid jobs** 

In [20]:
hpj = df[["job_title", "job_reference", "employer", "json_salary_max"]].dropna().sort_values(by = "json_salary_max",
                                                                                             ascending = False)
hpj.head()

Unnamed: 0,job_title,job_reference,employer,json_salary_max
10741,Salaried GP Practitioner (2-4 sessions),J180-SALARIEDGP0418,Central Advertising - General Practitioners,9000000.0
8680,Staff Grade in General Surgery,J313-A-19-0369,Health and Community Services Jersey,8195110.0
19534,Locum Consultant in Neurology,428-A-19-12817,Poole Hospital NHS Foundation Trust,7791000.0
4260,Clinical fellow in UGI surgery,321-S6-1005-CR-A,Oxford University Hospitals NHS Foundation Trust,4908600.0
4344,Clinical Fellow MS Service,321-S6-1028-CR,Oxford University Hospitals NHS Foundation Trust,4908600.0


**Lowest paid jobs**

In [22]:
lpj = df[["job_title", "job_reference", "employer", "json_salary_min"]].dropna().sort_values(by = "json_salary_min")
lpj.head()

Unnamed: 0,job_title,job_reference,employer,json_salary_min
5553,Consultant Old Age Psychiatry,311-M003-19,Pennine Care NHS Foundation Trust,103.0
8853,Consultant Old Age Psychiatry,311-M009-19,Pennine Care NHS Foundation Trust,103.0
21429,Consultant Old Age Psychiatry,311-M003-19-B,Pennine Care NHS Foundation Trust,103.0
14052,Consultant Old Age Psychiatry,311-M003-19-A,Pennine Care NHS Foundation Trust,103.0
3158,Bank General Medicine Consultant,192-A-19-23698,Portsmouth Hospitals NHS Trust Staff Bank,112.0


### FR2: Top 5 employers with the most pay gaps 

In [29]:
epg = df.groupby(["employer"])["json_salary_max"].agg(["max", "min"]).dropna().reset_index()
epg.columns = ["employer", "maximum salary", "minimum_salary"]
epg["pay_gap"] = epg["maximum salary"] - epg["minimum_salary"]
epg.sort_values(by = "pay_gap", ascending = False).head()

Unnamed: 0,employer,maximum salary,minimum_salary,pay_gap
58,Central Advertising - General Practitioners,9000000.0,146.0,8999854.0
157,Health and Community Services Jersey,8195110.0,2048.0,8193062.0
291,Poole Hospital NHS Foundation Trust,7791000.0,27146.0,7763854.0
284,Oxford University Hospitals NHS Foundation Trust,4908600.0,33034.0,4875566.0
182,King's College Hospital NHS Foundation Trust,1072014.0,29308.0,1042706.0


### FR3: Top 5 Months that most of the job advertisements occur

In [53]:
ja = df["json_date_posted"].str.split("T", expand = True)[0]
ja = pd.to_datetime(ja).dt.month_name()
ja.value_counts().head()

October    4276
July       3823
May        3733
June       3276
April      3229
Name: 0, dtype: int64