# NZ_Admin_JOBS Data Wrangling Task

In [1]:
#loading library
import numpy as np
import pandas as pd

In [2]:
#loading the dataset
jobs=pd.read_excel("NZ_Admin_JOBS.xlsx")

## 1. Check the dataset

In [3]:
#check the first few rows 
jobs.head()

Unnamed: 0,字段1,字段1_link,字段2,字段3,字段4,字段5
0,Administrator,https://www.seek.co.nz/job/50582301?type=promo...,,location: Bay of PlentyBay of Plentyarea: Taur...,"Featured,at,Private Advertiser",classification: Administration & Office Suppor...
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promo...,Avenues Orthodontics,location: Bay of PlentyBay of Plentyarea: Taur...,"Featured,at",classification: Administration & Office Suppor...
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=stand...,New Zealand Police,location: AucklandAuckland,"4d ago,at",classification: Administration & Office Suppor...
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=stand...,Kew Pacific Island Early Learning Centre,location: SouthlandSouthlandarea: Invercargill...,"1h ago,at",classification: Administration & Office Suppor...
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=stand...,,location: CanterburyCanterburyarea: Christchur...,"4d ago,at,Private Advertiser",classification: Administration & Office Suppor...


In [4]:
jobs.shape

(2708, 6)

In [5]:
#Change the columns name
jobs=jobs.rename(columns={"字段1":"Job_title","字段1_link":"Job_link","字段2":"Company_name","字段3":"Location","字段4":"Date","字段5":"Classfication"})

In [6]:
jobs.head()

Unnamed: 0,Job_title,Job_link,Company_name,Location,Date,Classfication
0,Administrator,https://www.seek.co.nz/job/50582301?type=promo...,,location: Bay of PlentyBay of Plentyarea: Taur...,"Featured,at,Private Advertiser",classification: Administration & Office Suppor...
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promo...,Avenues Orthodontics,location: Bay of PlentyBay of Plentyarea: Taur...,"Featured,at",classification: Administration & Office Suppor...
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=stand...,New Zealand Police,location: AucklandAuckland,"4d ago,at",classification: Administration & Office Suppor...
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=stand...,Kew Pacific Island Early Learning Centre,location: SouthlandSouthlandarea: Invercargill...,"1h ago,at",classification: Administration & Office Suppor...
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=stand...,,location: CanterburyCanterburyarea: Christchur...,"4d ago,at,Private Advertiser",classification: Administration & Office Suppor...


In [7]:
#for job_title, we need to make the first alphabet is capital to unify the format 
jobs["Job_title"].str.capitalize()

0                              Administrator
1                               Receptionist
2               Prosecutions support officer
3       Early childhood centre administrator
4             Business support administrator
                        ...                 
2703                     Key account manager
2704                     Executive assistant
2705                  Temporary office roles
2706                  Temporary office roles
2707                     Executive assistant
Name: Job_title, Length: 2708, dtype: object

In [8]:
#check the missing value
jobs.isna().any()

Job_title        False
Job_link         False
Company_name      True
Location         False
Date             False
Classfication    False
dtype: bool

### There are only missing values in company_name column. We could locate to the rows that contain the missing values.

In [9]:
null_data = jobs[jobs.isnull().any(axis=1)]

In [10]:
null_data

Unnamed: 0,Job_title,Job_link,Company_name,Location,Date,Classfication
0,Administrator,https://www.seek.co.nz/job/50582301?type=promo...,,location: Bay of PlentyBay of Plentyarea: Taur...,"Featured,at,Private Advertiser",classification: Administration & Office Suppor...
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=stand...,,location: CanterburyCanterburyarea: Christchur...,"4d ago,at,Private Advertiser",classification: Administration & Office Suppor...
19,Administrator,https://www.seek.co.nz/job/50604829?type=stand...,,location: ManawatuManawatuarea: Rest of Manawa...,"7d ago,at,Private Advertiser",classification: Administration & Office Suppor...
60,Administrator,https://www.seek.co.nz/job/50582301?type=stand...,,location: Bay of PlentyBay of Plentyarea: Taur...,"11d ago,at,Private Advertiser",classification: Administration & Office Suppor...
64,ADMINISTRATION SPECIALIST - CONSTRUCTION,https://www.seek.co.nz/job/50638765?type=stand...,,location: AucklandAucklandarea: Rodney & North...,"5h ago,at,Private Advertiser",classification: Administration & Office Suppor...
67,Administrator,https://www.seek.co.nz/job/50582301?type=promo...,,location: Bay of PlentyBay of Plentyarea: Taur...,"Featured,at,Private Advertiser",classification: Administration & Office Suppor...
71,Administrator,https://www.seek.co.nz/job/50607763?type=stand...,,location: AucklandAucklandarea: Rodney & North...,"6d ago,at,Private Advertiser",classification: Administration & Office Suppor...
95,Export Admin Support,https://www.seek.co.nz/job/50583111?type=stand...,,location: Hawkes BayHawkes Bayarea: HastingsHa...,"11d ago,at,Private Advertiser",classification: Administration & Office Suppor...
135,Executive Assistant,https://www.seek.co.nz/job/50613522?type=stand...,,location: AucklandAucklandarea: Manukau & East...,"5d ago,at,Private Advertiser",classification: Administration & Office Suppor...
187,PERSONAL ASSISTANT - PART TIME,https://www.seek.co.nz/job/50588220?type=stand...,,location: AucklandAucklandarea: Auckland Centr...,"10d ago,at,Private Advertiser",classification: Administration & Office Suppor...




## 2. Data Wrangling
### 2.1 Column:Company_Name
#### For column "Company_Name", I also unify the format as the first letter is upperletter.

In [11]:
jobs["Company_name"].str.capitalize()

0                                            NaN
1                           Avenues orthodontics
2                             New zealand police
3       Kew pacific island early learning centre
4                                            NaN
                          ...                   
2703                       Hays talent solutions
2704                      One eighty recruitment
2705                       Asset recruitment ltd
2706                       Asset recruitment ltd
2707                      Alpha recruitment - nz
Name: Company_name, Length: 2708, dtype: object

### 2.2 Column:Location
#### For location, the ideal format is location:XX and add another column is area.
#### However, within the value in column location, there are location, area and also salary, with is duplicated with the column classfication. So the first step is to clean the salary part within location column.

In [12]:
jobs["Location_new"]=jobs["Location"].str.split(",",n=1,expand=True)[0]

In [13]:
jobs["Location_new"]

0       location: Bay of PlentyBay of Plentyarea: Taur...
1       location: Bay of PlentyBay of Plentyarea: Taur...
2                              location: AucklandAuckland
3       location: SouthlandSouthlandarea: Invercargill...
4       location: CanterburyCanterburyarea: Christchur...
                              ...                        
2703    location: AucklandAucklandarea: Auckland Centr...
2704    location: WellingtonWellingtonarea: Wellington...
2705       location: WaikatoWaikatoarea: HamiltonHamilton
2706       location: WaikatoWaikatoarea: HamiltonHamilton
2707                       location: WellingtonWellington
Name: Location_new, Length: 2708, dtype: object

### The next step is to extract the salary part from column classfication and put into a new column calls salary.

In [14]:
jobs["Salary"]=jobs["Location"].str.split(",",n=1,expand=True)[1]

In [15]:
jobs["Salary"]

0                              None
1                              None
2                              None
3                              None
4                              None
                   ...             
2703                           None
2704        Competitive hourly rate
2705    Competitive hourly rates $$
2706    Competitive hourly rates $$
2707                           None
Name: Salary, Length: 2708, dtype: object

### 2.3 Column: Area

### Then extract the area part from location columns and build a new column "area".

In [16]:
jobs["Area"]=jobs["Location_new"].str.split("area: ",n=1,expand=True)[1]

In [17]:
jobs["Area"]

0                           TaurangaTauranga
1                           TaurangaTauranga
2                                       None
3                   InvercargillInvercargill
4                   ChristchurchChristchurch
                        ...                 
2703        Auckland CentralAuckland Central
2704    Wellington CentralWellington Central
2705                        HamiltonHamilton
2706                        HamiltonHamilton
2707                                    None
Name: Area, Length: 2708, dtype: object

In [18]:
#Remove the duplicated words
def removeduplicated(x):
    if x!=None:
        trim=x.strip()
        index=(trim+trim).find(trim,1)
        if index==-1:
            return trim
        else:
            return trim[:index]
    if x==None:
        return None

In [19]:
jobs["Area"]=jobs["Area"].apply(lambda x:removeduplicated(x))

In [20]:
jobs["Area"]

0                 Tauranga
1                 Tauranga
2                     None
3             Invercargill
4             Christchurch
               ...        
2703      Auckland Central
2704    Wellington Central
2705              Hamilton
2706              Hamilton
2707                  None
Name: Area, Length: 2708, dtype: object

### 2.4 Column: Location
#### Now we start to clean the location.

In [21]:
jobs["Location_new"]=jobs["Location_new"].str.split("area: ",n=1,expand=True)[0]

In [22]:
jobs["Location_new"]

0       location: Bay of PlentyBay of Plenty
1       location: Bay of PlentyBay of Plenty
2                 location: AucklandAuckland
3               location: SouthlandSouthland
4             location: CanterburyCanterbury
                        ...                 
2703              location: AucklandAuckland
2704          location: WellingtonWellington
2705                location: WaikatoWaikato
2706                location: WaikatoWaikato
2707          location: WellingtonWellington
Name: Location_new, Length: 2708, dtype: object

In [23]:
jobs["Location_new"]=jobs["Location_new"].str.split(": ",n=1,expand=True)[1]

In [24]:
jobs["Location_new"]=jobs["Location_new"].apply(lambda x:removeduplicated(x))

In [25]:
jobs["Location_new"]

0       Bay of Plenty
1       Bay of Plenty
2            Auckland
3           Southland
4          Canterbury
            ...      
2703         Auckland
2704       Wellington
2705          Waikato
2706          Waikato
2707       Wellington
Name: Location_new, Length: 2708, dtype: object

In [26]:
jobs

Unnamed: 0,Job_title,Job_link,Company_name,Location,Date,Classfication,Location_new,Salary,Area
0,Administrator,https://www.seek.co.nz/job/50582301?type=promo...,,location: Bay of PlentyBay of Plentyarea: Taur...,"Featured,at,Private Advertiser",classification: Administration & Office Suppor...,Bay of Plenty,,Tauranga
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promo...,Avenues Orthodontics,location: Bay of PlentyBay of Plentyarea: Taur...,"Featured,at",classification: Administration & Office Suppor...,Bay of Plenty,,Tauranga
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=stand...,New Zealand Police,location: AucklandAuckland,"4d ago,at",classification: Administration & Office Suppor...,Auckland,,
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=stand...,Kew Pacific Island Early Learning Centre,location: SouthlandSouthlandarea: Invercargill...,"1h ago,at",classification: Administration & Office Suppor...,Southland,,Invercargill
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=stand...,,location: CanterburyCanterburyarea: Christchur...,"4d ago,at,Private Advertiser",classification: Administration & Office Suppor...,Canterbury,,Christchurch
...,...,...,...,...,...,...,...,...,...
2703,Key Account Manager,https://www.seek.co.nz/job/50490062?type=stand...,Hays Talent Solutions,location: AucklandAucklandarea: Auckland Centr...,"27d ago,at",classification: Administration & Office Suppor...,Auckland,,Auckland Central
2704,Executive Assistant,https://www.seek.co.nz/job/50488000?type=stand...,one eighty recruitment,location: WellingtonWellingtonarea: Wellington...,"27d ago,at",Competitive hourly rate,Wellington,Competitive hourly rate,Wellington Central
2705,Temporary Office Roles,https://www.seek.co.nz/job/50524865?type=stand...,Asset Recruitment Ltd,location: WaikatoWaikatoarea: HamiltonHamilton...,"20d ago,at",Competitive hourly rates $$,Waikato,Competitive hourly rates $$,Hamilton
2706,Temporary Office Roles,https://www.seek.co.nz/job/50477118?type=stand...,Asset Recruitment Ltd,location: WaikatoWaikatoarea: HamiltonHamilton...,"28d ago,at",Competitive hourly rates $$,Waikato,Competitive hourly rates $$,Hamilton


In [27]:
# Remove location column 
jobs.drop("Location",inplace=True,axis=1)

In [28]:
jobs

Unnamed: 0,Job_title,Job_link,Company_name,Date,Classfication,Location_new,Salary,Area
0,Administrator,https://www.seek.co.nz/job/50582301?type=promo...,,"Featured,at,Private Advertiser",classification: Administration & Office Suppor...,Bay of Plenty,,Tauranga
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promo...,Avenues Orthodontics,"Featured,at",classification: Administration & Office Suppor...,Bay of Plenty,,Tauranga
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=stand...,New Zealand Police,"4d ago,at",classification: Administration & Office Suppor...,Auckland,,
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=stand...,Kew Pacific Island Early Learning Centre,"1h ago,at",classification: Administration & Office Suppor...,Southland,,Invercargill
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=stand...,,"4d ago,at,Private Advertiser",classification: Administration & Office Suppor...,Canterbury,,Christchurch
...,...,...,...,...,...,...,...,...
2703,Key Account Manager,https://www.seek.co.nz/job/50490062?type=stand...,Hays Talent Solutions,"27d ago,at",classification: Administration & Office Suppor...,Auckland,,Auckland Central
2704,Executive Assistant,https://www.seek.co.nz/job/50488000?type=stand...,one eighty recruitment,"27d ago,at",Competitive hourly rate,Wellington,Competitive hourly rate,Wellington Central
2705,Temporary Office Roles,https://www.seek.co.nz/job/50524865?type=stand...,Asset Recruitment Ltd,"20d ago,at",Competitive hourly rates $$,Waikato,Competitive hourly rates $$,Hamilton
2706,Temporary Office Roles,https://www.seek.co.nz/job/50477118?type=stand...,Asset Recruitment Ltd,"28d ago,at",Competitive hourly rates $$,Waikato,Competitive hourly rates $$,Hamilton


### 2.5 Column:Date
#### Then the column post_date is needed to be cleaned. My idea is to extract the time only and get rid of other words.

In [29]:
jobs["Post_date"]=jobs["Date"].str.split(",",n=1,expand=True)[0]

In [30]:
jobs["date_temp"]=jobs["Date"].str.split(",",n=2,expand=True)[2]

In [31]:
jobs["date_temp"]

0       Private Advertiser
1                     None
2                     None
3                     None
4       Private Advertiser
               ...        
2703                  None
2704                  None
2705                  None
2706                  None
2707                  None
Name: date_temp, Length: 2708, dtype: object

In [32]:
#Merge two columns
jobs["Company_name"]=jobs["Company_name"].fillna(jobs["date_temp"])

In [33]:
jobs["Company_name"]

0                             Private Advertiser
1                           Avenues Orthodontics
2                             New Zealand Police
3       Kew Pacific Island Early Learning Centre
4                             Private Advertiser
                          ...                   
2703                       Hays Talent Solutions
2704                      one eighty recruitment
2705                       Asset Recruitment Ltd
2706                       Asset Recruitment Ltd
2707                      Alpha Recruitment - NZ
Name: Company_name, Length: 2708, dtype: object

In [34]:
def dateprocess(x):
    if x.strip()=="Featured":
        return None
    else:
        return x.split(" ")[0]

In [35]:
jobs["Post_date"]=jobs["Post_date"].apply(lambda x:dateprocess(x))

In [36]:
jobs["Post_date"]

0       None
1       None
2         4d
3         1h
4         4d
        ... 
2703     27d
2704     27d
2705     20d
2706     28d
2707     26d
Name: Post_date, Length: 2708, dtype: object

In [37]:
def datechange(x):
    if x!=None:
        if "h" in x:
            return "1d"
        if "m" in x:
            return str((int(x.strip("m")[0])*30))+"d"
        else:
            return x
    else:
        return None
    

In [38]:
jobs["Post_date"]=jobs["Post_date"].apply(lambda x:datechange(x))

In [39]:
jobs["Post_date"]=jobs["Post_date"].str[:-1]

In [40]:
# Remove temporary column 
jobs.drop("date_temp",inplace=True,axis=1)
jobs.drop("Date",inplace=True,axis=1)

In [41]:
jobs

Unnamed: 0,Job_title,Job_link,Company_name,Classfication,Location_new,Salary,Area,Post_date
0,Administrator,https://www.seek.co.nz/job/50582301?type=promo...,Private Advertiser,classification: Administration & Office Suppor...,Bay of Plenty,,Tauranga,
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promo...,Avenues Orthodontics,classification: Administration & Office Suppor...,Bay of Plenty,,Tauranga,
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=stand...,New Zealand Police,classification: Administration & Office Suppor...,Auckland,,,4
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=stand...,Kew Pacific Island Early Learning Centre,classification: Administration & Office Suppor...,Southland,,Invercargill,1
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=stand...,Private Advertiser,classification: Administration & Office Suppor...,Canterbury,,Christchurch,4
...,...,...,...,...,...,...,...,...
2703,Key Account Manager,https://www.seek.co.nz/job/50490062?type=stand...,Hays Talent Solutions,classification: Administration & Office Suppor...,Auckland,,Auckland Central,27
2704,Executive Assistant,https://www.seek.co.nz/job/50488000?type=stand...,one eighty recruitment,Competitive hourly rate,Wellington,Competitive hourly rate,Wellington Central,27
2705,Temporary Office Roles,https://www.seek.co.nz/job/50524865?type=stand...,Asset Recruitment Ltd,Competitive hourly rates $$,Waikato,Competitive hourly rates $$,Hamilton,20
2706,Temporary Office Roles,https://www.seek.co.nz/job/50477118?type=stand...,Asset Recruitment Ltd,Competitive hourly rates $$,Waikato,Competitive hourly rates $$,Hamilton,28


## 2.6 Column: Classfication and subclassfication
#### By looking at classfication column, the words are duplicated. So the next step is to clean classfication column.

In [42]:
jobs["Subclassfication"]=jobs["Classfication"].str.split("subClassification: ",n=1,expand=True)[1]

In [43]:
jobs["Subclassfication"]=jobs["Subclassfication"].apply(lambda x:removeduplicated(x))

In [44]:
jobs["Subclassfication"]

0                   Office Management
1                       Receptionists
2                               Other
3           Administrative Assistants
4       Client & Sales Administration
                    ...              
2703    Client & Sales Administration
2704                             None
2705                             None
2706                             None
2707             PA, EA & Secretarial
Name: Subclassfication, Length: 2708, dtype: object

In [45]:
jobs["Classfication"]=jobs["Classfication"].str.split("subClassification: ",n=1,expand=True)[0]

In [46]:
jobs["Classfication"]=jobs["Classfication"].str.split("classification: ",n=1,expand=True)[1]

In [47]:
jobs["Classfication"]=jobs["Classfication"].apply(lambda x:removeduplicated(x))

In [48]:
jobs["Classfication"]

0       Administration & Office Support
1       Administration & Office Support
2       Administration & Office Support
3       Administration & Office Support
4       Administration & Office Support
                     ...               
2703    Administration & Office Support
2704                               None
2705                               None
2706                               None
2707    Administration & Office Support
Name: Classfication, Length: 2708, dtype: object

In [49]:
jobs

Unnamed: 0,Job_title,Job_link,Company_name,Classfication,Location_new,Salary,Area,Post_date,Subclassfication
0,Administrator,https://www.seek.co.nz/job/50582301?type=promo...,Private Advertiser,Administration & Office Support,Bay of Plenty,,Tauranga,,Office Management
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promo...,Avenues Orthodontics,Administration & Office Support,Bay of Plenty,,Tauranga,,Receptionists
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=stand...,New Zealand Police,Administration & Office Support,Auckland,,,4,Other
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=stand...,Kew Pacific Island Early Learning Centre,Administration & Office Support,Southland,,Invercargill,1,Administrative Assistants
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=stand...,Private Advertiser,Administration & Office Support,Canterbury,,Christchurch,4,Client & Sales Administration
...,...,...,...,...,...,...,...,...,...
2703,Key Account Manager,https://www.seek.co.nz/job/50490062?type=stand...,Hays Talent Solutions,Administration & Office Support,Auckland,,Auckland Central,27,Client & Sales Administration
2704,Executive Assistant,https://www.seek.co.nz/job/50488000?type=stand...,one eighty recruitment,,Wellington,Competitive hourly rate,Wellington Central,27,
2705,Temporary Office Roles,https://www.seek.co.nz/job/50524865?type=stand...,Asset Recruitment Ltd,,Waikato,Competitive hourly rates $$,Hamilton,20,
2706,Temporary Office Roles,https://www.seek.co.nz/job/50477118?type=stand...,Asset Recruitment Ltd,,Waikato,Competitive hourly rates $$,Hamilton,28,


### 2.7 Column: Salary

In [50]:
jobs["Salary"]=jobs["Salary"][(jobs["Salary"].str.contains("\d"))==True]

In [51]:
jobs["Salary"]=jobs["Salary"].str.replace("to","-")

In [52]:
jobs["Salary"].unique()

array([nan, '$20 per hour', '$20 - $24.99 per hour', '$19 - $22 per hour',
       '$40,000 - $49,999', '$25 per hour', '$60 000 - $70 000 per annum',
       '$20.00 per hour', '$55,000 - $69,999', '$20.00 per hour.',
       '$21 per hour', '$55,000 - $64,999',
       '$50,000 - $55,000 plus benefits', '$45,000 - $49,999',
       '$55,000 - $60,000 per annum pro rata', '$48,835 - $63,221',
       '$20 - $29.99 per hour', '$24-$28', 'Up - $27ph', '8% holiday pay',
       '$45,000 - $59,999', 'NZD25 - NZD28 per hour', '$22 per hour',
       'NZD75000 per annum', '$75k - $85k p.a.',
       '$24 - $25 + 8% holiday pay!', '$50,000 per annum.',
       '$20 - $25 per hour', '$30 - $38 p.h. + 8% Holiday Pay',
       '$45-$55,000', '$50,000 - $54,999',
       'Up - $23 p.h. + + 8% Holiday Pay', 'Up - $22.00 p.h.',
       'NZD69,726-94,127p.a.+Retirement contributions',
       '$60k-$70k depending on experience!', '$22.00 - $23.00 phr',
       '$27 - $33 p.h. + 8% holiday pay',
       '$60K - $80

In [53]:
jobs[["lower_salary","higher_salary"]]=jobs["Salary"].str.split("-",n=1,expand=True)

In [54]:
jobs["lower_salary"].unique()

array([nan, '$20 per hour', '$20 ', '$19 ', '$40,000 ', '$25 per hour',
       '$60 000 ', '$20.00 per hour', '$55,000 ', '$20.00 per hour.',
       '$21 per hour', '$50,000 ', '$45,000 ', '$48,835 ', '$24', 'Up ',
       '8% holiday pay', 'NZD25 ', '$22 per hour', 'NZD75000 per annum',
       '$75k ', '$24 ', '$50,000 per annum.', '$30 ', '$45', 'NZD69,726',
       '$60k', '$22.00 ', '$27 ', '$60K ', '$60 ', '$35 ', 'NZD19 ',
       '$60,000 pa', '$21 ', '$60,000 ', '$21', '$40 ', '$18.50 ',
       '$25.5', '$25 ', '$48,000 ', '$50k ', '$70', 'Circa 50k'],
      dtype=object)

In [55]:
jobs["lower_salary"]=jobs["lower_salary"].str.replace(r'([Kk])',"000")

  jobs["lower_salary"]=jobs["lower_salary"].str.replace(r'([Kk])',"000")


In [56]:
jobs["lower_salary"]=jobs["lower_salary"].str.replace(r'(8% holiday pay)',"")

  jobs["lower_salary"]=jobs["lower_salary"].str.replace(r'(8% holiday pay)',"")


In [57]:
jobs["lower_salary"]=jobs["lower_salary"].str.extract(r'(\d+\.\d.|\d+)')

In [58]:
jobs["lower_salary"].unique()

array([nan, '20', '19', '40', '25', '60', '20.00', '55', '21', '50', '45',
       '48', '24', '22', '75000', '30', '69', '60000', '22.00', '27',
       '35', '18.50', '50000', '70'], dtype=object)

In [59]:
jobs["higher_salary"].unique()

array([nan, None, ' $24.99 per hour', ' $22 per hour', ' $49,999',
       ' $70 000 per annum', ' $69,999', ' $64,999',
       ' $55,000 plus benefits', ' $60,000 per annum pro rata',
       ' $63,221', ' $29.99 per hour', '$28', ' $27ph', ' $59,999',
       ' NZD28 per hour', ' $85k p.a.', ' $25 + 8% holiday pay!',
       ' $25 per hour', ' $38 p.h. + 8% Holiday Pay', '$55,000',
       ' $54,999', ' $23 p.h. + + 8% Holiday Pay', ' $22.00 p.h.',
       '94,127p.a.+Retirement contributions',
       '$70k depending on experience!', ' $23.00 phr',
       ' $33 p.h. + 8% holiday pay', ' $80K experience dependent',
       ' $65k + Career Progression - QS / leadership', ' $39.99 per hour',
       ' $24.99 per hour dependent on experience.',
       ' $27 p.h. + 8% Holiday Pay', ' NZD23 per hour',
       ' $25 p.h. + 8% Holiday Pay', '$23 per hour', ' $45 per hour',
       ' $23.50 per hour', ' $35 p.h. + 8% Holiday Pay',
       ' $22 p.h. + 8% Holiday Pay', ' NZD30 per hour', ' $40 per hour',

In [60]:
jobs["higher_salary"]=jobs["higher_salary"].str.replace(r'([Kk])',"000")

  jobs["higher_salary"]=jobs["higher_salary"].str.replace(r'([Kk])',"000")


In [61]:
jobs["higher_salary"]=jobs["higher_salary"].str.replace(r'(8% [hH]oliday [pP]ay*)',"")

  jobs["higher_salary"]=jobs["higher_salary"].str.replace(r'(8% [hH]oliday [pP]ay*)',"")


In [62]:
jobs["higher_salary"].unique()

array([nan, None, ' $24.99 per hour', ' $22 per hour', ' $49,999',
       ' $70 000 per annum', ' $69,999', ' $64,999',
       ' $55,000 plus benefits', ' $60,000 per annum pro rata',
       ' $63,221', ' $29.99 per hour', '$28', ' $27ph', ' $59,999',
       ' NZD28 per hour', ' $85000 p.a.', ' $25 + !', ' $25 per hour',
       ' $38 p.h. + ', '$55,000', ' $54,999', ' $23 p.h. + + ',
       ' $22.00 p.h.', '94,127p.a.+Retirement contributions',
       '$70000 depending on experience!', ' $23.00 phr', ' $33 p.h. + ',
       ' $80000 experience dependent',
       ' $65000 + Career Progression - QS / leadership',
       ' $39.99 per hour', ' $24.99 per hour dependent on experience.',
       ' $27 p.h. + ', ' NZD23 per hour', ' $25 p.h. + ', '$23 per hour',
       ' $45 per hour', ' $23.50 per hour', ' $35 p.h. + ',
       ' $22 p.h. + ', ' NZD30 per hour', ' $40 per hour', ' $55,000',
       ' $65000 p.a. + plus 000iwisaver', '$80000+Super', ' $30 per hour',
       ' $65,000 depending on 

In [63]:
jobs["higher_salary"]=jobs["higher_salary"].str.extract('(\d+\.\d+|\d+ \d+|\d+)')

In [68]:
jobs["higher_salary"]=jobs["higher_salary"].replace("70 000","70000")

In [69]:
jobs["higher_salary"].unique()

array([nan, '24.99', '22', '49', '70000', '69', '64', '55', '60', '63',
       '29.99', '28', '27', '59', '85000', '25', '38', '54', '23',
       '22.00', '94', '23.00', '33', '80000', '65000', '39.99', '45',
       '23.50', '35', '30', '40', '65', '75000'], dtype=object)

In [70]:
def yeartohour(x):
    if float(x)>1000:
        return round(float(x)/(50*38),2)
    else:
        return x

In [71]:
jobs["higher_salary"]=jobs["higher_salary"].apply(lambda x:yeartohour(x))

In [72]:
jobs["higher_salary"].unique()

array([nan, '24.99', '22', '49', 36.84, '69', '64', '55', '60', '63',
       '29.99', '28', '27', '59', 44.74, '25', '38', '54', '23', '22.00',
       '94', '23.00', '33', 42.11, 34.21, '39.99', '45', '23.50', '35',
       '30', '40', '65', 39.47], dtype=object)

In [73]:
jobs["lower_salary"]=jobs["lower_salary"].apply(lambda x:yeartohour(x))

In [75]:
jobs.head(20)

Unnamed: 0,Job_title,Job_link,Company_name,Classfication,Location_new,Salary,Area,Post_date,Subclassfication,lower_salary,higher_salary
0,Administrator,https://www.seek.co.nz/job/50582301?type=promo...,Private Advertiser,Administration & Office Support,Bay of Plenty,,Tauranga,,Office Management,,
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promo...,Avenues Orthodontics,Administration & Office Support,Bay of Plenty,,Tauranga,,Receptionists,,
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=stand...,New Zealand Police,Administration & Office Support,Auckland,,,4.0,Other,,
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=stand...,Kew Pacific Island Early Learning Centre,Administration & Office Support,Southland,,Invercargill,1.0,Administrative Assistants,,
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=stand...,Private Advertiser,Administration & Office Support,Canterbury,,Christchurch,4.0,Client & Sales Administration,,
5,Support Officer,https://www.seek.co.nz/job/50640393?type=stand...,Ministry for Primary Industries,Administration & Office Support,Northland,,Whangarei,270.0,Administrative Assistants,,
6,Support Officer,https://www.seek.co.nz/job/50615674?type=stand...,"Ministry of Business, Innovation and Employment",Administration & Office Support,Wellington,,Wellington Central,5.0,Other,,
7,office administrator,https://www.seek.co.nz/job/50640166?type=stand...,Hepburn Electrical Ltd,Administration & Office Support,Bay of Plenty,,Rotorua,210.0,Administrative Assistants,,
8,Office Administrator,https://www.seek.co.nz/job/50639248?type=stand...,Webster Holland Ltd,Administration & Office Support,Bay of Plenty,,Tauranga,1.0,Administrative Assistants,,
9,Administration Officer,https://www.seek.co.nz/job/50629393?type=stand...,New Zealand Police,Administration & Office Support,Canterbury,,,3.0,Other,,


In [76]:
jobs=jobs.rename(columns={"Location_new":"Location","Salary":"Salary_Range"})


In [77]:
jobs = jobs[['Job_title', 'Job_link', 'Company_name', 'Location', 'Area',"Post_date","Classfication","Subclassfication","lower_salary","higher_salary","Salary_Range"]]

In [78]:
jobs.head(20)

Unnamed: 0,Job_title,Job_link,Company_name,Location,Area,Post_date,Classfication,Subclassfication,lower_salary,higher_salary,Salary_Range
0,Administrator,https://www.seek.co.nz/job/50582301?type=promo...,Private Advertiser,Bay of Plenty,Tauranga,,Administration & Office Support,Office Management,,,
1,Receptionist,https://www.seek.co.nz/job/50620889?type=promo...,Avenues Orthodontics,Bay of Plenty,Tauranga,,Administration & Office Support,Receptionists,,,
2,Prosecutions Support Officer,https://www.seek.co.nz/job/50622169?type=stand...,New Zealand Police,Auckland,,4.0,Administration & Office Support,Other,,,
3,Early Childhood Centre Administrator,https://www.seek.co.nz/job/50639620?type=stand...,Kew Pacific Island Early Learning Centre,Southland,Invercargill,1.0,Administration & Office Support,Administrative Assistants,,,
4,Business Support Administrator,https://www.seek.co.nz/job/50622432?type=stand...,Private Advertiser,Canterbury,Christchurch,4.0,Administration & Office Support,Client & Sales Administration,,,
5,Support Officer,https://www.seek.co.nz/job/50640393?type=stand...,Ministry for Primary Industries,Northland,Whangarei,270.0,Administration & Office Support,Administrative Assistants,,,
6,Support Officer,https://www.seek.co.nz/job/50615674?type=stand...,"Ministry of Business, Innovation and Employment",Wellington,Wellington Central,5.0,Administration & Office Support,Other,,,
7,office administrator,https://www.seek.co.nz/job/50640166?type=stand...,Hepburn Electrical Ltd,Bay of Plenty,Rotorua,210.0,Administration & Office Support,Administrative Assistants,,,
8,Office Administrator,https://www.seek.co.nz/job/50639248?type=stand...,Webster Holland Ltd,Bay of Plenty,Tauranga,1.0,Administration & Office Support,Administrative Assistants,,,
9,Administration Officer,https://www.seek.co.nz/job/50629393?type=stand...,New Zealand Police,Canterbury,,3.0,Administration & Office Support,Other,,,


### 3. Save as excel

In [79]:
jobs.to_excel("admin_nz_14072022.xlsx",index=False) 