### Import Required Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings(action='ignore')

### Read the Data

In [2]:
train = pd.read_csv('Final_Train_Dataset.csv',index_col=0)

In [3]:
test = pd.read_csv('Final_Test_Dataset.csv')

### Data Cleaning

In [4]:
train.isnull().sum()/train.shape[0]*100

experience               0.000000
job_description         22.310878
job_desig                0.000000
job_type                75.775174
key_skills               0.005050
location                 0.000000
salary                   0.000000
company_name_encoded     0.000000
dtype: float64

In [5]:
test.isnull().sum()/test.shape[0]*100

experience               0.000000
job_description         23.057113
job_desig                0.000000
job_type                75.200727
key_skills               0.000000
location                 0.000000
company_name_encoded     0.000000
dtype: float64

In [6]:
train = train[~train.key_skills.isna()]

In [7]:
train.job_description.fillna('Null',inplace=True)
test.job_description.fillna('Null',inplace=True)

In [8]:
train.job_type.fillna('Null',inplace=True)
test.job_type.fillna('Null',inplace=True)

### Feature Engineering

In [9]:
def spliter1(x):
    a,b = x.split('-')
    c,d = b.split(' ')
    return int(a)

In [10]:
train['min_experience'] = train.experience.apply(spliter1)
test['min_experience'] = test.experience.apply(spliter1)

In [11]:
def spliter2(x):
    a,b = x.split('-')
    c,d = b.split(' ')
    return int(c)

In [12]:
train['max_experience'] = train.experience.apply(spliter2)
test['max_experience'] = test.experience.apply(spliter2)

In [13]:
train.drop('experience',axis=1,inplace=True)
test.drop('experience',axis=1,inplace=True)

### Job Type

In [14]:
train.job_type.isna().sum()

0

In [15]:
train.job_type[train.job_type.isna()] = 'unknown'
test.job_type[test.job_type.isna()] = 'unknown'

In [16]:
def bucket1(x):
    if x == 'unknown':
        return 'unknown'
    else:
        return 'analytics'

In [17]:
train.job_type = train.job_type.apply(bucket1)
test.job_type = test.job_type.apply(bucket1)

In [18]:
train = pd.concat([train,pd.get_dummies(train['job_type'],dtype='int')],axis=1)
train.drop('job_type',axis=1,inplace=True)
test = pd.concat([test,pd.get_dummies(test['job_type'],dtype='int')],axis=1)
test.drop('job_type',axis=1,inplace=True)

### Location

In [19]:
def city(x):
    if len(x.split(','))>1:
        return 'Multiple Cities'
    else:
        return x

In [20]:
train.location = train.location.apply(city)
test.location = test.location.apply(city)

In [21]:
train.location.value_counts()

location
Bengaluru                        4168
Multiple Cities                  3172
Mumbai                           2507
Gurgaon                          1644
Pune                             1193
                                 ... 
Chennai(Adithanar Nagar+1)          1
Meerut                              1
MP                                  1
Delhi NCR(New Friends Colony)       1
Amravati                            1
Name: count, Length: 676, dtype: int64

In [22]:
Bengaluru = ['Bengaluru', 'Bengaluru Bangalore', 'Bengaluru(1st Phase JP Nagar)', 'Bengaluru(1st Stage Indira Nagar)', 'Bengaluru(1st Stage Indira Nagar+2)', 'Bengaluru(2nd Phase JP Nagar)', 'Bengaluru(3rd Phase JP Nagar)', 'Bengaluru(4th Block Koramangala)', 'Bengaluru(4th Phase JP Nagar)', 'Bengaluru(5th Phase JP Nagar)', 'Bengaluru(5th block Koramangala)', 'Bengaluru(6th Phase JP Nagar)', 'Bengaluru(6th block Koramangala)', 'Bengaluru(6th block Koramangala+1)', 'Bengaluru(7th Phase JP Nagar)', 'Bengaluru(7th Phase JP Nagar+3)', 'Bengaluru(Ambedkar Nagar+10)', 'Bengaluru(Anepalya)', 'Bengaluru(B Hosahalli)', 'Bengaluru(Banashankari)', 'Bengaluru(Banashankari+1)', 'Bengaluru(Bande Bommasandra+1)', 'Bengaluru(Bannerghatta Road)', 'Bengaluru(Basavangudi)', 'Bengaluru(Bellandur)', 'Bengaluru(Bommanahalli)', 'Bengaluru(Bommenahalli)', 'Bengaluru(Bommenahalli+6)', 'Bengaluru(Chikka Bommasandra)', 'Bengaluru(Dairy Circle)', 'Bengaluru(Dinnur)', 'Bengaluru(Dodda Nekkundi)', 'Bengaluru(Domlur)', 'Bengaluru(Domlur+1)', 'Bengaluru(Dooravani Nagar)', 'Bengaluru(EPIP Zone)', 'Bengaluru(Electronic City)', 'Bengaluru(Electronic City+1)', 'Bengaluru(Electronics City Phase 1)', 'Bengaluru(Frazer Town)', 'Bengaluru(Garebhavipalya+1)', 'Bengaluru(HMT Layout)', 'Bengaluru(HSR Layout)', 'Bengaluru(HSR Layout+1)', 'Bengaluru(Hebbal Kempapura+1)', 'Bengaluru(Hebbal)', 'Bengaluru(Hebbal+1)', 'Bengaluru(Hoskote)', 'Bengaluru(Indira Nagar)', 'Bengaluru(Indira Nagar+1)', 'Bengaluru(JP Nagar)', 'Bengaluru(Jayamahal Extension)', 'Bengaluru(Jayanagar)', 'Bengaluru(Jayanagar+1)', 'Bengaluru(Kaggadasapura)', 'Bengaluru(Kalyan nagar)', 'Bengaluru(Kanakapura Road)', 'Bengaluru(Kanakpura Road)', 'Bengaluru(Kannamangala+4)', 'Bengaluru(Kasturi Nagar)', 'Bengaluru(Kodihalli)', 'Bengaluru(Koramangala)', 'Bengaluru(Koramangala+1)', 'Bengaluru(Kudlu)', 'Bengaluru(Kundalahalli)', 'Bengaluru(Langford Gardens)', 'Bengaluru(Langford Road)', 'Bengaluru(Langford Road+2)', 'Bengaluru(Langford Town)', 'Bengaluru(Lavelle Road)', 'Bengaluru(Madiwala)', 'Bengaluru(Mahadeva Kodigehalli+1)', 'Bengaluru(Mahadevpura)', 'Bengaluru(Malur Hosur Road)', 'Bengaluru(Malur)', 'Bengaluru(Manayata Tech Park)', 'Bengaluru(Marathahalli)', 'Bengaluru(Marathahalli+1)', 'Bengaluru(Nagavara)', 'Bengaluru(Nagavarapalya)', 'Bengaluru(New Gurappana Palya)', 'Bengaluru(Old Airport Road)', 'Bengaluru(Old Airport Road+1)', 'Bengaluru(Outer Ring Road South)', 'Bengaluru(Palace Road)', 'Bengaluru(Panathur+1)', 'Bengaluru(Rajaji Nagar)', 'Bengaluru(Ramamurthy Nagar)', 'Bengaluru(Residency Road)', 'Bengaluru(Richmond Town)', 'Bengaluru(Sadashiva Nagar)', 'Bengaluru(Sahakara Nagar)', 'Bengaluru(Sanjay nagar)', 'Bengaluru(Sarjapur)', 'Bengaluru(Sarjapur+2)', 'Bengaluru(Sector 1 HSR Layout)', 'Bengaluru(Sector 1 HSR Layout+7)', 'Bengaluru(Sector 2 HSR Layout)', 'Bengaluru(Sector 6 HSR Layout)', 'Bengaluru(Sector 7 HSR Layout)', 'Bengaluru(Shivaji Nagar)', 'Bengaluru(Singasandra)', 'Bengaluru(Ulsoor)', 'Bengaluru(Vasanth nagar)', 'Bengaluru(Victoria Road)', 'Bengaluru(Vijay Nagar)', 'Bengaluru(Whitefield)', 'Bengaluru(Whitefield+1)', 'Bengaluru(Wilson Garden)', 'Bengaluru(Yelahanka New Town)', 'Bengaluru(Yelahanka+1)', 'Bengaluru(Yeshwanthpur)']
Mumbai = ['Mumbai', 'Mumbai Suburbs', 'Mumbai(Airoli)', 'Mumbai(Akurli)', 'Mumbai(Ambernath)', 'Mumbai(Bandra Kurla Complex)', 'Mumbai(Chakala)', 'Mumbai(Charni Road)', 'Mumbai(Chembur)', 'Mumbai(Churchgate)', 'Mumbai(Churi Wadi)', 'Mumbai(Colaba)', 'Mumbai(DN Nagar)', 'Mumbai(Fort)', 'Mumbai(Ghatkopar West)', 'Mumbai(Govandi)', 'Mumbai(Grant Road)', 'Mumbai(Kalbadevi)', 'Mumbai(Kalina)', 'Mumbai(Kannamwar Nagar II+2)', 'Mumbai(Kannamwar Nagar II+3)', 'Mumbai(Khar West)', 'Mumbai(Khar West+2)', 'Mumbai(LBS Marg)', 'Mumbai(Linking Road)', 'Mumbai(Lower Parel)', 'Mumbai(M I D C)', 'Mumbai(Mahakali Nagar)', 'Mumbai(Mahalaxmi)', 'Mumbai(Mahim United Industrial Estate)', 'Mumbai(Marine Lines)', 'Mumbai(Marol)', 'Mumbai(Masjid Bunder West)', 'Mumbai(Matunga)', 'Mumbai(Mindspace)', 'Mumbai(Mira Road)', 'Mumbai(Mulund)', 'Mumbai(Mumbadevi Area)', 'Mumbai(Mumbai Central)', 'Mumbai(Mumbra)', 'Mumbai(Murbad)', 'Mumbai(Nariman Point)', 'Mumbai(Pali Village)', 'Mumbai(Parle Colony)', 'Mumbai(Powai)', 'Mumbai(Powai+2)', 'Mumbai(Prabhadevi)', 'Mumbai(SEEPZ)', 'Mumbai(Sakinaka)', 'Mumbai(Shanti Nagar Borivali)', 'Mumbai(Sion Koliwada)', 'Mumbai(Sion Trombay Road)', 'Mumbai(Tarapur)', 'Mumbai(Tardeo)', 'Mumbai(Veera Desai Road)', 'Mumbai(Vidya Vihar West)', 'Mumbai(Vikhroli)', 'Mumbai(Wadala)', 'Mumbai(Worli)', 'Mumbai(chinchpokli+1)']
Gurgaon = ['Gurgaon', 'Gurgaon(B Block Sushant Lok Phase - I)', 'Gurgaon(Behrampur)', 'Gurgaon(C Block Sushant Lok Phase - I)', 'Gurgaon(Cyber City)', 'Gurgaon(Electronic City)', 'Gurgaon(Ghata)', 'Gurgaon(Khandsa)', 'Gurgaon(Kherki Daula)', 'Gurgaon(Palam Vihar)', 'Gurgaon(Sector-24 Gurgaon)', 'Gurgaon(Sohna Road)', 'Gurgaon(South City 2)', 'Gurgaon(Udyog Vihar Industrial Area Phase VI+1)', 'Gurgaon(Udyog Vihar)', 'Gurugram', 'Gurgaon(Behrampur)', 'Gurgaon(Southern Peripheral Road )']
Pune = ['Pune', 'Pune(Baner Pashan Link Road)', 'Pune(Baner Pashan Link Road+1)', 'Pune(Baner)', 'Pune(Bavdhan)', 'Pune(Bavdhan+1)', 'Pune(Bhandarkar Road)', 'Pune(Bhugaon)', 'Pune(Camp)', 'Pune(Chakan)', 'Pune(Chinchwad)', 'Pune(Dattawadi+1)', 'Pune(Deccan Gymkhana+1)', 'Pune(Dhole Patil Road)', 'Pune(Eon Free Zone )', 'Pune(Hadapsar)', 'Pune(Kalewadi Phata)', 'Pune(Kalyani Nagar)', 'Pune(Karve Road)', 'Pune(Kharadi)', 'Pune(Kondhwa)', 'Pune(Koregaon Bhima)', 'Pune(Koregaon Park)', 'Pune(Kothrud)', 'Pune(Laxmi Road)', 'Pune(Loni)', 'Pune(Maan)', 'Pune(Mulshi)', 'Pune(NIBM)', 'Pune(Navi Peth)', 'Pune(Parvati Darshan)', 'Pune(Paud Road)', 'Pune(Phursungi)', 'Pune(Pune Nashik Highway)', 'Pune(Rajgurunagar)', 'Pune(Sector No-7 Bhosari)', 'Pune(Senapati Bapat Road)', 'Pune(Shikrapur)', 'Pune(Shivaji Nagar)', 'Pune(Viman Nagar)', 'Pune(Viman Nagar+4)', 'Pune(Vishrantwadi)', 'Pune(Wakad)', 'Pune(Wakad+3)', 'Pune(Wanawari)', 'Pune(Yavat)', 'Pune(Yerwada)']
Hyderabad = ['Hyderabad', 'Hyderabad(Ameerpet)', 'Hyderabad(Bachupally)', 'Hyderabad(Balanagar)', 'Hyderabad(Banjara hills)', 'Hyderabad(Begumpet)', 'Hyderabad(Begumpet+1)', 'Hyderabad(Cherlapally)', 'Hyderabad(Film Nagar)', 'Hyderabad(Gachibowli)', 'Hyderabad(Hyderbasthi+2)', 'Hyderabad(Jubilee Hills)', 'Hyderabad(Kompally)', 'Hyderabad(Kondapur)', 'Hyderabad(Koti)', 'Hyderabad(Kukatpally)', 'Hyderabad(Madhapur)', 'Hyderabad(Madhapur+1)', 'Hyderabad(Manikonda)', 'Hyderabad(Nacharam)', 'Hyderabad(Nagaram)', 'Hyderabad(Nanakramguda)', 'Hyderabad(Nanakramguda+1)', 'Hyderabad(Pashamailaram)', 'Hyderabad(Sanath Nagar)', 'Hyderabad(Shamirpet)', 'Hyderabad(Shamshabad)', 'Hyderabad(Somajiguda)', 'Hyderabad(Sundar Nagar)', 'Hyderabad(Tarnaka)', 'Hyderabad(Uppal)']
Chennai = ['Chennai', 'Chennai(Adithanar Nagar+1)', 'Chennai(Adyar)', 'Chennai(Alandur)', 'Chennai(Alwarpet)', 'Chennai(Alwarthirunagar)', 'Chennai(Aminjikarai)', 'Chennai(Anna Industrial Estate)', 'Chennai(Anna Nagar East)', 'Chennai(Anna Nagar Extension)', 'Chennai(Anna Nagar)', 'Chennai(Anna Salai)', 'Chennai(Annai Nagar)', 'Chennai(Arumbakkam)', 'Chennai(Cathedral Road)', 'Chennai(City Centre+1)', 'Chennai(Egmore)', 'Chennai(Ekkaduthangal)', 'Chennai(GST Road)', 'Chennai(Gopalapuram)', 'Chennai(Irungattukottai)', 'Chennai(Kadaperi)', 'Chennai(Kandanchavadi)', 'Chennai(Kilpauk)', 'Chennai(Kodambakkam)', 'Chennai(Kotturpuram)', 'Chennai(Kovilambakkam)', 'Chennai(MRC Nagar)', 'Chennai(Manapakkam)', 'Chennai(Maraimalai Nagar)', 'Chennai(Maraimalai Nagar+1)', 'Chennai(Mount Road)', 'Chennai(Mylapore)', 'Chennai(Mylapore+2)', 'Chennai(Nandambakkam)', 'Chennai(Navalur)', 'Chennai(Nungambakkam)', 'Chennai(OMR)', 'Chennai(Okkiyam Thuraipakkam)', 'Chennai(Oragadam)', 'Chennai(Pattaravakkam)', 'Chennai(Perungudi)', 'Chennai(Ramapuram)', 'Chennai(Royapettah)', 'Chennai(Sholinganallur)', 'Chennai(Sholinganallur+1)', 'Chennai(St Thomas Mount)', 'Chennai(Taramani)', 'Chennai(Teynampet)', 'Chennai(Thandalam)', 'Chennai(Thiyagaraya Nagar)', 'Chennai(Tiruvanmiyur)', 'Chennai(Vadapalani)', 'Chennai(West Mambalam)', 'Chennai(West Tambaram)']
Delhi = ['Delhi', 'Delhi NCR', 'Delhi NCR(AD Block Pitampura)', 'Delhi NCR(Badarpur)', 'Delhi NCR(Bhikaji Cama+1)', 'Delhi NCR(Connaught Place)', 'Delhi NCR(Cyber City)', 'Delhi NCR(DaryaGanj)', 'Delhi NCR(Ghitorni)', 'Delhi NCR(Greater Kailash)', 'Delhi NCR(Gwal Pahari)', 'Delhi NCR(IMT Manesar)', 'Delhi NCR(Jasola)', 'Delhi NCR(Jharsa)', 'Delhi NCR(Karol Bagh)', 'Delhi NCR(Kirti Nagar)', 'Delhi NCR(Kundli)', 'Delhi NCR(Laxmi Nagar)', 'Delhi NCR(Mathura Road)', 'Delhi NCR(Mohan Co-operative)', 'Delhi NCR(Moti Nagar)', 'Delhi NCR(Nehru Place)', 'Delhi NCR(Nehru Place+1)', 'Delhi NCR(Netaji Subhash Place)', 'Delhi NCR(Okhla)', 'Delhi NCR(Old Rajender Nagar)', 'Delhi NCR(Patel Nagar)', 'Delhi NCR(Peeragarhi)', 'Delhi NCR(Pocket-1 Sector 6 Dwarka)', 'Delhi NCR(Rohini)', 'Delhi NCR(Saket)', 'Delhi NCR(Sector 17A)', 'Delhi NCR(Sector Omicron III Greater Noida)', 'Delhi NCR(Sector-135 Noida)', 'Delhi NCR(Sector-142 Noida)', 'Delhi NCR(Sector-18 Noida)', 'Delhi NCR(Sector-2 Noida)', 'Delhi NCR(Sector-24 Gurgaon)', 'Delhi NCR(Sector-3 Noida)', 'Delhi NCR(Sector-38 Noida)', 'Delhi NCR(Sector-59 Noida)', 'Delhi NCR(Sector-65 Noida)', 'Delhi NCR(Sector-74A Gurgaon)', 'Delhi NCR(Sikandarpur)', 'Delhi NCR(Sohna Road)', 'Delhi NCR(South Extension I)', 'Delhi NCR(Sultanpur)', 'Delhi NCR(Sunder Vihar)', 'Delhi NCR(Vaishali)', 'Delhi NCR(Vasant Kunj)', 'Delhi NCR(Vasant Vihar)', 'Delhi NCR(Vikas Puri)', 'Delhi NCR(West Patel Nagar)', 'Delhi(AD Block Pitampura+25)', 'Delhi(Asiad village)', 'Delhi(Azadpur)', 'Delhi(B1 Block Janakpuri)', 'Delhi(Bhikaji Cama)', 'Delhi(Connaught Place)', 'Delhi(Defence Colony)', 'Delhi(Dilshad Garden+1)', 'Delhi(East of Kailash)', 'Delhi(Gandhi Vihar+15)', 'Delhi(Greater Kailash)', 'Delhi(Hauz Khas)', 'Delhi(Janakpuri)', 'Delhi(Jhandewalan)', 'Delhi(Kailash Colony)', 'Delhi(Lajpat Nagar IV)', 'Delhi(Laxmi Nagar)', 'Delhi(Nehru Place)', 'Delhi(Netaji Subhash Place)', 'Delhi(Netaji Subhash Place+1)', 'Delhi(New Friends Colony)', 'Delhi(Okhla)', 'Delhi(Peeragarhi)', 'Delhi(Punjabi Bagh)', 'Delhi(Punjabi Basti+21)', 'Delhi(Ranjit Nagar)', 'Delhi(Safdarjung Enclave)', 'Delhi(Sainik Farms)', 'Delhi(South Extension)', 'Delhi(Taimoor Nagar)', 'Delhi(Vasant Vihar)', 'Delhi(West Patel Nagar)', 'Faridabad', 'Ghaziabad', 'Greater Noida', 'Greater Noida(Surajpur Industrial Area)', 'Noida', 'Noida(NSEZ)', 'Noida(Sector-1 Noida)', 'Noida(Sector-10 Noida)', 'Noida(Sector-125 Noida)', 'Noida(Sector-126 Noida)', 'Noida(Sector-127 Noida)', 'Noida(Sector-132 Noida)', 'Noida(Sector-135 Noida)', 'Noida(Sector-142 Noida)', 'Noida(Sector-144 Noida)', 'Noida(Sector-16 Noida)', 'Noida(Sector-16A Noida)', 'Noida(Sector-2 Noida)', 'Noida(Sector-3 Noida)', 'Noida(Sector-4 Noida)', 'Noida(Sector-57 Noida)', 'Noida(Sector-58 Noida)', 'Noida(Sector-59 Noida)', 'Noida(Sector-6 Noida)', 'Noida(Sector-60 Noida)', 'Noida(Sector-62 Noida)', 'Noida(Sector-63 Noida)', 'Noida(Sector-64 Noida)', 'Noida(Sector-67 Noida)', 'Noida(Sector-7 Noida)', 'Noida(Sector-8 Noida)', 'Noida(Sector-9 Noida)']
Kolkata = ['Kolkata', 'Kolkata(Garia+2)', 'Kolkata(Gariahat)', 'Kolkata(Khudiram Bose Sarani)', 'Kolkata(New Town)', 'Kolkata(Park Street)', 'Kolkata(Rajarhat)', 'Kolkata(Russel Street)', 'Kolkata(Salt Lake)', 'Kolkata(Salt Lake+1)', 'Kolkata(Topsia)']
Ahmedabad = ['Ahmedabad', 'Ahmedabad(Ambawadi)', 'Ahmedabad(Ashram Road)', 'Ahmedabad(Bodakdev)', 'Ahmedabad(Bopal)', 'Ahmedabad(Gota)', 'Ahmedabad(Makarba)', 'Ahmedabad(Memnagar)', 'Ahmedabad(Moraiya)', 'Ahmedabad(Navrangpura)', 'Ahmedabad(Panchwati)', 'Ahmedabad(Prahlad Nagar)', 'Ahmedabad(Ring Road+3)', 'Ahmedabad(Santej)', 'Ahmedabad(Satellite)', 'Ahmedabad(Science City)', 'Ahmedabad(Shilaj)', 'Ahmedabad(Sola)', 'Ahmedabad(South Bopal)', 'Ahmedabad(Vastrapur)', 'Ahmedabad(Vastrapur+1)']
Other = ['Angul', 'Bangladesh', 'Bharuch', 'Bhubaneswar', 'Bhuj', 'Bidar', 'Brunei Darussalam', 'Central African Republic', 'China', 'Dadra and Nagar Haveli', 'Dindigul', 'Dubai', 'Erode', 'Fiji', 'Ganganagar', 'Goa Other', 'Gulbarga', 'Gwalior', 'Hisar', 'INDIA', 'Indonesia', 'Jalandhar', 'Jordan', 'Karur', 'Kiadb Vemegal', 'Kollam', 'Lubumbashi', 'Maldives', 'Mauritius', 'Mehsana', 'Morinda', 'Nepal', 'Other City(s) in Kerala', 'PAN India', 'Panvel', 'Patiala', 'Porbandar', 'Rajasthan Other', 'Ratnagiri', 'Rohtak', 'Rourkela', 'Russia', 'Serbia', 'Shirwal', 'Sierra Leone', 'Solapur', 'Sweden', 'Tanzania', 'Thiruvananthapuram', 'Uttar Pradesh', 'Uzbekistan', 'Valsad', 'Vellore', 'Yamunanagar', 'haryana', 'karnataka', 'other city']

In [23]:
# With the help of prompting, we have bucketed the Location variable into 10 Major Cities

In [24]:
def bucket_cities(x):
    if x in Bengaluru:
        return 'Bengaluru'
    elif x in Mumbai:
        return 'Mumbai'
    elif x in Gurgaon:
        return 'Gurgaon'
    elif x in Pune:
        return 'Pune'
    elif x in Hyderabad:
        return 'Hyderabad'
    elif x in Chennai:
        return 'Chennai'
    elif x in Delhi:
        return 'Delhi'
    elif x in Kolkata:
        return 'Kolkata'
    elif x in Ahmedabad:
        return 'Ahmedabad'
    elif x in Other:
        return 'General'
    else:
        return 'Other'

In [25]:
train.location = train.location.apply(bucket_cities)

In [26]:
test.location = test.location.apply(bucket_cities)

In [27]:
train = pd.concat([train,pd.get_dummies(train['location'],dtype='int')],axis=1)
train.drop('location',axis=1,inplace=True)
test = pd.concat([test,pd.get_dummies(test['location'],dtype='int')],axis=1)
test.drop('location',axis=1,inplace=True)

### Salary - Target Variable

In [28]:
train.salary = train.salary.replace({'0to3':1,'3to6':2,'6to10':3,'10to15':4,'15to25':5,'25to50':6})

### Company Name Encoded

In [29]:
train.drop('company_name_encoded',axis=1,inplace=True)
test.drop('company_name_encoded',axis=1,inplace=True)

### Key Skills

In [30]:
train.key_skills[0]

'team skills, communication skills, analytical skills, problem solving...'

In [31]:
from sklearn.feature_extraction.text import TfidfVectorizer

tv = TfidfVectorizer(stop_words='english',max_df=0.75)
tv.fit(train.key_skills)
features = tv.get_feature_names_out()
key_skills = pd.DataFrame(tv.transform(train.key_skills).toarray(), columns=features)

In [32]:
train = pd.concat([train,key_skills],axis=1)
train.drop('key_skills',axis=1,inplace=True)
train.dropna(inplace=True)

In [33]:
key_skills = pd.DataFrame(tv.transform(test.key_skills).toarray(),columns=features)

In [34]:
test = pd.concat([test,key_skills],axis=1)
test.drop('key_skills',axis=1,inplace=True)

### Job Description

In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer

tv = TfidfVectorizer(stop_words='english',max_df=0.75)
tv.fit(train.job_description)
features = tv.get_feature_names_out()
job_description = pd.DataFrame(tv.transform(train.job_description).toarray(), columns=features)

In [36]:
train = pd.concat([train,job_description],axis=1)
train.drop('job_description',axis=1,inplace=True)
train.dropna(inplace=True)

In [37]:
job_description = pd.DataFrame(tv.transform(test.job_description).toarray(), columns=features)

In [38]:
test = pd.concat([test,job_description],axis=1)
test.drop('job_description',axis=1,inplace=True)

### Job Designation

In [39]:
tv = TfidfVectorizer(stop_words='english',max_df=0.75)
tv.fit(train.job_desig)
features = tv.get_feature_names_out()
job_desig = pd.DataFrame(tv.transform(train.job_desig).toarray(), columns=features)

In [40]:
train = pd.concat([train,job_desig],axis=1)
train.drop('job_desig',axis=1,inplace=True)
train.dropna(inplace=True)

In [41]:
job_desig = pd.DataFrame(tv.transform(test.job_desig).toarray(), columns=features)

In [42]:
test = pd.concat([test,job_desig],axis=1)
test.drop('job_desig',axis=1,inplace=True)

In [43]:
print(train.shape)
print(test.shape)

(19798, 15790)
(6601, 15789)


### XGB-Regressor

In [44]:
drop = ['00', '03', '08', '09', '10', '100', '10th', '11', '11pm', '12', '12th', '13', '14', '15', '1500', '16', '16949', '17', '18', '1d', '1st', '1yr', '20', '2005', '2008', '2012', '2013', '2016', '2017', '2018', '24', '24x7', '27', '27001', '29', '29th', '2g', '2nd', '30', '300', '30am', '30th', '31', '31st', '35', '360', '365', '3d', '3g', '3p', '3pillar', '3rd', '3yrs', '40', '400', '4g', '4years', '50', '500', '5g', '5yrs', '60', '6months', '6yrs', '7th', '9001', '9k', '9yrs', 'aa', 'aas', 'ab', 'abap', 'abc', 'ability', 'abroad', 'absorption', 'abstraction', 'abt', 'academic', 'academics', 'accel', 'accent', 'accenture', 'acceptance', 'access', 'accessories', 'account', 'accountancy', 'accountant', 'accountants', 'accounting', 'accounts', 'achievement', 'acoustic', 'acquire', 'acquisition', 'acquisitions', 'acrobat', 'act', 'action', 'actions', 'activation', 'active', 'actively', 'activemq', 'activities', 'actuarial', 'actuary', 'ad', 'adams', 'adaptability', 'adc', 'addressing', 'adf', 'adhoc', 'adjudication', 'adl', 'adm', 'admin', 'administration', 'administrative', 'administrator', 'admissions', 'adobe', 'adoption', 'adp', 'ads', 'advance', 'advanced', 'advertisement', 'advertising', 'advise', 'adviser', 'advisor', 'advisors', 'advisory', 'advocacy', 'advocate', 'adwords', 'aem', 'aero', 'aeronautical', 'aerospace', 'affairs', 'affiliate', 'affordable', 'africa', 'afternoon', 'age', 'agency', 'agent', 'agents', 'agile', 'agility', 'agm', 'agreement', 'agreements', 'agri', 'agribusiness', 'agriculture', 'agrochemical', 'ahmedabad', 'ahu', 'ai', 'aim', 'air', 'aircraft', 'airline', 'airlines', 'airport', 'aix', 'ajax', 'aka', 'aldon', 'alert', 'alerts', 'algebra', 'algo', 'algorithm', 'algorithmic', 'algorithms', 'aligned', 'alliances', 'allocation', 'alt', 'alternate', 'alternative', 'alteryx', 'amadeus', 'amazon', 'amd', 'amdocs', 'america', 'american', 'aml', 'amr', 'anaconda', 'anal', 'analog', 'analyse', 'analyses', 'analysing', 'analysis', 'analyst', 'analysts', 'analytic', 'analytical', 'analytics', 'analyze', 'analyzer', 'anda', 'andheri', 'android', 'anesthesia', 'angular', 'angularjs', 'animation', 'anlaytics', 'annual', 'annuities', 'ansa', 'ansible', 'ansys', 'anti', 'ap', 'apac', 'apache', 'apdrp', 'apex', 'api', 'apis', 'apm', 'apo', 'app', 'apparel', 'appian', 'appium', 'appliances', 'application', 'applications', 'applied', 'apply', 'appraisal', 'apprentice', 'approval', 'approvals', 'apps', 'apriso', 'aptean', 'ar', 'arabic', 'archer', 'architect', 'architecting', 'architects', 'architectural', 'architecture', 'architectures', 'arcsight', 'area', 'areas', 'argus', 'ariba', 'arima', 'arm', 'arrangements', 'art', 'article', 'articles', 'articulate', 'artificial', 'asap', 'asia', 'asic', 'asme', 'aso', 'asp', 'aspect', 'aspirants', 'assembler', 'assembly', 'assessing', 'assessment', 'assessments', 'asset', 'assets', 'assignment', 'assignments', 'assist', 'assistance', 'assistant', 'assisting', 'associate', 'associates', 'assortment', 'asst', 'assurance', 'assured', 'astm', 'atg', 'atl', 'atlassian', 'atm', 'atos', 'attempt', 'attention', 'attitude', 'attorney', 'audio', 'audit', 'auditing', 'auditor', 'auditors', 'audits', 'aurangabad', 'australia', 'australian', 'author', 'auto', 'autocad', 'automated', 'automation', 'automobile', 'automobiles', 'automotive', 'autosys', 'availability', 'available', 'aviation', 'avp', 'awareness', 'aws', 'ax', 'axis', 'axure', 'azure', 'b1', 'b2', 'b2b', 'b2c', 'ba', 'baan', 'backend', 'backfill', 'background', 'backoffice', 'backup', 'balance', 'banca', 'bancs', 'bangalore', 'bangladesh', 'banglore', 'bank', 'banker', 'banking', 'banks', 'bar', 'barging', 'base', 'base24', 'based', 'basel', 'bash', 'basic', 'basics', 'basis', 'batch', 'batches', 'bau', 'bba', 'bbm', 'bca', 'bcg', 'bcom', 'bd', 'bdd', 'bde', 'bdm', 'bdna', 'bds', 'beautiful', 'beauty', 'behavior', 'behavioral', 'behaviour', 'behavioural', 'belt', 'bench', 'benchmarking', 'bending', 'benefit', 'benefits', 'bengali', 'bengaluru', 'beverage', 'beverages', 'bex', 'bfs', 'bfsi', 'bgv', 'bhopal', 'bhr', 'bi', 'bid', 'bidding', 'big', 'big4', 'bigdata', 'bigfix', 'biggest', 'billing', 'bills', 'binary', 'bing', 'bio', 'bioanalytical', 'biochemistry', 'bioinformatics', 'biology', 'biomedical', 'biosimilars', 'biostatistics', 'biotech', 'biotechnology', 'birt', 'bis', 'bit', 'bits', 'biu', 'biw', 'biztalk', 'bl', 'black', 'blended', 'blending', 'block', 'blockchain', 'blog', 'blogging', 'blogs', 'bloomberg', 'blue', 'blueprinting', 'blueprism', 'bluetooth', 'bm', 'bmc', 'bms', 'bo', 'board', 'boarding', 'bods', 'body', 'bom', 'bonus', 'book', 'booking', 'books', 'boomi', 'boost', 'boot', 'bootstrap', 'bot', 'botany', 'box', 'bpc', 'bpharm', 'bpm', 'bpma', 'bpo', 'bpos', 'bpr', 'bpt', 'branch', 'brand', 'branding', 'brands', 'brd', 'brds', 'brf', 'bridges', 'british', 'broadcast', 'broadcasting', 'brochures', 'brokerage', 'broking', 'browsing', 'bs', 'bsa', 'bsc', 'bsp', 'bss', 'btech', 'btl', 'bu', 'budget', 'budgetary', 'budgeting', 'bug', 'bugzilla', 'build', 'builder', 'building', 'buildings', 'bulk', 'bus', 'business', 'businessobjects', 'buy', 'buyer', 'buyers', 'buying', 'bw', 'c09', 'c2h', 'c4c', 'ca', 'cab', 'cabin', 'caching', 'cad', 'cadence', 'cae', 'cake', 'cakephp', 'calculation', 'calculations', 'calendars', 'calibration', 'caller', 'callers', 'calling', 'calls', 'camel', 'campaign', 'campaigns', 'campus', 'canada', 'candidate', 'candidates', 'cap', 'capability', 'capable', 'capacity', 'capex', 'capgemini', 'capital', 'caps', 'capsule', 'captivate', 'captive', 'car', 'carbon', 'card', 'cards', 'care', 'career', 'cargo', 'carpet', 'cart', 'cas', 'case', 'cases', 'cash', 'cassandra', 'casualty', 'cat', 'catalog', 'catalogue', 'catalyst', 'catastrophe', 'category', 'catering', 'cati', 'catia', 'cause', 'cbse', 'ccar', 'cce', 'ccie', 'ccm', 'ccna', 'ccnp', 'cd', 'cdd', 'cdm', 'cdn', 'cdo', 'ce', 'cell', 'cement', 'center', 'centered', 'centers', 'central', 'centralized', 'centre', 'centric', 'ceo', 'certificates', 'certification', 'certified', 'certify', 'cfa', 'cfd', 'cfo', 'cfs', 'cgmp', 'chaid', 'chain', 'chairman', 'champions', 'change', 'changepond', 'changes', 'channel', 'channels', 'charge', 'chart', 'charted', 'chartered', 'chartering', 'charting', 'charts', 'chat', 'chatbot', 'chatbots', 'check', 'checkpoint', 'chef', 'chemical', 'chemicals', 'chemist', 'chemistry', 'chennai', 'chief', 'chillers', 'chip', 'chipset', 'chromatographer', 'chromatography', 'ci', 'circuit', 'circuits', 'cis', 'cisa', 'cisco', 'cism', 'cissp', 'citi', 'cities', 'citiustech', 'citrix', 'city', 'civil', 'cl', 'claim', 'claims', 'clarity', 'class', 'classes', 'classic', 'classroom', 'clean', 'cleaning', 'cleansing', 'clearing', 'click', 'client', 'clients', 'climate', 'clinical', 'close', 'closing', 'closure', 'cloud', 'cloudera', 'cluster', 'clustering', 'cm', 'cma', 'cmc', 'cmd', 'cmmi', 'cmo', 'cms', 'cmt', 'cng', 'coach', 'coaching', 'coal', 'coating', 'cobit', 'cobol', 'coc', 'cochin', 'cocoa', 'code', 'codeigniter', 'coder', 'codes', 'coding', 'cognitive', 'cognizant', 'cognos', 'cold', 'collaboration', 'collar', 'collateral', 'collaterals', 'collection', 'collections', 'college', 'colleges', 'com', 'comm', 'command', 'commerce', 'commercial', 'commissioning', 'commodities', 'commodity', 'communication', 'communications', 'community', 'comp', 'companies', 'company', 'compensation', 'competency', 'competition', 'competitive', 'competitor', 'compiler', 'complaint', 'complaints', 'complete', 'compliance', 'compliances', 'component', 'components', 'composite', 'computation', 'computational', 'compute', 'computer', 'computers', 'computing', 'concept', 'concepts', 'conceptualization', 'concur', 'concurrent', 'condition', 'conduct', 'conducting', 'conference', 'conferences', 'configuration', 'configurations', 'configuring', 'confirmit', 'conflict', 'confluence', 'conglomerate', 'connect', 'connected', 'connecting', 'connection', 'connectivity', 'connectors', 'considered', 'console', 'consolidation', 'construct', 'construction', 'consultancy', 'consultant', 'consultants', 'consultative', 'consulting', 'consumer', 'consumption', 'contact', 'containers', 'content', 'continuity', 'continuous', 'contract', 'contracting', 'contractor', 'contracts', 'contractual', 'contribution', 'contributor', 'control', 'controller', 'controllership', 'controlling', 'controls', 'conversion', 'conversions', 'cool', 'cooling', 'coordinating', 'coordination', 'coordinator', 'copa', 'copy', 'copywriter', 'copywriting', 'cordova', 'core', 'corp', 'corporate', 'corporates', 'corporation', 'corrective', 'corrosion', 'cosmetics', 'cost', 'costing', 'cotton', 'counsel', 'counseling', 'counsellor', 'counter', 'counterparty', 'country', 'coupa', 'course', 'courses', 'coverage', 'cpa', 'cpc', 'cpg', 'cpq', 'cq', 'cq5', 'cr', 'crash', 'crawling', 'creation', 'creative', 'credentialing', 'credit', 'crew', 'critical', 'crm', 'cro', 'crop', 'cross', 'crystal', 'cs', 'csa', 'cse', 'csm', 'csr', 'css', 'css3', 'ctc', 'cto', 'cube', 'cucumber', 'cuda', 'culture', 'cum', 'curation', 'currency', 'current', 'curriculum', 'cursor', 'custom', 'custome', 'customer', 'customet', 'customization', 'customize', 'customized', 'customs', 'cv', 'cvp', 'cvs', 'cwa', 'cx', 'cxo', 'cybage', 'cyber', 'cybersecurity', 'cycle', 'cytometry', 'd2h', 'd3', 'daily', 'dairy', 'damage', 'dash', 'dashboard', 'dashboards', 'data', 'database', 'databases', 'datacenter', 'datascience', 'datastage', 'dataware', 'datawarehouse', 'datawarehousing', 'date', 'dax', 'day', 'days', 'db', 'db2', 'dba', 'dbm', 'dbms', 'dbs', 'dce', 'dcf', 'dcl', 'dcm', 'deadline', 'deal', 'dealer', 'dealing', 'debit', 'debt', 'debugging', 'decipher', 'decision', 'decisions', 'decomposition', 'dedicated', 'deemed', 'deep', 'defect', 'defined', 'definition', 'degree', 'delay', 'delegate', 'delhi', 'delivery', 'dell', 'deloitte', 'delphi', 'delta', 'demand', 'demandware', 'demo', 'demurrage', 'denial',
        'denials', 'denim', 'dental', 'dentist', 'department', 'deployment', 'deposits', 'dept', 'deputy', 'derivatives', 'dermatologist', 'descriptive', 'design', 'designer', 'designers', 'designing', 'designs', 'desk', 'desktop', 'detailed', 'detection', 'dev', 'develoer', 'develop', 'developement', 'developer', 'developers', 'developing', 'development', 'developments', 'device', 'devices', 'devops', 'dfa', 'dfmea', 'dfp', 'dft', 'dgm', 'di', 'diagnostics', 'dialer', 'dialog', 'dietetics', 'digital', 'digitization', 'diligence', 'dimension', 'dimensional', 'diploma', 'direct', 'directing', 'direction', 'director', 'directory', 'dis', 'disbursement', 'discipline', 'disciplined', 'discovery', 'discrete', 'display', 'dispute', 'disruptive', 'distance', 'distressed', 'distributed', 'distribution', 'distributor', 'disys', 'div', 'diverse', 'diversified', 'diversity', 'division', 'divisional', 'django', 'dlp', 'dm', 'dmfi', 'dms', 'dnb', 'docker', 'dockers', 'docketing', 'doctor', 'doctorate', 'doctors', 'document', 'documentation', 'documents', 'documentum', 'dom', 'domain', 'domains', 'domestic', 'dot', 'dotnet', 'double', 'doubleclick', 'dpr', 'dq', 'dqa', 'dra', 'drafting', 'draw', 'drawings', 'drc', 'drg', 'drive', 'driven', 'driver', 'drivers', 'drives', 'driving', 'drm', 'dropout', 'drug', 'drupal', 'ds', 'dse', 'dso', 'dsp', 'dt', 'dtm', 'dtp', 'du', 'durability', 'durable', 'durables', 'dv', 'dv360', 'dvp', 'dw', 'dwbi', 'dwh', 'dxc', 'dy', 'dyna', 'dynamic', 'dynamics', 'dynatrace', 'e2e', 'ea', 'ead', 'eagle', 'eai', 'early', 'east', 'ebs', 'ecc', 'ece', 'ecl', 'eclipse', 'ecm', 'ecommerce', 'econometric', 'economic', 'economics', 'economy', 'ecosystem', 'ecu', 'ed', 'eda', 'edge', 'edi', 'editing', 'editor', 'editorial', 'editors', 'edr', 'education', 'educational', 'edw', 'ee', 'effective', 'effectiveness', 'efficiency', 'effort', 'egrc', 'ehr', 'ehs', 'eia', 'einstein', 'eir', 'elastic', 'elasticsearch', 'elearning', 'electrical', 'electro', 'electronic', 'electronics', 'element', 'eligible', 'elk', 'eloqua', 'elt', 'email', 'emails', 'embedded', 'ember', 'emc', 'emea', 'emergency', 'emerging', 'emi', 'employed', 'employee', 'employer', 'employment', 'ems', 'en', 'enablement', 'encore', 'encryption', 'end', 'ended', 'endpoint', 'energy', 'enforcenment', 'eng', 'engagement', 'engg', 'engine', 'engineer', 'engineering', 'engineers', 'engines', 'english', 'enhancement', 'enhancements', 'enodeb', 'enterprise', 'enterprises', 'entertainment', 'entity', 'entrepreneur', 'entrepreneurship', 'entries', 'entry', 'environment', 'environmental', 'environments', 'epc', 'epic', 'epm', 'equip', 'equipment', 'equipments', 'equities', 'equity', 'er', 'erp', 'error', 'erwin', 'esb', 'escalation', 'escription', 'esp', 'estate', 'estimation', 'estimator', 'ethical', 'etl', 'etrm', 'eu', 'europe', 'eutx', 'evaluation', 'evaluator', 'evangelizing', 'event', 'events', 'eviews', 'evolve', 'ewm', 'ex', 'exadata', 'excel', 'excellence', 'excellent', 'exchange', 'excited', 'exciting', 'exclusive', 'exec', 'execution', 'executive', 'executives', 'exela', 'existing', 'exit', 'exp', 'expansion', 'expect', 'expense', 'expenses', 'experience', 'experienced', 'experimental', 'experis', 'expert', 'expertise', 'experts', 'exploit', 'exploration', 'exploratory', 'export', 'exports', 'exposure', 'express', 'ext', 'extended', 'external', 'extjs', 'extra', 'extractable', 'extraction', 'extrusion', 'f2f', 'f5', 'fa', 'fabric', 'fabrication', 'facebook', 'facets', 'facilitation', 'facilities', 'facility', 'facing', 'factiva', 'factor', 'factors', 'factory', 'factset', 'faculty', 'failover', 'failure', 'familiarity', 'family', 'farm', 'fashion', 'fast', 'fastest', 'fault', 'fb', 'fea', 'feasibility', 'feature', 'feedback', 'fees', 'fem', 'female', 'females', 'fi', 'fibre', 'fica', 'fico', 'field', 'figures', 'file', 'filenet', 'filing', 'filling', 'fin', 'final', 'finalisation', 'finalization', 'finance', 'financial', 'financials', 'financing', 'finished', 'finishing', 'finite', 'fintech', 'fiori', 'fired', 'firewall', 'firewalls', 'firm', 'firms', 'firmware', 'fit', 'fix', 'fixed', 'fixing', 'flair', 'flare', 'flash', 'flask', 'fleet', 'flex', 'flexera', 'flexible', 'flight', 'flipkart', 'floor', 'floorplanning', 'flow', 'flows', 'fluid', 'flume', 'fm', 'fmcd', 'fmcg', 'fms', 'fna', 'fo', 'focus', 'follow', 'food', 'force', 'forecast', 'forecasting', 'foreign', 'forensic', 'forensics', 'forest', 'forestry', 'forests', 'forex', 'forging', 'form', 'formal', 'formalities', 'format', 'formats', 'formatting', 'forms', 'formulas', 'formulating', 'formulation', 'formulations', 'fortinet', 'fortune', 'forum', 'forwarding', 'foundation', 'foundational', 'foundations', 'founding', 'fp', 'fpa', 'fpga', 'fpna', 'fractal', 'frame', 'framework', 'frameworks', 'franchise', 'franchisee', 'fraud', 'frd', 'freelance', 'freelancer', 'freight', 'french', 'frequency', 'fresh', 'fresher', 'freshers', 'frm', 'frontend', 'frozen', 'frtb', 'fs', 'fscm', 'fsldm', 'fso', 'fullstack', 'fulltime', 'function', 'functional', 'functionality', 'functions', 'fund', 'fundamental', 'fundamentals', 'funded', 'funding', 'funds', 'furniture', 'fusion', 'future', 'futures', 'gaap', 'gain', 'gainsight', 'game', 'games', 'gaming', 'gap', 'garment', 'garments', 'gartner', 'gas', 'gate', 'gateway', 'gather', 'gathering', 'gc', 'gcr', 'gd', 'gdpr', 'gds', 'ge', 'gear', 'gen', 'general', 'generalist', 'generation', 'generators', 'generic', 'genesys', 'genome', 'genpact', 'genre', 'geographic', 'geographical', 'geotechnical', 'german', 'getter', 'ggplot', 'ghaziabad', 'gimp', 'gis', 'git', 'github', 'gitlab', 'gl', 'global', 'glp', 'gm', 'gmi', 'gnm', 'goa', 'golang', 'good', 'goods', 'google', 'governance', 'government', 'govt', 'gp', 'gpp', 'gpu', 'grad', 'grade', 'graduate', 'graduates', 'grafana', 'grails', 'graphic', 'graphics', 'graphite', 'grc', 'greases', 'great', 'greater', 'green', 'greenplum', 'grid', 'grievance', 'grievances', 'grocery', 'grooming', 'groovy', 'ground', 'group', 'growing', 'growth', 'gst', 'gt', 'gtm', 'gts', 'guest', 'gui', 'guide', 'guidelines', 'guides', 'gujarat', 'gurgaon', 'guru', 'gurugram', 'gwt', 'gxt', 'h1b', 'h2o', 'hacker', 'hacking', 'hadoop', 'hair', 'hajj', 'hana', 'hand', 'handling', 'hands', 'handset', 'hard', 'hardware', 'harness', 'hat', 'having', 'hbase', 'hcl', 'hcm', 'hcv', 'hdfc', 'hdfs', 'hdinsight', 'head', 'headcount', 'headquartered', 'health', 'healthcare', 'heat', 'heaters', 'heavy', 'hedge', 'help', 'helpdesk', 'heor', 'hfm', 'hfr', 'hft', 'hgs', 'hi', 'hibernate', 'high', 'higher', 'highly', 'highway', 'hiirng', 'hil', 'hindi', 'hipaa', 'hire', 'hireajackal', 'hiring', 'hive', 'hl', 'hl7', 'hlookup', 'hmm', 'hni', 'hod', 'holiday', 'home', 'hong', 'hoovers', 'horeca', 'hospital', 'hospitality', 'host', 'hosting', 'hotel', 'house', 'housekeeping', 'housing', 'hp', 'hpe', 'hplc', 'hq', 'hr', 'hrba', 'hrbp', 'hrd', 'hris', 'hrm', 'hse', 'html', 'html5', 'http', 'hubspot', 'huge', 'human', 'hunt', 'hunting', 'hvac', 'hy', 'hybrid', 'hybris', 'hyderabad', 'hyper', 'hyperion', 'hypermesh', 'hypothesis', 'iaas', 'iam', 'ibm', 'ic', 'icd', 'icg', 'icm', 'icp', 'ict', 'icwa', 'id', 'ide', 'idea', 'ideas', 'ideation', 'identification', 'identity', 'ids', 'ifrs', 'ifrs9', 'igaap', 'ii', 'iii', 'iiit', 'iim', 'iims', 'iit', 'ile', 'illustrator', 'ilt', 'image', 'imaging', 'immediate', 'immediately', 'immunology', 'impact', 'impairments', 'impala', 'implement', 'implementation', 'implementations', 'import', 'improvement', 'ims', 'inbound', 'incharge', 'incident', 'incidents', 'income', 'incoming', 'ind', 'independently', 'indesign', 'index', 'indexing', 'india', 'indian', 'indirect', 'individual', 'indore', 'induction', 'industrial', 'industries', 'industry', 'info', 'infopath', 'infor', 'informatica', 'informatics', 'information', 'infotainment', 'infra', 'infrastructure', 'ingenium', 'ingestion', 'initiation', 'initiative', 'initiatives', 'initio', 'injection', 'innovation', 'innovations', 'innovative', 'inpatient', 'ins', 'inside', 'insight', 'insights', 'inspection', 'inspector', 'install', 'installation', 'institute', 'institutes', 'institutional', 'institutions', 'instruction', 'instructional', 'instrument', 'instrumentation', 'instruments', 'insurance', 'integrated', 'integration', 'integrator', 'integrity', 'intel', 'intellectual', 'intelligence', 'intelligent', 'inter', 'interact', 'interaction', 'interactive', 'interconnect', 'interface', 'interfaces', 'interfacing', 'interior', 'intermediate', 'intermediates', 'intern', 'internal', 'international', 'internet', 'interns', 'internship', 'interpersonal', 'interpret', 'interpretation', 'interpreter', 'interventions', 'interview', 'interviewing', 'interviews', 'intrusion', 'inventory', 'invest', 'investigation', 'investigations', 'investigator', 'investment', 'investments', 'investor', 'investran', 'invision', 'inviting', 'invoice', 'io', 'ioc', 'iom', 'ionic', 'ios', 'iot', 'ip', 'ipc', 'ipcc', 'iphone', 'ippt', 'ipr', 'ips', 'iq', 'ir', 'isb', 'isi', 'isms', 'iso', 'issue', 'issues', 'istqb', 'isu', 'italian', 'ites', 'itgc', 'iti', 'itil', 'itsm', 'itunes', 'iv', 'ivhm', 'j2ee', 'jain', 'jaipur', 'japanese', 'java', 'java8', 'javascript', 'jax', 'jcaps', 'jcl', 'jd', 'jdbc', 'jde', 'jee', 'jenkins', 'jersey', 'jetty', 'jewellery', 'jio', 'jira', 'jmeter', 'jni', 'jnu', 'job', 'jobs', 'join', 'joinee', 'joinees', 'joiner', 'joiners', 'joining', 'joins', 'journal', 'journalism', 'journals', 'journey', 'jquery', 'jr', 'js', 'jsf', 'json', 'junior', 'juniper', 'junit', 'jupyter', 'kafka', 'kam', 'kanada', 'kannada', 'karnataka', 'keeping', 'kerala', 'keras', 'kernel', 'key', 'keyboard', 'keynote', 'keyword', 'kf', 'khar', 'kibana', 'kinesis', 'kitchen', 'know', 'knowledge', 'known', 'kofax', 'kolkata', 'kong', 'kotlin', 'kpi', 'kpo', 'kra', 'kta', 'kubernetes', 'kyc', 'l1', 'l2', 'l3', 'lab', 'label', 'laboratory', 'labour', 'labs', 'lac', 'lacs', 'lake', 'lakhs', 'lambda', 'lamp', 'lan',
        'land', 'landscape', 'language', 'languages', 'lap', 'laravel', 'large', 'largest', 'latency', 'launch', 'laundering', 'law', 'laws', 'lawyer', 'layer', 'layers', 'layout', 'lcms', 'lcv', 'ldap', 'leachable', 'lead', 'leader', 'leadership', 'leading', 'leads', 'lean', 'learn', 'learning', 'lease', 'leasing', 'leave', 'ledger', 'legal', 'leisure', 'lending', 'leonardo', 'letter', 'level', 'levels', 'lgd', 'li', 'liabilities', 'liability', 'liaison', 'libraries', 'library', 'licensing', 'life', 'lifecycle', 'liferay', 'lifesciences', 'lifestyle', 'lift', 'light', 'lightning', 'like', 'likely', 'limit', 'limited', 'lims', 'line', 'linear', 'lingual', 'link', 'linked', 'linkedin', 'linq', 'linux', 'liquidity', 'list', 'listed', 'listening', 'listings', 'literate', 'literature', 'litigation', 'little', 'live', 'livestock', 'living', 'll', 'llb', 'lld', 'llm', 'llp', 'lms', 'load', 'loadrunner', 'loan', 'loaniq', 'loans', 'lobby', 'local', 'localization', 'location', 'locations', 'log', 'logging', 'logical', 'logistic', 'logistics', 'looking', 'lookup', 'los', 'loss', 'lot', 'low', 'lower', 'loyalty', 'lpaas', 'lpo', 'ls', 'lte', 'lubricants', 'lumira', 'luxury', 'lvs', 'lync', 'm3', 'ma', 'mac', 'machine', 'machinery', 'machines', 'macro', 'macros', 'magazine', 'magento', 'mahindra', 'mail', 'mainframe', 'maintaining', 'maintenance', 'major', 'making', 'malayalam', 'male', 'malware', 'manage', 'managed', 'management', 'manager', 'managerial', 'managers', 'managing', 'managment', 'mandatory', 'mangement', 'manipulation', 'manpower', 'mantas', 'manual', 'manufacturing', 'map', 'mapping', 'mapreduce', 'maps', 'marathi', 'margin', 'marine', 'mark', 'market', 'marketer', 'marketing', 'marketo', 'marketplace', 'marketplaces', 'markets', 'markup', 'mart', 'martech', 'mass', 'master', 'matching', 'material', 'materials', 'mathematical', 'mathematics', 'maths', 'matlab', 'matrix', 'matter', 'matters', 'maven', 'max', 'maximization', 'maximo', 'maximum', 'mba', 'mbbs', 'mbd', 'mc', 'mca', 'mci', 'mcom', 'mcse', 'md', 'mdi', 'mdm', 'mds', 'mdx', 'mean', 'measurement', 'meat', 'mech', 'mechanical', 'mechanics', 'media', 'medical', 'medicine', 'meditech', 'medium', 'mee', 'meeting', 'meetings', 'mega', 'member', 'memo', 'memory', 'menswear', 'mentor', 'mentoring', 'mep', 'merchandise', 'merchandising', 'merchant', 'merger', 'mergers', 'mes', 'messaging', 'met', 'meta', 'metal', 'meter', 'metering', 'method', 'methodologies', 'methodology', 'methods', 'metric', 'metrics', 'mf', 'mfc', 'mfg', 'mgmt', 'mgr', 'mgt', 'mhrm', 'mi', 'micro', 'microbiological', 'microbiologist', 'microbiology', 'microfinance', 'microservices', 'microsoft', 'microstrategy', 'microstrategy_', 'mid', 'middle', 'middleware', 'migration', 'min', 'minimum', 'mining', 'minitab', 'mirroring', 'mis', 'misg', 'mitigation', 'mix', 'mixed', 'mktg', 'ml', 'mlt', 'mm', 'mnc', 'mnm', 'mobile', 'mobility', 'mobilization', 'mock', 'mockito', 'model', 'modeler', 'modeling', 'modeller', 'modellers', 'modelling', 'models', 'modern', 'modifying', 'modular', 'module', 'modules', 'mohali', 'monday', 'monetary', 'money', 'mongodb', 'monitor', 'monitoring', 'monster', 'month', 'monthly', 'months', 'mortgage', 'mortgages', 'motion', 'motivation', 'motor', 'moulding', 'mp', 'mpharm', 'mqtt', 'mr', 'mrb', 'mrm', 'mro', 'mrp', 'ms', 'msbi', 'msc', 'msd', 'msme', 'msoffice', 'mst', 'mt', 'mtech', 'mts', 'mu', 'mule', 'mulesoft', 'multi', 'multimedia', 'multinational', 'multiple', 'multitasking', 'multithreading', 'mulund', 'mumbai', 'murex', 'mutual', 'mvc', 'mvp', 'mvvm', 'mypedia', 'mysql', 'mystery', 'nagar', 'nagios', 'nagpur', 'naive', 'nam', 'nastran', 'national', 'native', 'natural', 'naukri', 'nav', 'navi', 'navigation', 'navision', 'nbfc', 'ncr', 'near', 'need', 'needed', 'needs', 'negative', 'negotiation', 'negotiations', 'neo4j', 'net', 'netapp', 'netcool', 'netezza', 'netflix', 'netinsight', 'netsuite', 'network', 'networking', 'networks', 'neural', 'new', 'news', 'newsletters', 'nexus', 'nfv', 'ngo', 'nice', 'nielsen', 'night', 'nist', 'nit', 'nits', 'nlg', 'nlp', 'nltk', 'nms', 'nn', 'noc', 'node', 'nodejs', 'noida', 'non', 'north', 'nos', 'nosql', 'note', 'notes', 'notice', 'november', 'npd', 'npi', 'npm', 'numbers', 'numerical', 'numpy', 'nurse', 'nursing', 'nutrition', 'nvh', 'o2c', 'oaf', 'obia', 'obiee', 'object', 'objective', 'objects', 'ocean', 'oct', 'octopus', 'od', 'odbc', 'odi', 'odoo', 'oe', 'ofc', 'offer', 'offering', 'offers', 'office', 'officer', 'officers', 'offline', 'offshore', 'ofsaa', 'oil', 'oils', 'oim', 'ola', 'ollydbg', 'omni', 'omniture', 'onboarding', 'oncology', 'online', 'onshore', 'onsite', 'oo', 'ooad', 'oop', 'oops', 'open', 'opencv', 'opening', 'openings', 'openstack', 'opentext', 'operating', 'operation', 'operational', 'operations', 'operator', 'opex', 'ophthalmic', 'opportunities', 'opportunity', 'ops', 'optical', 'optics', 'optimisation', 'optimization', 'options', 'oracle', 'oral', 'orchestration', 'order', 'orders', 'ordination', 'ordinator', 'org', 'organic', 'organisation', 'organisational', 'organization', 'organizational', 'organizing', 'orientation', 'oriented', 'origination', 'orm', 'os', 'osc', 'osi', 'oss', 'otc', 'otm', 'ott', 'outbound', 'outcome', 'outcomes', 'outlets', 'outlook', 'outreach', 'outs', 'outsourcing', 'overall', 'overseas', 'oversight', 'owner', 'p2p', 'p6', 'pa', 'paas', 'pace', 'package', 'packages', 'packaging', 'page', 'pages', 'paid', 'pain', 'pan', 'pandas', 'panel', 'paper', 'paralegal', 'pardot', 'parel', 'parsing', 'participates', 'parties', 'partner', 'partnering', 'partners', 'partnership', 'partnerships', 'parts', 'party', 'pas', 'pass', 'passed', 'passionate', 'patch', 'patent', 'patents', 'pathology', 'patient', 'pattern', 'patterns', 'pavement', 'pay', 'payable', 'payables', 'payer', 'payment', 'payments', 'payroll', 'pcb', 'pci', 'pcie', 'pd', 'pdf', 'pdm', 'pe', 'pearl', 'pec', 'peeragarhi', 'pega', 'penetration', 'pension', 'pensions', 'pentaho', 'people', 'peoplesoft', 'percentage', 'perficient', 'perforce', 'performance', 'period', 'periodic', 'perl', 'permanent', 'persistent', 'person', 'personal', 'personality', 'personalization', 'personnel', 'pes', 'petrochemical', 'petty', 'pf', 'pgdm', 'ph', 'pha', 'pharm', 'pharma', 'pharmaceutical', 'pharmaceuticals', 'pharmacovigilance', 'pharmacy', 'phase', 'phd', 'phone', 'phonegap', 'phones', 'photographs', 'photoshop', 'php', 'physical', 'physician', 'physics', 'physiotherapist', 'pi', 'pig', 'pipeline', 'piping', 'pivot', 'pl', 'place', 'placement', 'plan', 'planner', 'planning', 'plans', 'plant', 'plastic', 'platform', 'platforms', 'play', 'plc', 'plm', 'plsql', 'plugins', 'plus', 'pm', 'pmc', 'pmi', 'pmo', 'pmp', 'pms', 'pmt', 'po', 'poc', 'point', 'police', 'policies', 'policy', 'political', 'polymer', 'portal', 'portals', 'portfolio', 'portlets', 'portuguese', 'pos', 'position', 'positioning', 'positions', 'positive', 'post', 'postgresql', 'posting', 'postman', 'power', 'powerbi', 'powercenter', 'powerpoint', 'powershell', 'pp', 'ppa', 'ppc', 'ppm', 'ppp', 'ppt', 'pr', 'practice', 'practitioner', 'pradesh', 'pre', 'precast', 'precision', 'predictive', 'preferably', 'preferred', 'premier', 'premiere', 'premise', 'premium', 'preparation', 'prepare', 'preparing', 'prepress', 'presales', 'presence', 'present', 'presentable', 'presentation', 'presentations', 'president', 'press', 'pressure', 'prevention', 'price', 'pricing', 'primarily', 'primary', 'primavera', 'prime', 'primetime', 'prince', 'prince2', 'principal', 'principle', 'principles', 'print', 'printer', 'printing', 'prior', 'prism', 'privacy', 'private', 'prm', 'pro', 'proactive', 'proactively', 'problem', 'procedure', 'procedures', 'process', 'processes', 'processing', 'processor', 'procure', 'procurement', 'prod', 'product', 'production', 'productivity', 'products', 'professional', 'professionals', 'professor', 'proficiency', 'profile', 'profiles', 'profiling', 'profit', 'profitability', 'program', 'programing', 'programme', 'programmer', 'programmers', 'programmes', 'programming', 'programs', 'progressing', 'project', 'projection', 'projections', 'projects', 'promotion', 'promotional', 'promotions', 'proof', 'property', 'proposal', 'proposals', 'prosecution', 'prospect', 'prospecting', 'protection', 'protocol', 'protocols', 'prototype', 'prototyping', 'provider', 'providers', 'providing', 'proxy', 'prpc', 'ps', 'psychology', 'ptp', 'public', 'publication', 'publisher', 'publishers', 'publishing', 'pune', 'punjabi', 'puppet', 'purchase', 'purchasing', 'purifier', 'purpose', 'pursuing', 'pv', 'pvc', 'pvt', 'pwd', 'py', 'pyspark', 'python', 'pytorch', 'qa', 'qc', 'qe', 'qfd', 'qlik', 'qliksense', 'qlikview', 'qm', 'qmr', 'qms', 'qradar', 'qs', 'qt', 'qtp', 'qualcomm', 'qualification', 'qualified', 'qualitative', 'quality', 'qualtrics', 'qualys', 'quant', 'quantitative', 'quantities', 'quantity', 'quantum', 'quarterly', 'queries', 'query', 'quest', 'queue', 'quick', 'quotation', 'r12', 'r2r', 'ra', 'rabbitmq', 'radiant', 'radio', 'radiology', 'rails', 'railways', 'raising', 'rams', 'random', 'range', 'ranking', 'ranorex', 'rate', 'rates', 'rating', 'ratings', 'ratio', 'ratios', 'raw', 'rbi', 'rc', 'rca', 'rcc', 'rcm', 'rdbms', 'rdd', 'rds', 'reach', 'react', 'reactjs', 'reactnative', 'reader', 'reading', 'ready', 'real', 'reality', 'reasoning', 'receipt', 'receivable', 'receivables', 'receiving', 'recent', 'receptionist', 'recognition', 'recommendations', 'reconciliation', 'reconciliations', 'record', 'records', 'recovery', 'recruit', 'recruiter', 'recruiters', 'recruiting', 'recruitment', 'red', 'redhat', 'redis', 'redshift', 'reduce', 'reduction', 'redux', 'reengineering', 'reference', 'refinery', 'refx', 'region', 'regional', 'registration', 'regression', 'regular', 'regulation', 'regulatory', 'reinsurance', 'rejection', 'related', 'relation', 'relational', 'relations', 'relationship', 'relationships', 'relative', 'release', 'relevant', 'reliability', 'reliance', 'relocate',
        'remedy', 'remote', 'remotely', 'renewable', 'renewal', 'renewals', 'renowned', 'repair', 'replenishment', 'replication', 'report', 'reporting', 'reports', 'reposting', 'representative', 'reputation', 'reputed', 'request', 'require', 'required', 'requirement', 'requirements', 'requisition', 'research', 'researcher', 'researchers', 'reserach', 'reserving', 'resident', 'residential', 'residing', 'resolution', 'resolve', 'resolving', 'resource', 'resources', 'resourcing', 'response', 'responses', 'responsibility', 'responsive', 'responsys', 'rest', 'restaurant', 'restful', 'restructuring', 'result', 'retail', 'retention', 'retirement', 'retrieval', 'return', 'returns', 'reuters', 'revenue', 'reverse', 'review', 'reviewer', 'reviewing', 'reviews', 'revised', 'revising', 'rewards', 'rf', 'rfi', 'rfis', 'rfp', 'rfps', 'rfq', 'rfx', 'rhel', 'ring', 'risk', 'rm', 'rmbs', 'rms', 'ro', 'road', 'roadmap', 'roadmaps', 'robotic', 'robotics', 'roi', 'role', 'roles', 'roll', 'room', 'root', 'ror', 'rotating', 'rotational', 'route', 'row', 'rpa', 'rpg', 'rqa', 'rs', 'rsa', 'rta', 'rtl', 'rtr', 'ruby', 'rules', 'rural', 'russian', 'rwa', 's2p', 'sa', 'saas', 'safe', 'safety', 'sal', 'sale', 'sales', 'salesforce', 'sample', 'sampling', 'san', 'sanction', 'sanity', 'sap', 'sas', 'sata', 'satisfaction', 'sb', 'sbu', 'sc', 'scala', 'scalability', 'scale', 'scenario', 'scenarios', 'schedule', 'scheduled', 'scheduling', 'schema', 'school', 'schools', 'science', 'sciences', 'scientific', 'scientist', 'scientists', 'scikit', 'scipy', 'scm', 'scope', 'score', 'scouting', 'scraping', 'scratch', 'screening', 'scribe', 'script', 'scripting', 'scripts', 'scrum', 'sd', 'sda', 'sdd', 'sdet', 'sdk', 'sdks', 'sdl', 'sdlc', 'sdn', 'sdtm', 'se', 'search', 'searching', 'sec', 'secondary', 'secretarial', 'secretary', 'sector', 'sectors', 'secured', 'securities', 'security', 'segment', 'segmentation', 'selection', 'selenium', 'self', 'sell', 'selling', 'sem', 'semantics', 'semi', 'semiconductor', 'seminars', 'sencha', 'senior', 'sense', 'sensing', 'seo', 'series', 'server', 'servers', 'service', 'servicenow', 'services', 'servicing', 'servlets', 'setting', 'settlement', 'settlements', 'setup', 'sfdc', 'sfe', 'sg', 'sgs', 'share', 'shared', 'sharepoint', 'shares', 'sharp', 'sheet', 'sheets', 'shell', 'shift', 'shifts', 'ship', 'shipping', 'shoot', 'shooting', 'shop', 'shopify', 'shopper', 'shopping', 'shore', 'short', 'sibm', 'siebel', 'siem', 'sigma', 'signal', 'silicon', 'simulation', 'singapore', 'single', 'site', 'sitecatalyst', 'sitecore', 'size', 'sizing', 'sketch', 'skill', 'skills', 'sla', 'slas', 'small', 'smart', 'smartprice', 'sme', 'smm', 'smo', 'smoke', 'sms', 'snaplogic', 'snow', 'soa', 'soap', 'soar', 'soc', 'social', 'soft', 'software', 'sol', 'solar', 'solid', 'solman', 'solr', 'solution', 'solutioning', 'solutions', 'solving', 'sop', 'sops', 'sor', 'source', 'sourcing', 'south', 'sow', 'sox', 'sp', 'space', 'spanish', 'spare', 'spark', 'speaking', 'spec', 'special', 'specialisation', 'specialist', 'specialists', 'speciality', 'specialization', 'specialized', 'specialty', 'specification', 'specifications', 'speech', 'speed', 'spend', 'spi', 'spinning', 'spl', 'splunk', 'spoken', 'sponsorship', 'sports', 'spot', 'spotfire', 'spreadsheets', 'spring', 'sprint', 'spss', 'sqa', 'sql', 'sqlite', 'sqlserver', 'sqoop', 'sr', 'sre', 'srl', 'srm', 'srs', 'ssas', 'ssis', 'sso', 'ssrs', 'sssr', 'sta', 'staad', 'stability', 'stack', 'staff', 'staffing', 'stage', 'stakeholder', 'stakeholders', 'standard', 'standards', 'stanford', 'staples', 'start', 'startup', 'startups', 'stastical', 'stat', 'stata', 'state', 'statement', 'statements', 'static', 'statistical', 'statistician', 'statistics', 'stats', 'statutory', 'steam', 'steel', 'sterling', 'steward', 'stl', 'stock', 'stop', 'storage', 'store', 'stored', 'stores', 'stories', 'storm', 'storyboard', 'storyline', 'stpi', 'strategic', 'strategies', 'strategist', 'strategy', 'stream', 'streaming', 'street', 'stress', 'stressed', 'strong', 'structual', 'structural', 'structure', 'structured', 'structures', 'structuring', 'student', 'students', 'studies', 'studio', 'study', 'sub', 'subject', 'subjects', 'submission', 'success', 'successfactor', 'successfactors', 'suite', 'summarizer', 'summarizing', 'summary', 'sun', 'super', 'supervising', 'supervision', 'supervisor', 'supplier', 'supply', 'support', 'supporting', 'surfing', 'surveillance', 'survey', 'surveying', 'surveyor', 'sustainable', 'sustaining', 'svm', 'svn', 'svp', 'sw', 'swift', 'swing', 'switching', 'symfony', 'syndicate', 'syndicated', 'syndication', 'synopsys', 'synthesis', 'systematic', 'systems', 't24', 'ta', 'table', 'tableau', 'tables', 'tablets', 'tabular', 'tactical', 'tag', 'tagging', 'taking', 'talend', 'talent', 'taleo', 'tally', 'tamil', 'tandem', 'target', 'targeting', 'task', 'tat', 'tax', 'taxation', 'taxes', 'tb', 'tcc', 'tcl', 'tcp', 'tcs', 'tds', 'teacher', 'teachers', 'teaching', 'team', 'teams', 'tech', 'techmahindra', 'technical', 'technician', 'techniques', 'techno', 'technologies', 'technologist', 'technology', 'telco', 'tele', 'telecalling', 'telecom', 'telecommunication', 'telemarketing', 'telematics', 'teleperformance', 'telephonic', 'telephony', 'telesales', 'telugu', 'temenos', 'temporary', 'tender', 'tendering', 'tensorflow', 'teradata', 'term', 'territory', 'test', 'tester', 'testers', 'testing', 'testng', 'tests', 'text', 'textile', 'textiles', 'tfs', 'thane', 'theory', 'therapeutic', 'thermal', 'things', 'thingworx', 'thinker', 'thinking', 'thomson', 'thread', 'threading', 'threat', 'ticketing', 'tie', 'tier', 'tile', 'tiles', 'timber', 'time', 'timesheet', 'timing', 'timings', 'tiss', 'title', 'tivoli', 'tl', 'tlf', 'tlm', 'tm', 'tm1', 'tmt', 'toad', 'togaf', 'tolerance', 'tom', 'tomcat', 'tool', 'toolbox', 'tools', 'topics', 'topologies', 'tosca', 'total', 'totalagility', 'touch', 'tours', 'toxicology', 'tpa', 'tpf', 'tph', 'tpm', 'track', 'tracking', 'tractor', 'tracxn', 'trade', 'trader', 'trading', 'traditional', 'traffic', 'trafficker', 'trafficking', 'train', 'trainee', 'trainees', 'trainer', 'trainers', 'training', 'transaction', 'transactional', 'transcription', 'transcriptionist', 'transfer', 'transformation', 'transition', 'translation', 'translator', 'transmission', 'transparency', 'transport', 'transportation', 'travel', 'travelling', 'travels', 'treasury', 'treatment', 'trees', 'trend', 'trends', 'triage', 'trial', 'trials', 'tridion', 'triggers', 'trm', 'trouble', 'troubleshooting', 'trust', 'ts', 'tsql', 'ttt', 'tuesday', 'tuning', 'turbine', 'tv', 'tvs', 'twitter', 'type', 'typescript', 'typing', 'tyre', 'uae', 'uat', 'ucce', 'uft', 'ugam', 'uhg', 'ui', 'ui5', 'uipath', 'uk', 'ultra', 'ultrasound', 'uml', 'umrah', 'umts', 'undergrad', 'undergraduate', 'undergraduates', 'understanding', 'underwriter', 'underwriting', 'unica', 'unified', 'unify', 'unit', 'units', 'unity', 'universe', 'universities', 'university', 'unix', 'unsecured', 'updation', 'upgrade', 'ups', 'upsell', 'upselling', 'upstream', 'upto', 'upwork', 'urban', 'urgent', 'urgently', 'usa', 'usability', 'usage', 'usb', 'use', 'user', 'usfda', 'using', 'utilities', 'utility', 'uttar', 'uv', 'uvm', 'ux', 'va', 'vacancies', 'vacancy', 'valid', 'validation', 'validations', 'valuation', 'valuations', 'value', 'vap', 'vapi', 'vapt', 'var', 'variance', 'various', 'vashi', 'vat', 'vb', 'vba', 'vc', 'vdi', 'veetechnologies', 'vehicle', 'vehicles', 'velocity', 'vendor', 'venture', 'verbal', 'verification', 'verifier', 'verilog', 'veritas', 'version', 'vertex', 'vertica', 'vertical', 'verticals', 'vhdl', 'vice', 'video', 'view', 'views', 'virtual', 'virtualization', 'vis', 'visa', 'visio', 'vision', 'visual', 'visualisation', 'visualization', 'visualizing', 'vlookup', 'vm', 'voice', 'voip', 'volume', 'vp', 'vr', 'vs', 'vss', 'vsts', 'vtk', 'vugen', 'vulnerability', 'wages', 'walk', 'walkin', 'wan', 'want', 'warehouse', 'warehousing', 'warranty', 'waste', 'water', 'waterfall', 'watson', 'wave', 'waxes', 'wcf', 'wealth', 'wear', 'web', 'webdriver', 'webdynpro', 'weblogic', 'webmaster', 'webmethods', 'webservices', 'website', 'websites', 'websphere', 'webtrends', 'week', 'weekends', 'weekly', 'weka', 'welcome', 'welfare', 'west', 'wet', 'wfm', 'wheeler', 'white', 'wholesale', 'wi', 'wicket', 'win', 'wind', 'windchill', 'window', 'windows', 'winforms', 'wipro', 'wire', 'wireframe', 'wireless', 'wires', 'wise', 'wlan', 'wm', 'wns', 'women', 'womens', 'woocommerce', 'word', 'wordpress', 'work', 'workbench', 'workday', 'worker', 'workers', 'workflow', 'workforce', 'working', 'workplace', 'works', 'worksoft', 'world', 'worldwide', 'wpf', 'write', 'writer', 'writers', 'writing', 'written', 'wsdl', 'wso2', 'www', 'xamarin', 'xcode', 'xilinx', 'xlri', 'xml', 'xsd', 'xsjs', 'xsl', 'xslt', 'yahoo', 'year', 'years', 'yes', 'young', 'youtube', 'yr', 'yrs', 'zedo', 'zend', 'zfs', 'zonal', 'zone', 'zoology']

In [45]:
train.drop(drop,axis=1,inplace=True)
test.drop(drop,axis=1,inplace=True)
test.drop(['salary'],axis=1,inplace=True)

In [46]:
from xgboost import XGBRegressor

x = train.drop('salary',axis=1)
y = train.iloc[:,0]

xgb = XGBRegressor()
xgb.fit(x,y)
ypred = xgb.predict(test)

In [47]:
submission = pd.DataFrame(ypred,columns=['salary']).apply(lambda x: round(x)).astype('int').replace({1:'0to3',2:'3to6',3:'6to10',4:'10to15',5:'15to25',6:'25to50'})

In [48]:
submission.to_csv('submission.csv')

### Key variables

In [49]:
from sklearn.tree import DecisionTreeRegressor

dt = DecisionTreeRegressor()
dt.fit(x,y)
ypred = dt.predict(test)
pd.DataFrame(dt.feature_importances_,index=x.columns).sort_values(by=0,ascending=False).head()

Unnamed: 0,0
max_experience,0.498367
min_experience,0.102766
including,0.006013
,0.005915
Bengaluru,0.004572
