In [1]:
import pandas as pd
from datasets import load_dataset

# Load the job-postings dataset and convert its 'train' split to a pandas DataFrame.
dataset = load_dataset("lukebarousse/data_jobs")
df = dataset['train'].to_pandas()

# Data cleanup: parse the posting date column into pandas datetimes.
df['job_posted_date'] = pd.to_datetime(df['job_posted_date'])

### Accessing data

In [66]:
# Positional slice: the first three rows, all columns.
df.iloc[0:3]

Unnamed: 0,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills
0,Senior Data Engineer,Senior Clinical Data Engineer / Principal Clin...,"Watertown, CT",via Work Nearby,Full-time,False,"Texas, United States",2023-06-16 13:44:15,False,False,United States,,,,Boehringer Ingelheim,,
1,Data Analyst,Data Analyst,"Guadalajara, Jalisco, Mexico",via BeBee México,Full-time,False,Mexico,2023-01-14 13:18:07,False,False,Mexico,,,,Hewlett Packard Enterprise,"['r', 'python', 'sql', 'nosql', 'power bi', 't...","{'analyst_tools': ['power bi', 'tableau'], 'pr..."
2,Data Engineer,"Data Engineer/Scientist/Analyst, Mid or Senior...","Berlin, Germany",via LinkedIn,Full-time,False,Germany,2023-10-10 13:14:55,False,False,Germany,,,,ALPHA Augmented Services,"['python', 'sql', 'c#', 'azure', 'airflow', 'd...","{'analyst_tools': ['dax'], 'cloud': ['azure'],..."


In [67]:
# Positional slice: first two rows and first four columns (end positions are exclusive).
df.iloc[:2, :4]

Unnamed: 0,job_title_short,job_title,job_location,job_via
0,Senior Data Engineer,Senior Clinical Data Engineer / Principal Clin...,"Watertown, CT",via Work Nearby
1,Data Analyst,Data Analyst,"Guadalajara, Jalisco, Mexico",via BeBee México


In [68]:
# Label-based slice: .loc includes both endpoints, so rows 0 through 2 and every
# column from 'job_title' through 'job_via' are returned.
df.loc[:2, 'job_title':'job_via']

Unnamed: 0,job_title,job_location,job_via
0,Senior Clinical Data Engineer / Principal Clin...,"Watertown, CT",via Work Nearby
1,Data Analyst,"Guadalajara, Jalisco, Mexico",via BeBee México
2,"Data Engineer/Scientist/Analyst, Mid or Senior...","Berlin, Germany",via LinkedIn


In [69]:
# All rows, restricted to the columns from 'salary_rate' through 'salary_hour_avg'.
df.loc[:, 'salary_rate':'salary_hour_avg']

Unnamed: 0,salary_rate,salary_year_avg,salary_hour_avg
0,,,
1,,,
2,,,
3,,,
4,,,
...,...,...,...
785736,,,
785737,,,
785738,,,
785739,,,


### Data: Cleaning

In [70]:
# Show the salary columns only for postings whose 'salary_rate' is present.
(
    df.loc[:, 'salary_rate':'salary_hour_avg']
    .dropna(subset=['salary_rate'])
)

Unnamed: 0,salary_rate,salary_year_avg,salary_hour_avg
28,year,109500.0,
43,hour,,97.5
51,hour,,72.5
77,year,140000.0,
92,year,120000.0,
...,...,...,...
785624,year,139216.0,
785641,year,150000.0,
785648,year,221875.0,
785682,year,157500.0,


In [71]:
# Preview the two salary columns for index labels 0 through 10 before filling NaN values.
df.loc[0:10, ['salary_year_avg', 'salary_hour_avg']]

Unnamed: 0,salary_year_avg,salary_hour_avg
0,,
1,,
2,,
3,,
4,,
5,,
6,,
7,,
8,,
9,,


* `Cleaning`: replace the NaN salary values with the column median, then remove duplicate rows (first exact duplicates, then rows sharing the same job title and company name).

In [72]:
# Median of each salary column, used below as the replacement for missing values.
median_salary_year, median_salary_hour = (
    df[column].median() for column in ('salary_year_avg', 'salary_hour_avg')
)

In [73]:
# Work on an independent copy. A plain `df_fillna = df` only creates a second
# name for the same DataFrame, so assigning filled columns to `df_fillna`
# would also overwrite the NaN values in `df`.
df_fillna = df.copy()

In [74]:
# Replace the missing values in each salary column with that column's median.
for column, fill_value in (
    ('salary_year_avg', median_salary_year),
    ('salary_hour_avg', median_salary_hour),
):
    df_fillna[column] = df_fillna[column].fillna(fill_value)

In [75]:
# Check the fill: show the two salary columns for index labels 0 through 5.
df_fillna.loc[0:5, ['salary_year_avg', 'salary_hour_avg']]

Unnamed: 0,salary_year_avg,salary_hour_avg
0,115000.0,45.98
1,115000.0,45.98
2,115000.0,45.98
3,115000.0,45.98
4,115000.0,45.98
5,115000.0,45.98


In [76]:
# Starting point for de-duplication. This binds a second name to the same
# DataFrame object as df_fillna; it does not make a copy.
df_unique = df_fillna


In [77]:
# Drop rows that are identical across every column.
df_unique = df_unique.drop_duplicates()

# Report the row counts before and after, measured against the filled frame.
rows_before = len(df_fillna)
rows_after = len(df_unique)
print('original no of rows df:', rows_before)
print('no of unique rows df:', rows_after)
print('deleted rows = ', rows_before - rows_after)

original no of rows df: 785741
no of unique rows df: 785640
deleted rows =  101


In [78]:
# Keep only the first posting for each (job_title, company_name) pair.
duplicate_keys = ['job_title', 'company_name']
df_unique = df_unique.drop_duplicates(subset=duplicate_keys)

# The deleted count is measured against df_fillna, so it also includes any rows
# already removed from df_unique before this cell ran.
rows_before = len(df_fillna)
rows_after = len(df_unique)
print('original no of rows df:', rows_before)
print('no of unique rows df:', rows_after)
print('deleted rows = ', rows_before - rows_after)

original no of rows df: 785741
no of unique rows df: 508042
deleted rows =  277699


### Data: Management

* `sample()`

In [2]:
# Last five rows of the frame.
df.tail(n=5)

Unnamed: 0,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills
785736,Software Engineer,DevOps Engineer,Singapura,melalui Trabajo.org,Pekerjaan tetap,False,Singapore,2023-03-13 06:16:16,False,False,Singapore,,,,CAREERSTAR INTERNATIONAL PTE. LTD.,"['bash', 'python', 'perl', 'linux', 'unix', 'k...","{'os': ['linux', 'unix'], 'other': ['kubernete..."
785737,Data Analyst,CRM Data Analyst,"Bad Rodach, Jerman",melalui BeBee Deutschland,Pekerjaan tetap,False,Germany,2023-03-12 06:18:18,False,False,Germany,,,,HABA FAMILYGROUP,"['sas', 'sas', 'sql', 'excel']","{'analyst_tools': ['sas', 'excel'], 'programmi..."
785738,Business Analyst,Commercial Analyst - Start Now,Malaysia,melalui Ricebowl,Pekerjaan tetap,False,Malaysia,2023-03-12 06:32:36,False,False,Malaysia,,,,Lendlease Corporation,"['powerpoint', 'excel']","{'analyst_tools': ['powerpoint', 'excel']}"
785739,Data Engineer,"Principal Associate, Data Engineer (Remote-Eli...","Newark, New Jersey, Amerika Serikat",melalui Recruit.net,Pekerjaan tetap,False,Sudan,2023-03-12 06:32:15,False,False,Sudan,,,,Capital One,"['python', 'go', 'nosql', 'sql', 'mongo', 'she...","{'cloud': ['aws', 'snowflake', 'azure', 'redsh..."
785740,Software Engineer,AWS System Analyst,India,melalui Trigyn,Pekerjaan tetap,False,India,2023-03-13 06:16:31,False,False,India,,,,Trigyn,"['aws', 'flow']","{'cloud': ['aws'], 'other': ['flow']}"


In [3]:
# Draw a single random row; no seed is set, so the row differs between runs.
df.sample(n=1)

Unnamed: 0,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills
118566,Data Scientist,Data Scientist / Biostatistiker (m/w/d),Anywhere,via Indeed,Full-time,True,Germany,2023-05-16 08:26:21,False,False,Germany,,,,TCC GmbH,['r'],{'programming': ['r']}


In [4]:
# Draw four random rows; no seed is set, so the rows differ between runs.
df.sample(n=4)

Unnamed: 0,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills
289856,Data Engineer,Junior Software and Data Engineer,"Boston, MA",via ZipRecruiter,Full-time and Part-time,False,"Texas, United States",2023-11-28 17:07:37,False,True,United States,,,,Commonwealth of Massachusetts,"['java', 'python', 'javascript', 'c', 'c#', 'a...","{'cloud': ['aws'], 'other': ['github', 'puppet..."
73995,Data Analyst,Data Science Analyst,"Austin, TX",via Recruit.net,Full-time,False,"Texas, United States",2023-06-01 00:03:51,False,True,United States,,,,HelioHire,"['python', 'aws', 'linux']","{'cloud': ['aws'], 'os': ['linux'], 'programmi..."
644264,Data Analyst,Intermediate Big Data Analyst - AVP (Hybrid),"Dublin, Ireland",via LinkedIn,Full-time,False,Ireland,2023-08-08 12:32:20,False,False,Ireland,,,,Citi,"['sql', 'shell', 'python', 'aws', 'spark']","{'cloud': ['aws'], 'libraries': ['spark'], 'pr..."
477714,Senior Data Scientist,Data Platform Lead,"Seoul, South Korea",via Trabajo.org - 작업 검색,Full-time,False,South Korea,2023-01-20 15:29:45,False,False,South Korea,,,,AIA,"['php', 'java', 'sql', 'python', 'sql server',...","{'cloud': ['aws', 'aurora', 'snowflake'], 'dat..."


In [6]:
# Draw four random rows with a fixed seed so the same rows are returned on every run.
df.sample(n=4, random_state=42)

Unnamed: 0,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills
502901,Senior Data Engineer,Senior Data Engineer,"St Paul, MN",via BeBee,Full-time,False,"Florida, United States",2023-12-17 11:09:06,False,False,United States,,,,ManpowerGroup,"['sql', 'azure', 'git']","{'cloud': ['azure'], 'other': ['git'], 'progra..."
406337,Senior Data Scientist,Senior Analytics Engineer,"London, UK",via LinkedIn,Full-time,False,United Kingdom,2023-10-06 16:09:56,False,False,United Kingdom,,,,Harnham,"['sql', 'python', 'bigquery', 'snowflake', 'lo...","{'analyst_tools': ['looker'], 'cloud': ['bigqu..."
659951,Data Engineer,Data Engineer,"Newcastle upon Tyne, UK",via Indeed,Full-time,False,United Kingdom,2023-09-06 10:11:25,True,False,United Kingdom,,,,Morgan King,,
541593,Data Analyst,Data Analyst,Malta,via Trabajo.org,Full-time,False,Malta,2023-02-14 21:14:18,True,False,Malta,,,,Konnekt,['jira'],{'async': ['jira']}


* `copy()`

In [7]:
# Load the dataset again into a separate frame that serves as the untouched
# original for the copy() demonstration.
dataset = load_dataset("lukebarousse/data_jobs")
df_original = dataset['train'].to_pandas()

In [19]:
# Deep copy: df_modified gets its own data, independent of df_original.
df_modified = df_original.copy(deep=True)

In [20]:
# Print each frame's object id, then test directly whether both names refer to
# the same object.
print('Id of df_original :', id(df_original))
print('Id of df_modified :', id(df_modified))
df_original is df_modified


Id of df_original : 3200687042112
Id of df_modified : 3200423228768


False

In [21]:
# Fill the missing yearly salaries in the copy with the column median.
median = df_modified['salary_year_avg'].median()
df_modified['salary_year_avg'] = df_modified['salary_year_avg'].fillna(value=median)

# Index labels 0 through 5 of the filled column.
df_modified.loc[0:5, 'salary_year_avg']

0    115000.0
1    115000.0
2    115000.0
3    115000.0
4    115000.0
5    115000.0
Name: salary_year_avg, dtype: float64

In [22]:
# Same rows in the original frame, for comparison with the filled copy.
df_original.loc[0:5, 'salary_year_avg']

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
5   NaN
Name: salary_year_avg, dtype: float64

* This shows that the changes made to `df_modified` do not affect `df_original`, because `copy()` creates an independent DataFrame.

### Index Management

In [1]:
import pandas as pd
from datasets import load_dataset

# Load the job-postings dataset and convert its 'train' split to a pandas DataFrame.
dataset = load_dataset("lukebarousse/data_jobs")
df = dataset['train'].to_pandas()

# Data cleanup: parse the posting date column into pandas datetimes.
df['job_posted_date'] = pd.to_datetime(df['job_posted_date'])

In [3]:
# Inspect the row index of the freshly loaded frame.
df.index

RangeIndex(start=0, stop=785741, step=1)

In [15]:
# Give the row index a name; it is shown as the index header when the frame is displayed.
df.index.names = ['Serial_no']

In [16]:
# First five rows; the index header now reads 'Serial_no'.
df.head()

Unnamed: 0_level_0,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills
Serial_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,Senior Data Engineer,Senior Clinical Data Engineer / Principal Clin...,"Watertown, CT",via Work Nearby,Full-time,False,"Texas, United States",2023-06-16 13:44:15,False,False,United States,,,,Boehringer Ingelheim,,
1,Data Analyst,Data Analyst,"Guadalajara, Jalisco, Mexico",via BeBee México,Full-time,False,Mexico,2023-01-14 13:18:07,False,False,Mexico,,,,Hewlett Packard Enterprise,"['r', 'python', 'sql', 'nosql', 'power bi', 't...","{'analyst_tools': ['power bi', 'tableau'], 'pr..."
2,Data Engineer,"Data Engineer/Scientist/Analyst, Mid or Senior...","Berlin, Germany",via LinkedIn,Full-time,False,Germany,2023-10-10 13:14:55,False,False,Germany,,,,ALPHA Augmented Services,"['python', 'sql', 'c#', 'azure', 'airflow', 'd...","{'analyst_tools': ['dax'], 'cloud': ['azure'],..."
3,Data Engineer,LEAD ENGINEER - PRINCIPAL ANALYST - PRINCIPAL ...,"San Antonio, TX",via Diversity.com,Full-time,False,"Texas, United States",2023-07-04 13:01:41,True,False,United States,,,,Southwest Research Institute,"['python', 'c++', 'java', 'matlab', 'aws', 'te...","{'cloud': ['aws'], 'libraries': ['tensorflow',..."
4,Data Engineer,Data Engineer- Sr Jobs,"Washington, DC",via Clearance Jobs,Full-time,False,Sudan,2023-08-07 14:29:36,False,False,Sudan,,,,Kristina Daniel,"['bash', 'python', 'oracle', 'aws', 'ansible',...","{'cloud': ['oracle', 'aws'], 'other': ['ansibl..."


In [17]:
# Data type of the index labels.
df.index.dtype

dtype('int64')

* #### `reset_index()`

In [18]:
# Keep only the postings whose country is India; the original index labels are retained.
df_india = df.loc[df['job_country'] == "India"]
df_india

Unnamed: 0_level_0,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills
Serial_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
16,Data Engineer,Big Data Engineer,"Pune, Maharashtra, India",via LinkedIn,Full-time,False,India,2023-06-15 13:28:03,False,False,India,,,,Hexaware Technologies,"['powershell', 'python', 'sql', 'mysql', 'sql ...","{'cloud': ['databricks', 'azure'], 'databases'..."
19,Data Scientist,Data Scientist,"Pune, Maharashtra, India",via Indeed,Full-time,False,India,2023-04-26 13:20:12,False,False,India,,,,Bitwise Solutions,"['r', 'sql', 'python', 'scala', 'java', 'c++',...","{'analyst_tools': ['tableau'], 'libraries': ['..."
29,Data Scientist,Data Scientist,"Pune, Maharashtra, India",via Exusia,Full-time,False,India,2023-03-26 13:16:20,False,False,India,,,,Exusia,,
38,Data Analyst,Data Quality and Governance Analyst 1,India,via Trabajo.org,Full-time,False,India,2023-07-23 13:24:57,False,False,India,,,,"Jones Lang LaSalle IP, Inc","['sql', 'python', 'databricks', 'power bi', 'e...","{'analyst_tools': ['power bi', 'excel', 'word'..."
42,Senior Data Engineer,"Senior Data Engineer(Python)-1563,1637&1633","Chennai, Tamil Nadu, India",via HR Software For Growing Businesses | Fresh...,Full-time,False,India,2023-11-10 13:30:41,True,False,India,,,,CESIT,"['python', 'sql', 'aws', 'pyspark', 'docker', ...","{'async': ['jira'], 'cloud': ['aws'], 'librari..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
785712,Data Engineer,India - Collections Analyst II,India,melalui BeBee India,Pekerjaan tetap,False,India,2023-03-13 06:16:31,False,False,India,,,,Varite India Private Limited,"['excel', 'flow']","{'analyst_tools': ['excel'], 'other': ['flow']}"
785715,Data Analyst,Amul Careers 2023 - Apply Online - Data Analys...,India,melalui Jobsleworld - Jobs In India - Job Vaca...,Pekerjaan tetap,False,India,2023-03-13 06:16:28,False,False,India,,,,Amul,,
785716,Software Engineer,Application Support Analyst,India,melalui BeBee India,Pekerjaan tetap,False,India,2023-03-13 06:16:31,False,False,India,,,,CompuGroup Medical,"['c#', 'sql']","{'programming': ['c#', 'sql']}"
785718,Business Analyst,Senior Performance QA Analyst,India,melalui BeBee India,Pekerjaan tetap,False,India,2023-03-13 06:16:28,False,False,India,,,,Diebold Nixdorf,,


In [19]:
# Move the 'Serial_no' index into a regular column and renumber the rows from 0.
# The returned frame is reassigned instead of using inplace=True, so the filtered
# frame is never mutated in place.
df_india = df_india.reset_index()
df_india.head(5)

Unnamed: 0,Serial_no,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills
0,16,Data Engineer,Big Data Engineer,"Pune, Maharashtra, India",via LinkedIn,Full-time,False,India,2023-06-15 13:28:03,False,False,India,,,,Hexaware Technologies,"['powershell', 'python', 'sql', 'mysql', 'sql ...","{'cloud': ['databricks', 'azure'], 'databases'..."
1,19,Data Scientist,Data Scientist,"Pune, Maharashtra, India",via Indeed,Full-time,False,India,2023-04-26 13:20:12,False,False,India,,,,Bitwise Solutions,"['r', 'sql', 'python', 'scala', 'java', 'c++',...","{'analyst_tools': ['tableau'], 'libraries': ['..."
2,29,Data Scientist,Data Scientist,"Pune, Maharashtra, India",via Exusia,Full-time,False,India,2023-03-26 13:16:20,False,False,India,,,,Exusia,,
3,38,Data Analyst,Data Quality and Governance Analyst 1,India,via Trabajo.org,Full-time,False,India,2023-07-23 13:24:57,False,False,India,,,,"Jones Lang LaSalle IP, Inc","['sql', 'python', 'databricks', 'power bi', 'e...","{'analyst_tools': ['power bi', 'excel', 'word'..."
4,42,Senior Data Engineer,"Senior Data Engineer(Python)-1563,1637&1633","Chennai, Tamil Nadu, India",via HR Software For Growing Businesses | Fresh...,Full-time,False,India,2023-11-10 13:30:41,True,False,India,,,,CESIT,"['python', 'sql', 'aws', 'pyspark', 'docker', ...","{'async': ['jira'], 'cloud': ['aws'], 'librari..."


* #### `set_index()`

In [21]:
# Promote the 'Serial_no' column back to the row index. The returned frame is
# reassigned instead of using inplace=True, so nothing is mutated in place.
df_india = df_india.set_index('Serial_no')
df_india.head(5)

Unnamed: 0_level_0,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills
Serial_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
16,Data Engineer,Big Data Engineer,"Pune, Maharashtra, India",via LinkedIn,Full-time,False,India,2023-06-15 13:28:03,False,False,India,,,,Hexaware Technologies,"['powershell', 'python', 'sql', 'mysql', 'sql ...","{'cloud': ['databricks', 'azure'], 'databases'..."
19,Data Scientist,Data Scientist,"Pune, Maharashtra, India",via Indeed,Full-time,False,India,2023-04-26 13:20:12,False,False,India,,,,Bitwise Solutions,"['r', 'sql', 'python', 'scala', 'java', 'c++',...","{'analyst_tools': ['tableau'], 'libraries': ['..."
29,Data Scientist,Data Scientist,"Pune, Maharashtra, India",via Exusia,Full-time,False,India,2023-03-26 13:16:20,False,False,India,,,,Exusia,,
38,Data Analyst,Data Quality and Governance Analyst 1,India,via Trabajo.org,Full-time,False,India,2023-07-23 13:24:57,False,False,India,,,,"Jones Lang LaSalle IP, Inc","['sql', 'python', 'databricks', 'power bi', 'e...","{'analyst_tools': ['power bi', 'excel', 'word'..."
42,Senior Data Engineer,"Senior Data Engineer(Python)-1563,1637&1633","Chennai, Tamil Nadu, India",via HR Software For Growing Businesses | Fresh...,Full-time,False,India,2023-11-10 13:30:41,True,False,India,,,,CESIT,"['python', 'sql', 'aws', 'pyspark', 'docker', ...","{'async': ['jira'], 'cloud': ['aws'], 'librari..."


* #### `sort_index()`

In [22]:
# Display the rows ordered by index label (ascending); the result is not assigned,
# so df_india itself is unchanged.
df_india.sort_index(ascending=True)

Unnamed: 0_level_0,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills
Serial_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
16,Data Engineer,Big Data Engineer,"Pune, Maharashtra, India",via LinkedIn,Full-time,False,India,2023-06-15 13:28:03,False,False,India,,,,Hexaware Technologies,"['powershell', 'python', 'sql', 'mysql', 'sql ...","{'cloud': ['databricks', 'azure'], 'databases'..."
19,Data Scientist,Data Scientist,"Pune, Maharashtra, India",via Indeed,Full-time,False,India,2023-04-26 13:20:12,False,False,India,,,,Bitwise Solutions,"['r', 'sql', 'python', 'scala', 'java', 'c++',...","{'analyst_tools': ['tableau'], 'libraries': ['..."
29,Data Scientist,Data Scientist,"Pune, Maharashtra, India",via Exusia,Full-time,False,India,2023-03-26 13:16:20,False,False,India,,,,Exusia,,
38,Data Analyst,Data Quality and Governance Analyst 1,India,via Trabajo.org,Full-time,False,India,2023-07-23 13:24:57,False,False,India,,,,"Jones Lang LaSalle IP, Inc","['sql', 'python', 'databricks', 'power bi', 'e...","{'analyst_tools': ['power bi', 'excel', 'word'..."
42,Senior Data Engineer,"Senior Data Engineer(Python)-1563,1637&1633","Chennai, Tamil Nadu, India",via HR Software For Growing Businesses | Fresh...,Full-time,False,India,2023-11-10 13:30:41,True,False,India,,,,CESIT,"['python', 'sql', 'aws', 'pyspark', 'docker', ...","{'async': ['jira'], 'cloud': ['aws'], 'librari..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
785712,Data Engineer,India - Collections Analyst II,India,melalui BeBee India,Pekerjaan tetap,False,India,2023-03-13 06:16:31,False,False,India,,,,Varite India Private Limited,"['excel', 'flow']","{'analyst_tools': ['excel'], 'other': ['flow']}"
785715,Data Analyst,Amul Careers 2023 - Apply Online - Data Analys...,India,melalui Jobsleworld - Jobs In India - Job Vaca...,Pekerjaan tetap,False,India,2023-03-13 06:16:28,False,False,India,,,,Amul,,
785716,Software Engineer,Application Support Analyst,India,melalui BeBee India,Pekerjaan tetap,False,India,2023-03-13 06:16:31,False,False,India,,,,CompuGroup Medical,"['c#', 'sql']","{'programming': ['c#', 'sql']}"
785718,Business Analyst,Senior Performance QA Analyst,India,melalui BeBee India,Pekerjaan tetap,False,India,2023-03-13 06:16:28,False,False,India,,,,Diebold Nixdorf,,


In [24]:
# Median, minimum and maximum yearly salary per short job title for the India postings.
median_pivot = df_india.pivot_table(
    index='job_title_short',
    values='salary_year_avg',
    aggfunc=['median', 'min', 'max'],
)
median_pivot

Unnamed: 0_level_0,median,min,max
Unnamed: 0_level_1,salary_year_avg,salary_year_avg,salary_year_avg
job_title_short,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Business Analyst,64800.0,43200.0,142025.0
Data Analyst,100500.0,32400.0,650000.0
Data Engineer,147500.0,43200.0,249000.0
Data Scientist,114008.0,43200.0,204381.0
Machine Learning Engineer,79200.0,32500.0,267000.0
Senior Data Analyst,111175.0,64800.0,177283.0
Senior Data Engineer,147500.0,35000.0,181765.0
Senior Data Scientist,149653.0,64800.0,170575.0
Software Engineer,79200.0,44418.5,200000.0


In [26]:
# Order the job titles by their minimum yearly salary, highest first. The column
# key is a tuple because the pivot has two column levels.
median_pivot.sort_values(by=[('min', 'salary_year_avg')], ascending=False)

Unnamed: 0_level_0,median,min,max
Unnamed: 0_level_1,salary_year_avg,salary_year_avg,salary_year_avg
job_title_short,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Senior Data Scientist,149653.0,64800.0,170575.0
Senior Data Analyst,111175.0,64800.0,177283.0
Software Engineer,79200.0,44418.5,200000.0
Business Analyst,64800.0,43200.0,142025.0
Data Engineer,147500.0,43200.0,249000.0
Data Scientist,114008.0,43200.0,204381.0
Senior Data Engineer,147500.0,35000.0,181765.0
Machine Learning Engineer,79200.0,32500.0,267000.0
Data Analyst,100500.0,32400.0,650000.0


In [27]:
# Order the pivot rows by their index, i.e. by job title.
median_pivot.sort_index(ascending=True)

Unnamed: 0_level_0,median,min,max
Unnamed: 0_level_1,salary_year_avg,salary_year_avg,salary_year_avg
job_title_short,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Business Analyst,64800.0,43200.0,142025.0
Data Analyst,100500.0,32400.0,650000.0
Data Engineer,147500.0,43200.0,249000.0
Data Scientist,114008.0,43200.0,204381.0
Machine Learning Engineer,79200.0,32500.0,267000.0
Senior Data Analyst,111175.0,64800.0,177283.0
Senior Data Engineer,147500.0,35000.0,181765.0
Senior Data Scientist,149653.0,64800.0,170575.0
Software Engineer,79200.0,44418.5,200000.0
