In [98]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [99]:
# Read the salaries CSV into the working DataFrame

csv_path = "/content/salaries.csv"
data1 = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows of the loaded data

data1.head(n=5)

In [None]:
# Checking shape, column names, data types
# info() prints a summary of the frame: its index range, every column with its
# non-null count and dtype, and the memory usage.

data1.info()

In [None]:
# Descriptive statistics for the columns (numeric columns by default when the frame has any)
data1.describe()

In [None]:
# (number of rows, number of columns) of the frame
data1.shape

In [None]:
# Count of missing values in each column

missing_per_column = data1.isna().sum()
missing_per_column

In [None]:
# Boolean mask of the whole frame: True where a cell is missing
data1.isna()

In [None]:
# Handling missing values by dropping them
# dropna() returns a new frame rather than modifying data1, so the result is
# assigned back; reset_index(drop=True) renumbers the remaining rows from 0.

rows_before_dropna = len(data1)
data1 = data1.dropna().reset_index(drop=True)
print(f"Dropped {rows_before_dropna - len(data1)} rows containing missing values")

In [108]:
# Normalise column names: lower-case them, then turn underscores into spaces

lowercased_names = data1.columns.str.lower()
data1.columns = lowercased_names.str.replace('_', ' ')

In [None]:
# Preview the first five rows to check the cleaned column names
data1.head(n=5)

In [None]:
# Removing duplicate rows
# drop_duplicates() returns a new frame rather than modifying data1, so the
# result is assigned back; reset_index(drop=True) renumbers the rows from 0.

rows_before_dedup = len(data1)
data1 = data1.drop_duplicates().reset_index(drop=True)
print(f"Removed {rows_before_dedup - len(data1)} duplicate rows")

In [None]:
# Make sure the salary column is numeric; values that cannot be parsed become NaN

salary_numeric = pd.to_numeric(data1['salary'], errors='coerce')
data1['salary'] = salary_numeric
data1.info()

In [None]:
# Distinct job titles present in the data

job_titles = data1['job title'].unique()
print(job_titles)

In [None]:
# Preview the first five rows again
data1.head(n=5)

In [None]:
# Company locations ranked by average salary, highest first
# NOTE(review): if the 'salary' column mixes currencies, these means may not be
# directly comparable — confirm against the dataset.

salary_by_location = data1.groupby('company location')['salary']
highest_salary = salary_by_location.mean().sort_values(ascending=False)
print(highest_salary.head())

In [None]:
# Mean salary for each job title

salary_by_title = data1.groupby('job title')['salary']
average_salary = salary_by_title.mean()
print(average_salary)

In [None]:
# How many rows (jobs) fall under each company location

company_locations = data1['company location']
job_location = company_locations.value_counts()
print(job_location)

In [None]:
# Preview the first five rows before plotting
data1.head(n=5)

In [None]:
# Bar chart of the average salary per job title.
# The salaries are aggregated first so each job title gets exactly one bar;
# passing the raw rows to plt.bar would draw one bar per row, stacked on top
# of each other at the same x position, which is not an average.
avg_salary_by_title = (
    data1.groupby('job title')['salary']
    .mean()
    .sort_values(ascending=False)
)

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(avg_salary_by_title.index, avg_salary_by_title.values)
ax.set_xlabel('Job Title')
ax.set_ylabel('Average Salary')
ax.set_title('Job Title vs. Average Salary')
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# Salary distribution for the five company locations with the highest average
# salary. highest_salary is indexed by 'company location', so the title names
# locations rather than companies.
top5_companies = highest_salary.head(5).index
top5_rows = data1[data1['company location'].isin(top5_companies)]

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(
    x='company location',
    y='salary',
    data=top5_rows,
    order=list(top5_companies),  # keep the boxes in ranking order
    ax=ax,
)
ax.set_title('Salary Distribution for Top 5 Company Locations')
ax.tick_params(axis='x', labelrotation=45)
plt.show()


In [None]:
# Horizontal count plot of jobs per company location, most frequent first
locations_by_frequency = data1['company location'].value_counts().index

fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(y='company location', data=data1, order=locations_by_frequency, ax=ax)
ax.set_title('Number of Jobs per Location')
ax.set_xlabel('Number of Jobs')
ax.set_ylabel('Location')
plt.show()


In [129]:
# Line chart of the average salary per work year.
# The guard must test the same column the groupby uses ('work year');
# checking for 'year' never matched, so the plot was silently skipped.
if 'work year' in data1.columns:
    yearly_salary = data1.groupby('work year')['salary'].mean()
    plt.figure(figsize=(10,5))
    sns.lineplot(x=yearly_salary.index, y=yearly_salary.values)
    plt.title('Average Salary Trend Over Years')
    plt.xlabel('Year')
    plt.ylabel('Average Salary')
    plt.show()


In [None]:
# Annotated heatmap of the pairwise correlations between numeric columns
correlations = data1.corr(numeric_only=True)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(correlations, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
#After analyzing the dataset and creating visualizations, here are some of the key takeaways:

#Job Titles and Salaries:
#Certain roles like Machine Learning Engineer and Data Scientist tend to offer the highest average salaries. On the other hand, entry-level positions such as Data Analyst or ML Intern typically come with lower pay. This suggests that specialized and experienced roles in AI and ML are more valued in the job market.

#Top-Paying Company Locations:
#A few company locations clearly stand out for offering better compensation. The top 5 company locations by average salary not only pay higher salaries but also show a wider range of salary distribution, which could be due to offering roles at different experience levels—from junior to senior positions.

#Jobs by Location:
#When it comes to where the jobs are, most listings are concentrated in major tech hubs like San Francisco, New York, or Berlin. Interestingly, some locations with fewer job listings still offer very competitive salaries, indicating that location doesn't always equal compensation.

#Salary Range and Outliers:
#The box plots revealed a wide variation in salaries across different companies. There are also a few outliers—extremely high or low salaries—which may represent executive roles or internships, respectively.

#Trends Over Time (if year data is available):
#If the dataset includes data across multiple years, there's a noticeable upward trend in salaries. This reflects the growing demand and investment in the fields of Data Science, AI, and Machine Learning.

#Correlations and Patterns:
#From the heatmap, we noticed some correlation between salary and other numerical features like experience or company size. This supports the idea that more experience usually leads to better pay.

In [None]:
# Interactive bar chart of the average salary per job title, highest first
import plotly.express as px

ranked_average_salary = average_salary.sort_values(ascending=False)
fig = px.bar(ranked_average_salary, title='Average Salary by Job Title')
fig.show()