# Data Visualization


In [1]:
import pandas as pd
import plotly.express as px
import re


In [28]:
# Shared colour sequence for the charts in this notebook. The plotting cells
# pass color_palette[1:], so the first entry is skipped there.
color_palette = [
    "#82AAE3",
    "#91D8E4",
    "#BFEAF5",
    "#EAFDFC",
    "#FFD998",
    "#FFD373",
    "#FFC54D",
    "#FFB927",
    "#FFAD00",
    "#FF9A00",
    "#FF8700",
    "#FF7400",
]
# Legend background colour, applied via update_layout(legend=dict(bgcolor=...)).
LEGEN_BG = "#fff"
# NOTE(review): not referenced by any cell visible in this notebook; presumably
# styling for an external container — confirm before removing.
BG_STYLE = {"background-color": "#fff"}

In [20]:
# Columns kept for the analysis, in display order.
selected_columns = [
    "id",
    "date",
    "job_title",
    "abstract_title",
    "company_name",
    "avg_salary",
    "location",
    "job_type",
    "skills",
    "experience_level",
    "avg_exp_years",
]
data = pd.read_csv("../data/data_v5.csv")[selected_columns]
# Parse the timestamps and keep only the calendar date of each post.
data["date"] = pd.to_datetime(data["date"]).dt.date
data.shape


(832, 11)

In [21]:
# Preview the first rows of the selected columns.
data.head()


Unnamed: 0,id,date,job_title,abstract_title,company_name,avg_salary,location,job_type,skills,experience_level,avg_exp_years
0,1567,2022-01-02,junior/senior developer,/ developer,,,,full-time,"php, laravel, sql, javascript, html5, css, boo...",senior,
1,1568,2022-01-03,cashier,cashier,euroline-rent,,Damascus - Syria,full-time,"english language, jobseekers, jobseeker, inter...",,3.0
2,1569,2022-01-03,.net developer,.net developer,national technology group (ntg),,Damascus - Syria,full-time,"asp core, reactjs",,1.0
3,1570,2022-01-03,employees for operations department,employees for operations department,in damascus announces the vacancy for full-tim...,,Damascus - Syria,full-time,"perfect knowledge & use for english, basic com...",,
4,1571,2022-01-03,awasol developer,awasol developer,smartech-sy,,remotely,,,,


## Checking Null-Values Percentage

Null values indicate that a job post is missing the corresponding information. <br>

- 95% of companies do not provide salary range in their job post.
- 60% do not mention how many years of experience would be needed.
- 39% do not provide a location.
- 24% do not specify qualifications or requirements.
- 25% of job posts do not contain the company name.

job title and abstract title have zero null-values because we already dropped missing values.


In [39]:
# Share of missing values (in percent) for every column from company_name onwards.
null_percentages = (
    data.loc[:, "company_name":].isna().sum() / data.shape[0] * 100
).round(2)

px.bar(
    null_percentages,
    text_auto=True,
    color_discrete_sequence=color_palette[1:],
    labels={"value": "Percentage", "index": ""},
    title="Missing Values per Column (%)",
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
)


### Posting Frequency

As the plot shows, there are on average 3 new job posts per day in Syria, assuming that 95% of the records in the data are actual job posts.


In [44]:
# Number of job posts published on each calendar date.
posts_per_day = data.groupby("date", as_index=False).size()

px.line(
    posts_per_day,
    x="date",
    y="size",
    color_discrete_sequence=color_palette[1:],
    labels={"size": "Count", "date": "Date"},
    title="Posts Count Per Day",
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
)


### Job Title Demands

As mentioned previously, the Telegram channel that the data was scraped from is tech-oriented, so most of the job posts are tech-related vacancies. <br>

Plotting the top 50 titles, we can see that there is high demand for front-end developers, followed by back-end developers, which makes sense, and then ui/ux developers.


In [46]:
# The 50 most frequent abstract titles with their post counts.
top_titles = (
    data.groupby("abstract_title", as_index=False)
    .size()
    .sort_values(by="size", ascending=False)
    .head(50)
)

px.bar(
    top_titles,
    x="abstract_title",
    y="size",
    color_discrete_sequence=color_palette[1:],
    title="Job Title Vacancies",
    labels={"size": "Job Posts Count", "abstract_title": "Title"},
).update_xaxes(categoryorder="total descending").update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
)



### Average Salaries


- out of 832 job posts, only 42 of them provided salary range
- The average salary is around 2M Syrian Pounds.
- Most of the offered salaries are between 2M and 2.5M S.P.


In [8]:
# Summary statistics of the salaries that were actually provided.
salaries = data.avg_salary

mean_salary = round(salaries.mean())
print(f"Average Salary: {mean_salary:,}")

available_count = salaries.notna().sum()
print(f"Available Salaries count: {available_count}")

lowest_salary = int(salaries.min())
highest_salary = int(salaries.max())
print(f"Minimum Salary {lowest_salary:,} SP Max Salary: {highest_salary:,} SP")


Average Salary: 2,002,381
Available Salaries count: 42
Minimum Salary 350,000 SP Max Salary: 6,000,000 SP


In [49]:
# Distribution of the provided salaries; values of 10,000,000 and above are left out.
salaries_below_cap = data.avg_salary[data.avg_salary < 10_000_000]

px.histogram(
    salaries_below_cap,
    nbins=20,
    color_discrete_sequence=color_palette[1:],
    labels={"count": "", "value": "Salary Range"},
    title="Average Salaries",
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
)


### Job Location

- 60% of the jobs are in Damascus
- 30% are remote vacancies
- ~5% are hybrid jobs, which means you can work either remotely or on site
- there are other locations like Lebanon, Dubai, Turkey, Germany and Iraq


In [52]:
# Share of job posts per location, expressed in percent.
px.histogram(
    data,
    x="location",
    # "percent" yields 0-100 values, matching the "Percentage" axis title.
    histnorm="percent",
    color_discrete_sequence=color_palette[1:],
    labels={"location": "Job Location"},
    title="Job Location",
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
).update_xaxes(
    categoryorder="total descending"
).update_yaxes(
    # The y axis is an aggregate, not a column, so its title is set explicitly.
    title_text="Percentage"
)


### Experience Years

- most of the jobs require prior experience of 1-3 years
- only 3 job posts asked for a fresh graduate


In [61]:
# Distribution of the average years of experience requested per post.
px.histogram(
    data,
    x="avg_exp_years",
    color_discrete_sequence=color_palette[1:],
    labels={"count": "", "avg_exp_years": "Experience Years"},
    title="Average Experience Years",
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
)


In [67]:
# Number of posts per requested experience level.
px.histogram(
    data,
    x="experience_level",
    color_discrete_sequence=color_palette[1:],
    title="Experience Level Required",
    labels={"count": "", "experience_level": "Experience Level"},
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
)


### Job Type

- 93% of the jobs are full-time jobs.
- only 2% are part-time jobs.
- only 2 training opportunities.
- only 2 task-based jobs.


In [70]:
# Number of posts per job type, most frequent first.
px.histogram(
    data.job_type,
    text_auto=True,
    color_discrete_sequence=color_palette[1:],
    title="Jobs Type",
    labels={"value": "Job type"},
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
).update_xaxes(
    categoryorder="total descending"
)


## Checking Skills


In [13]:
# Regular-expression patterns, one per skill. clean_skills() runs each pattern
# with re.search against every comma-separated keyword and uses the pattern
# string itself as the key of the resulting counts.
# NOTE(review): the patterns are unanchored, so a short pattern also matches
# inside longer keywords (e.g. "git" in "github", "sql" in "mysql"/"nosql",
# "react" in "react-native", "spring" in "springboot").
skill_set = [
    r"\.net",
    "adobe",
    "ajax",
    "android",
    "angular",
    r"api\s",  # requires a whitespace character right after "api"
    "asp",
    "aws",
    "bitbucket",
    "blade",
    "bloc",
    "bootstrap",
    r"\sc\s",  # requires whitespace on both sides of "c"
    r"c\+\+",
    "cloud",
    "css",
    "database",
    "debugging",
    "devops",
    "django",
    "docker",
    "e-commerce",
    "erp",
    "excel",
    "figma",
    "firebase",
    "flutter",
    "git",
    "github",
    "google analytics",
    "html",
    "illustrator",
    "indesign",
    "ios",
    r"java\s",  # requires whitespace after "java", so "javascript" is not matched
    "javascript",
    "jquery",
    r"\sjs\s",  # requires whitespace on both sides of "js"
    "json",
    "machine learning",
    "micro-services",
    "microsoft office",
    "mvc",
    "mysql",
    "nestjs",
    "networking",
    "next",
    "nlp",
    "nlu",
    "node",
    "nosql",
    "object-oriented",
    "objective-c",
    "odoo",
    "oracle",
    "oop",
    "photoshop",
    "php",
    "adobe premier",
    "preparation",
    "problem-solving",
    "python",
    "react",
    "react-native",
    "redis",
    "redux",
    "research",
    "reporting",
    "revit",
    "scrum",
    "selenium",
    "seo",
    "spring",
    "springboot",
    "sql",
    "statistics",
    "swift",
    "system analysis",
    "tailwind",
    "testing",
    "threejs",
    "twilio",
    "typescript",
    "typography",
    "unittest",
    "version control",
    "vue",
    "web-security",
    "webpack",
    "webform",
    "websocket",
    "widgets",
    "wordpress",
    "adobe xd",
    "web scraping",
    "vanilla javascript",
    "software design",
    "es6",
    "state management",
    "system design",
]


In [14]:
def clean_skills(text, res=None, skills=None):
    """Count which skill patterns occur in a comma-separated keyword string.

    Parameters
    ----------
    text : str
        Comma-separated keywords, e.g. the ``skills`` field of a job post.
        Non-string input (such as NaN) is ignored.
    res : dict, optional
        Running tally ``{pattern: count}``. It is updated in place and
        returned; a new dict is created only when ``res`` is None.
    skills : iterable of str, optional
        Regex patterns to search for. Defaults to the notebook-level
        ``skill_set``.

    Returns
    -------
    dict
        The tally with one count added per (keyword, pattern) match.
    """
    # Test for None explicitly so a caller-supplied empty dict is kept as the
    # accumulator instead of being silently replaced by a new one.
    if res is None:
        res = {}
    if not isinstance(text, str):
        return res
    patterns = skill_set if skills is None else skills
    for kw in text.split(","):
        # Pad with spaces so patterns that require surrounding whitespace also
        # match when the skill sits at the start or end of the keyword.
        padded_kw = f" {kw} "
        for skill in patterns:
            if re.search(skill, padded_kw):
                res[skill] = res.get(skill, 0) + 1
    return res


In [15]:
# Concatenate every post's skills text, then tally skills chunk by chunk.
all_skills_text = " ".join(data.skills.astype("str").values)

res = {}
for chunk in all_skills_text.split(","):
    res = clean_skills(chunk, res)


In [82]:
# The 50 most frequently matched skill patterns across all posts.
top_skills = pd.Series(res).sort_values(ascending=False)[:50]

px.bar(
    top_skills,
    color_discrete_sequence=color_palette[1:],
    title="Top 50 Required Skills",
    labels={"value": "Job Posts Count", "index": "Skill"},
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
)


### Top Required Skills

Across all job posts and vacancies, the most required skills are the following:

1. css
2. html
3. react
4. sql
5. javascript
6. git
7. php
8. testing
9. .net


### Requirements of Each Job Title

- front-end top required skills
- back-end top required skills
- full-stack top required skills
- mobile app developer top required skills
- ui/ux designer top required skills


In [17]:
# Only posts that list skills can contribute to the per-title tallies.
posts_with_skills = data[data["skills"].notna()]

# One row per abstract title, holding the space-joined skills text of its posts.
grouped_skills_by_jobtitle = posts_with_skills.groupby(
    "abstract_title", as_index=False
)["skills"].apply(" ".join)

# Tally the skill patterns found in each title's combined text.
grouped_skills_by_jobtitle["skills_dict"] = grouped_skills_by_jobtitle[
    "skills"
].apply(clean_skills)


In [83]:
# Skill tally of the "front-end" title (raises IndexError if the title is absent).
front_end_skills = grouped_skills_by_jobtitle.loc[
    grouped_skills_by_jobtitle.abstract_title == "front-end", "skills_dict"
].iloc[0]

px.bar(
    pd.DataFrame.from_dict(front_end_skills, orient="index"),
    title="Front-end Developer Required Skills",
    labels={"index": "Skill", "value": "", "variable": ""},
    color_discrete_sequence=color_palette[1:],
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
).update_xaxes(categoryorder="total descending")


In [84]:
# Skill tally of the "back-end" title (raises IndexError if the title is absent).
back_end_skills = grouped_skills_by_jobtitle.loc[
    grouped_skills_by_jobtitle.abstract_title == "back-end", "skills_dict"
].iloc[0]

px.bar(
    pd.DataFrame.from_dict(back_end_skills, orient="index"),
    title="Back-End Developer Required Skills",
    labels={"index": "Skill", "value": "", "variable": ""},
    color_discrete_sequence=color_palette[1:],
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
).update_xaxes(categoryorder="total descending")


In [85]:
# Skill tally of the "full-stack" title (raises IndexError if the title is absent).
full_stack_skills = grouped_skills_by_jobtitle.loc[
    grouped_skills_by_jobtitle.abstract_title == "full-stack", "skills_dict"
].iloc[0]

px.bar(
    pd.DataFrame.from_dict(full_stack_skills, orient="index"),
    title="Full-Stack Developer Required Skills",
    labels={"index": "Skill", "value": "", "variable": ""},
    color_discrete_sequence=color_palette[1:],
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
).update_xaxes(categoryorder="total descending")


In [86]:
# Skill tally of the "ui/ux designer" title (raises IndexError if the title is absent).
ui_ux_skills = grouped_skills_by_jobtitle.loc[
    grouped_skills_by_jobtitle.abstract_title == "ui/ux designer", "skills_dict"
].iloc[0]

px.bar(
    pd.DataFrame.from_dict(ui_ux_skills, orient="index"),
    title="UI/UX Designer Required Skills",
    labels={"index": "Skill", "value": "", "variable": ""},
    color_discrete_sequence=color_palette[1:],
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
).update_xaxes(categoryorder="total descending")


In [87]:
# Skill tally of the "mobile app developer" title (raises IndexError if the title is absent).
mobile_app_skills = grouped_skills_by_jobtitle.loc[
    grouped_skills_by_jobtitle.abstract_title == "mobile app developer", "skills_dict"
].iloc[0]

px.bar(
    pd.DataFrame.from_dict(mobile_app_skills, orient="index"),
    title="Mobile App Developer Required Skills",
    labels={"index": "Skill", "value": "", "variable": ""},
    color_discrete_sequence=color_palette[1:],
).update_layout(
    # A colour with an alpha component must be written as rgba(), not rgb().
    paper_bgcolor="rgba(0,0,0,0)",
    legend=dict(bgcolor=LEGEN_BG),
    # No trailing comma: a dangling comma makes the font-family list malformed.
    font_family="Poppins, sans-serif",
).update_xaxes(categoryorder="total descending")


The top required front-end framework is react.js, followed by next.js, angular, and vue.js. <br>
For back-end, .net is the most required, followed by php, python, node.js and django. <br>
Full-stack developers should know sql, php, react, vue and node.js. <br>
Flutter is the top required skill for mobile app developers; ios came in second place, followed by react native.
