# Vacancy Analysis

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

## Technology Stack Analysis

### Task:
- Analyze the technology stacks mentioned in job listings to identify commonly required skills.
- Determine the frequency of specific technologies (e.g., SQL, Django, Docker) to understand their popularity among employers.

In [None]:
# Load the vacancy listings and discard every row that contains a missing value.
vacancy_df = pd.read_csv("data/python.csv").dropna()

# Preview the first rows of the cleaned frame.
vacancy_df.head()

In [None]:
def get_technology_count_df(df: pd.DataFrame) -> pd.DataFrame:
    """Count how often each technology appears in the ``stack`` column.

    Every ``stack`` value is handled as the text of a list of single-quoted
    names (for example ``"['SQL', 'Django']"``): leading/trailing brackets
    and quotes are stripped, the remainder is split on ``"', '"`` and each
    name is placed on its own row before the rows are counted.

    Args:
        df: Frame containing a string column named ``stack``.

    Returns:
        A frame indexed by ``stack`` whose single column holds the number
        of occurrences of each technology.
    """
    technologies = (
        df["stack"]
        .str.strip("['']")
        .str.split("', '")
        .explode()
        .to_frame()
    )

    return technologies.value_counts().reset_index().set_index("stack")

In [None]:
# Tally technology mentions across all loaded vacancies, then preview the first rows.
technology_counts = vacancy_df.pipe(get_technology_count_df)
technology_counts.head()

In [None]:
def create_bar_chart(
        data: pd.DataFrame,
        xlabel: str,
        ylabel: str,
        title: str,
        save: bool = False
) -> None:
    """Draw a horizontal bar chart of ``data`` ordered by its ``count`` column.

    Args:
        data: Frame with a ``count`` column; one bar is drawn per row and
            the index supplies the bar labels on the vertical axis.
        xlabel: Label for the horizontal (value) axis.
        ylabel: Label for the vertical (category) axis.
        title: Chart title.
        save: When true, also write the figure to
            ``<today's date>-stack-analysis.jpg`` in the working directory.

    Returns:
        None. The chart is shown (and optionally saved) as a side effect.
    """
    # Sorting needs the "count" column, so ``data`` must be a DataFrame:
    # ``Series.sort_values`` does not accept ``by``.
    ordered = data.sort_values(by="count")

    # Work on the Axes returned by pandas rather than on pyplot's implicit
    # "current axes" state.
    ax = ordered.plot(kind="barh", figsize=(10, 8))
    fig = ax.get_figure()

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    # NOTE(review): the 0.9 y-margin is carried over unchanged from the
    # original styling - confirm it is the intended padding.
    ax.margins(y=0.9)
    fig.tight_layout()

    if save:
        fig.savefig(f"{datetime.now().date()}-stack-analysis.jpg")

    plt.show()

In [None]:
# Axis labels and title reused by the technology stack charts.
technology_analysis_labels = dict(
    xlabel="Count",
    ylabel="Technology",
    title="Technology Stack Analysis",
)

# Render the overall technology chart and also write it to disk.
create_bar_chart(technology_counts, save=True, **technology_analysis_labels)

## Location Analysis

### Task:
- Count the number of job listings by location.
- Visualize the distribution of job listings on a chart.
- Analyze the popularity of different locations for job opportunities.

In [None]:
def shorten_location(df: pd.DataFrame, length: int = 25) -> pd.DataFrame:
    """Split multi-location listings into one row per place and truncate long names.

    Each ``location`` value is split on ``", "`` and exploded so that every
    place gets its own row (rows that came from the same listing keep that
    listing's index label). Names longer than ``length`` characters are cut
    to ``length`` characters and suffixed with ``"..."``.

    Args:
        df: Frame containing a string column named ``location``.
        length: Maximum number of characters kept before ``"..."`` is
            appended. Defaults to 25.

    Returns:
        A single-column frame named ``location`` with one row per place.
    """
    def _shorten(label: str) -> str:
        # Truncate only names that exceed the limit; short names pass through.
        return label[:length] + "..." if len(label) > length else label

    locations = df["location"].str.split(", ").explode()

    # ``map`` keeps the exploded index. Building a fresh Series and assigning
    # it back would align on index labels, which are duplicated by ``explode``
    # (and may have gaps), scrambling or dropping values.
    # ``na_action="ignore"`` lets missing values pass through instead of
    # failing on ``len``.
    return locations.map(_shorten, na_action="ignore").to_frame()

In [None]:
# Build the per-location frame from the vacancy listings, then preview the first rows.
location_df = vacancy_df.pipe(shorten_location)
location_df.head()

In [None]:
# Count the rows for each location and index the result by location name.
location_counts = (
    location_df
    .value_counts()
    .reset_index()
    .set_index("location")
)

location_counts.head()

In [None]:
# The chart is drawn with horizontal bars, so counts run along the x-axis and
# location names along the y-axis; label the axes accordingly.
create_bar_chart(
    location_counts,
    xlabel="Count",
    ylabel="Location",
    title="Working Location Analysis",
)

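## Experience-Technology Relation Analysis

### Task:
- Compare the technology stacks requested in listings for 0-1 years of experience with those requested for 5+ years.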

In [None]:
# Load the two experience-specific listing files (presumably 0-1 and 5+ years of
# experience, judging by the file names) and drop rows with missing values.
vacancy_1y_df = pd.read_csv("data/python_0-1.csv").dropna()
vacancy_5y_df = pd.read_csv("data/python_5+.csv").dropna()

# Preview the first rows of the 5+ data set.
vacancy_5y_df.head()

In [None]:
# Tally technology mentions separately for each experience-specific data set.
technology_counts_1y = vacancy_1y_df.pipe(get_technology_count_df)
technology_counts_5y = vacancy_5y_df.pipe(get_technology_count_df)

# Preview the first rows of the 5+ counts.
technology_counts_5y.head()

In [None]:
# create_bar_chart returns None, so its result is not bound to a name.
# Each chart gets its own title; with the shared title alone the two figures
# would be indistinguishable.
create_bar_chart(
    technology_counts_1y,
    **{
        **technology_analysis_labels,
        "title": "Technology Stack Analysis (0-1 Years of Experience)",
    },
)
create_bar_chart(
    technology_counts_5y,
    **{
        **technology_analysis_labels,
        "title": "Technology Stack Analysis (5+ Years of Experience)",
    },
)