In [None]:

from pyspark.sql import SparkSession
import pandas as pd
import plotly.express as px
import plotly.io as pio
import numpy as np

# --- Notebook-wide configuration -------------------------------------------
# Plotly figures are rendered with the "notebook" renderer.
pio.renderers.default = "notebook"

# Seed NumPy's legacy global RNG so any later random draws are repeatable.
np.random.seed(42)

# --- Spark session ----------------------------------------------------------
# getOrCreate() reuses an already-running session if there is one.
spark = (
    SparkSession.builder
    .appName("LightcastData")
    .getOrCreate()
)

# --- Load the job-postings CSV ----------------------------------------------
# header      -> first row supplies the column names
# inferSchema -> let Spark guess column types instead of reading all strings
# multiLine   -> allow quoted fields that span several physical lines
# escape      -> a double quote is the escape character inside quoted fields
df = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .option("multiLine", "true")
    .option("escape", "\"")
    .csv("./data/lightcast_job_postings.csv")
)

# --- Diagnostics (not meant for the rendered submission) --------------------
print("---This is Diagnostic check, No need to print it in the final doc---")

# Comment out the schema dump when rendering the submission.
df.printSchema()
df.show(5)

In [None]:
from pyspark.sql import SparkSession

# Register the loaded postings DataFrame as the temporary view "jobs" so the
# spark.sql(...) cells can reference it by name in their FROM clauses.
# createOrReplace means re-running this cell simply replaces the view.
df.createOrReplaceTempView("jobs")

In [None]:
# Ten most frequent software_skills_name values among postings whose
# title_name contains "analyst" (case-insensitive, via LOWER ... LIKE).
analyst_software_skills_sql = """
    SELECT software_skills_name, COUNT(*) AS count
    FROM jobs
    WHERE LOWER(title_name) LIKE '%analyst%'
    GROUP BY software_skills_name
    ORDER BY count DESC
    LIMIT 10
"""

skill_counts_by_type = spark.sql(analyst_software_skills_sql)

# truncate=False prints full cell values instead of shortening long strings.
skill_counts_by_type.show(truncate=False)

In [None]:
# Ten most frequent skills_name values among postings whose title_name
# contains "analyst" (case-insensitive, via LOWER ... LIKE).
# Note: this rebinds skill_counts_by_type, replacing any earlier result
# stored under the same name.
analyst_skills_sql = """
    SELECT skills_name, COUNT(*) AS count
    FROM jobs
    WHERE LOWER(title_name) LIKE '%analyst%'
    GROUP BY skills_name
    ORDER BY count DESC
    LIMIT 10
"""

skill_counts_by_type = spark.sql(analyst_skills_sql)

# truncate=False prints full cell values instead of shortening long strings.
skill_counts_by_type.show(truncate=False)

In [None]:
import pandas as pd

# Skill ratings per team member: every list holds one entry per person, in
# the same order as "Name".
#
# Fix: the rating lists previously held four values while "Name" held only
# three, so pd.DataFrame(...) raised
# "ValueError: All arrays must be of the same length" and this cell (and the
# heatmap that depends on df_skills) could not run. Each rating list is now
# limited to the three named members.
# NOTE(review): the dropped trailing ratings were Python=2, SQL=3,
# Machine Learning=2, Cloud Computing=1. If they belong to a fourth teammate,
# add that name to "Name" and restore the values — confirm with the authors.
skills_data = {
    "Name": ["Alysaa", "Adam", "Yihan"],
    "Python": [5, 3, 4],
    "SQL": [4, 2, 5],
    "Machine Learning": [3, 1, 4],
    "Cloud Computing": [2, 2, 3],
}

# Build the frame and index it by member name in a single expression.
# Avoiding inplace=True keeps the cell idempotent when it is re-run.
df_skills = pd.DataFrame(skills_data).set_index("Name")

# Bare last expression so the notebook renders the table.
df_skills

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

# Light-to-dark mint gradient used to colour the heatmap cells.
minty_colors = [
    "#e0f8f1",
    "#a0eec0",
    "#4fd2a1",
    "#1aab89",
]
minty_cmap = LinearSegmentedColormap.from_list("minty", minty_colors)

# Draw on an explicit Axes rather than relying on pyplot's "current figure".
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    df_skills,
    annot=True,        # print each rating inside its cell
    cmap=minty_cmap,
    linewidths=0.5,    # thin separators between cells
    ax=ax,
)
ax.set_title("Team Skill Levels Heatmap")
plt.show()