In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datasets import load_dataset

# Load the "lukebarousse/data_jobs" dataset and convert its "train" split to a pandas DataFrame.
data = load_dataset("lukebarousse/data_jobs")
df = data["train"].to_pandas()

# Parse the posting timestamps into datetimes so the .dt accessor can be used on this column.
df["job_posted_date"] = pd.to_datetime(df["job_posted_date"])

# Summarise column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 785741 entries, 0 to 785740
Data columns (total 17 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   job_title_short        785741 non-null  object        
 1   job_title              785740 non-null  object        
 2   job_location           784696 non-null  object        
 3   job_via                785733 non-null  object        
 4   job_schedule_type      773074 non-null  object        
 5   job_work_from_home     785741 non-null  bool          
 6   search_location        785741 non-null  object        
 7   job_posted_date        785741 non-null  datetime64[ns]
 8   job_no_degree_mention  785741 non-null  bool          
 9   job_health_insurance   785741 non-null  bool          
 10  job_country            785692 non-null  object        
 11  salary_rate            33067 non-null   object        
 12  salary_year_avg        22003 non-null   floa

In [7]:
# Now say I want DataFrames representing the data of individual months.
# One way is to manually create each df using a condition, but that is tedious work.
# We want to automate that.
# So what if we create a dictionary where each key represents a month and its associated value contains the data for that month?
# That's where we use a dictionary comprehension, similar to a list comprehension, where we use a for loop to automate the process.

# Okay, first, to define the keys we need an array of month names, so let's create it.

In [5]:
# Keep an independent snapshot of the loaded frame under a separate name.
df_og = df.copy(deep=True)

In [6]:
# Derive an abbreviated month name (e.g. "Jun") from each posting's timestamp.
df["job_posted_month"] = df["job_posted_date"].dt.strftime("%b")

df.head()

Unnamed: 0,job_title_short,job_title,job_location,job_via,job_schedule_type,job_work_from_home,search_location,job_posted_date,job_no_degree_mention,job_health_insurance,job_country,salary_rate,salary_year_avg,salary_hour_avg,company_name,job_skills,job_type_skills,job_posted_month
0,Senior Data Engineer,Senior Clinical Data Engineer / Principal Clin...,"Watertown, CT",via Work Nearby,Full-time,False,"Texas, United States",2023-06-16 13:44:15,False,False,United States,,,,Boehringer Ingelheim,,,Jun
1,Data Analyst,Data Analyst,"Guadalajara, Jalisco, Mexico",via BeBee México,Full-time,False,Mexico,2023-01-14 13:18:07,False,False,Mexico,,,,Hewlett Packard Enterprise,"['r', 'python', 'sql', 'nosql', 'power bi', 't...","{'analyst_tools': ['power bi', 'tableau'], 'pr...",Jan
2,Data Engineer,"Data Engineer/Scientist/Analyst, Mid or Senior...","Berlin, Germany",via LinkedIn,Full-time,False,Germany,2023-10-10 13:14:55,False,False,Germany,,,,ALPHA Augmented Services,"['python', 'sql', 'c#', 'azure', 'airflow', 'd...","{'analyst_tools': ['dax'], 'cloud': ['azure'],...",Oct
3,Data Engineer,LEAD ENGINEER - PRINCIPAL ANALYST - PRINCIPAL ...,"San Antonio, TX",via Diversity.com,Full-time,False,"Texas, United States",2023-07-04 13:01:41,True,False,United States,,,,Southwest Research Institute,"['python', 'c++', 'java', 'matlab', 'aws', 'te...","{'cloud': ['aws'], 'libraries': ['tensorflow',...",Jul
4,Data Engineer,Data Engineer- Sr Jobs,"Washington, DC",via Clearance Jobs,Full-time,False,Sudan,2023-08-07 14:29:36,False,False,Sudan,,,,Kristina Daniel,"['bash', 'python', 'oracle', 'aws', 'ansible',...","{'cloud': ['oracle', 'aws'], 'other': ['ansibl...",Aug


In [None]:
# Collect the distinct month labels; these become the dictionary keys.
month_arr = df["job_posted_month"].unique()

month_arr

In [10]:
# Okay, now we are ready to do the dictionary comprehension.

In [12]:
# Map each month label to the subset of rows posted in that month.
df_dict_month = {
    month: df.loc[df["job_posted_month"] == month]
    for month in month_arr
}

df_dict_month

{'Jun':                   job_title_short  ... job_posted_month
 0            Senior Data Engineer  ...              Jun
 8                Business Analyst  ...              Jun
 16                  Data Engineer  ...              Jun
 17                 Data Scientist  ...              Jun
 24                 Data Scientist  ...              Jun
 ...                           ...  ...              ...
 785347             Data Scientist  ...              Jun
 785571               Data Analyst  ...              Jun
 785617               Data Analyst  ...              Jun
 785669  Machine Learning Engineer  ...              Jun
 785675               Data Analyst  ...              Jun
 
 [61572 rows x 18 columns],
 'Jan':           job_title_short  ... job_posted_month
 1            Data Analyst  ...              Jan
 20         Data Scientist  ...              Jan
 39          Data Engineer  ...              Jan
 53          Data Engineer  ...              Jan
 55         Data Scientist 

In [13]:
# Hooray!! We created the dictionary we wanted. Don't believe it? Okay, let's pull the data for Jan and check it.

In [16]:
# Sanity check: count the month labels present in the frame stored under "Jan".
(
    df_dict_month["Jan"]
    ["job_posted_month"]
    .value_counts()
)

job_posted_month
Jan    91822
Name: count, dtype: int64

In [17]:
# See, when we pull the df stored under the "Jan" key and check how many times each month is mentioned in that df,
# we get just the Jan entry.

In [18]:
# Syntax for dict comprehension:
# dict = {key: value for key in array/list/...}
# Point to be noted: values can be any datatype, from str to DataFrames; you can even write the code or condition
#                    that creates that datatype.
# In our case, df[df["job_posted_month"] == month(key)] is a condition that creates a specific df.