### 1. Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

### 2. Loading the dataset

In [None]:
# Load the survey CSV (path is relative to the working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='Labor_Force_Survey.csv')

### 3. Performing Exploratory Data Analysis (EDA)


We want to understand the shape of our data to know how many samples (rows) and features (columns) are present.

In [None]:
# Bare expression so the notebook displays the (rows, columns) tuple.
data.shape

Getting information about the data helps in identifying the features and in spotting which of them have missing values.

In [None]:
# Print a per-column summary (non-null counts and dtypes) to spot missing values.
data.info()

We handle null values by filling them with the most frequent value (mode) of their column using the `fillna()` method.

In [None]:
# Fill missing values with each column's most frequent value (mode).
#
# `data.mode()` returns a DataFrame with one row per tied mode. Passing that
# DataFrame straight to `fillna` aligns it on the row index, so only the
# first row(s) of `data` would be filled. Taking the first mode row yields a
# Series keyed by column name, which `fillna` applies to every row.
column_modes = data.mode()
if not column_modes.empty:
    # Columns that are entirely missing have no mode; skip them.
    data.fillna(column_modes.iloc[0].dropna(), inplace=True)

#### Dropping of unnecessary features


We want to drop features (columns) that are deemed unnecessary or irrelevant to the analysis. Removing these features simplifies the dataset.

In [None]:
# Columns to discard before the analysis (one name per line for easy diffing).
features_to_remove = [
    'Region',
    'Household Unique Sequential Number',
    '2010Urban-RuralFIES',
    'Final Weight Based on Projection (provincial projections)',
    'Survey Month',
    'Survey Year',
    'Psu Number',
    'Replicate',
    'C101-Line Number',
    'C05-Age as of Last Birthday',
    'C19-Total Number of Hours Worked during the past week',
    'C20-Want More Hours of Work',
    'C21-Look for Additional Work',
    'C24-Basis of Payment (Primary Occupation)',
    'C25-Basic Pay per Day (Primary Occupation)',
    'C26-Other Job Indicator',
    'C27-Number of Jobs during the past week',
    'C28-Total Hours Worked for all Jobs',
    'C29-Reasons for Working More than 48 Hours during the past week',
    'C30-Looked for Work or Tried to Establish Business during the past week',
    'C31-First Time to Look for Work',
    'C33-Number of Weeks Spent in Looking for Work',
    'C34-Reason for not Looking for Work',
    'C35-When Last Looked for Work',
    'C37-Willingness to take up work during the past week or within two weeks',
    'C38-Previous Job Indicator',
    'C40-Previous Occupation',
    'C41-Did work or had a job during the past quarter',
    'C43-Kind of Business (past quarter)',
    'Unnamed: 49',
    'Unnamed: 50',
    'Unnamed: 51',
    'Unnamed: 52',
    'Unnamed: 53',
    'Unnamed: 54',
]

# Remove the columns in place, then preview the first rows of what is left.
data.drop(columns=features_to_remove, inplace=True)
data.head()

In [None]:
# @title Colors
# Discrete palette, gradient palette, and two colormaps for heatmaps.
colors = [
    "#648FFF",
    "#785EF0",
    "#DC267F",
    "#FE6100",
    "#FFB000",
    "#000000",
    "#FFFFFF",
]
colors_grad = sns.color_palette('flare_r', n_colors=12)
colors_heat1 = sns.color_palette('flare_r', as_cmap=True)
colors_heat2 = sns.diverging_palette(
    h_neg=315, h_pos=261, s=74, l=50, center='dark', as_cmap=True
)

# Background and text colours for the dark theme.
color_bg = "#1B181C"
color_text = "#FFFFFF"

# Preview each palette in its own figure with a small title.
for _swatch, _swatch_title, _title_size in (
    (colors, 'Basic Palette', 6),
    (colors_grad, 'Gradient Palette', 10),
):
    sns.palplot(_swatch)
    plt.gca().set_title(_swatch_title, fontsize=_title_size, pad=10)

plt.show()

In [None]:
# @title Plot settings
# Apply the whole theme in one call; every key is validated by matplotlib
# exactly as it would be with individual assignments.
mpl.rcParams.update({
    # Figure
    'figure.dpi': 600,
    'figure.figsize': (16, 8),

    # Text
    # 'font.family': 'Roboto',

    # Title
    'figure.titlesize': 32,
    'axes.titlesize': 32,
    'axes.titleweight': 'bold',

    # Labels
    'axes.labelsize': 22,
    'xtick.labelsize': 22,
    'ytick.labelsize': 22,

    # Spacing
    'axes.titlepad': 72,
    'axes.labelpad': 10,
    'xtick.major.pad': 10,
    'ytick.major.pad': 10,
    'xtick.major.width': 0,
    'xtick.minor.width': 0,
    'ytick.major.width': 0,
    'ytick.minor.width': 0,

    # Spines and grids
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,

    # Legends
    'legend.title_fontsize': 18,
    'legend.fontsize': 18,
    'legend.frameon': False,

    # Bars
    'patch.linewidth': 0,
    'patch.edgecolor': 'none',

    # Background colors
    'figure.facecolor': color_bg,
    'axes.facecolor': color_bg,
    'savefig.facecolor': color_bg,

    # Text colors
    'text.color': color_text,
    'axes.labelcolor': color_text,
    'xtick.color': color_text,
    'ytick.color': color_text,

    # Line colors
    'axes.edgecolor': color_text,
})

#### Distribution of Male and Female in Workplace



In [None]:

# A row counts as employed when any of these indicator columns equals 1.
employment_flags = [
    'C10-Overseas Filipino Indicator',
    'C11-Work Indicator',
    'C12-Job Indicator',
    'New Employment Criteria (jul 05, 2005)',
]
is_employed = data[employment_flags].eq(1).any(axis=1)
employed_data = data[is_employed]

# Head counts per sex code (1 and 2, labelled Male and Female below).
sex_all = data['C04-Sex']
total_male_count = len(data[sex_all == 1])
total_female_count = len(data[sex_all == 2])

# Share of each sex that is employed, in percent.
sex_employed = employed_data['C04-Sex']
employed_male_percentage = len(employed_data[sex_employed == 1]) / total_male_count * 100
employed_female_percentage = len(employed_data[sex_employed == 2]) / total_female_count * 100

genders = ['Male', 'Female']
percentages = [employed_male_percentage, employed_female_percentage]
# NOTE: this rebinds the notebook-level name `colors` to a two-colour list.
colors = ['#648FFF', '#DC267F']

plt.bar(x=genders, height=percentages, color=colors)
plt.ylabel('Percentage')
plt.title('Percentage of Employed Individuals by Gender')
plt.show()




In [None]:
# Employment rate of men and women for each household size.

# Head counts per household size (rows) and sex code (columns).
data_new = data.pivot_table(index='Household Size', columns='C04-Sex', aggfunc='size', fill_value=0)
data_to_list = data_new.index.tolist()

# A row counts as employed when any of these indicator columns equals 1.
employment_flags = [
    'C10-Overseas Filipino Indicator',
    'C11-Work Indicator',
    'C12-Job Indicator',
    'New Employment Criteria (jul 05, 2005)',
]
employed_data = data[data[employment_flags].eq(1).any(axis=1)]
employed_data_new = employed_data.pivot_table(index='Household Size', columns='C04-Sex', aggfunc='size', fill_value=0)
# Align with the full table so a category (or sex) without any employed row
# counts as 0 instead of yielding NaN or a KeyError.
employed_data_new = employed_data_new.reindex(
    index=data_new.index, columns=data_new.columns, fill_value=0
)

# Employment rate per category, expressed in percent to match the axis label.
men_counts = (employed_data_new[1] / data_new[1] * 100).tolist()
women_counts = (employed_data_new[2] / data_new[2] * 100).tolist()

# Household sizes are shown as-is; sizes outside 1-22 fall back to their raw value.
label_mapping = {size: str(size) for size in range(1, 23)}
custom_labels = [label_mapping.get(dt, str(dt)) for dt in data_to_list]

# Width of each bar
bar_width = 0.35

# Bar positions: men at the integer slots, women shifted by one bar width.
r1 = np.arange(len(data_to_list))
r2 = r1 + bar_width

# Plot the grouped bar graph
plt.bar(r1, men_counts, color='#648FFF', width=bar_width, label='Men')
plt.bar(r2, women_counts, color='#DC267F', width=bar_width, label='Women')

# Add labels and title; ticks sit midway between each pair of bars.
plt.xlabel('Household Size')
plt.ylabel('Percentage')
plt.title('Percentage of Gender Employment by Household Size')
plt.xticks(r1 + bar_width / 2, custom_labels, rotation=45, ha='right')
plt.legend()

# Show the plot
plt.show()

In [None]:
# Employment rate of men and women by relationship to the household head.

# Head counts per relationship code (rows) and sex code (columns).
data_new = data.pivot_table(index='C03-Relationship to Household Head', columns='C04-Sex', aggfunc='size', fill_value=0)
data_to_list = data_new.index.tolist()

# A row counts as employed when any of these indicator columns equals 1.
employment_flags = [
    'C10-Overseas Filipino Indicator',
    'C11-Work Indicator',
    'C12-Job Indicator',
    'New Employment Criteria (jul 05, 2005)',
]
employed_data = data[data[employment_flags].eq(1).any(axis=1)]
employed_data_new = employed_data.pivot_table(index='C03-Relationship to Household Head', columns='C04-Sex', aggfunc='size', fill_value=0)
# Align with the full table so a category (or sex) without any employed row
# counts as 0 instead of yielding NaN or a KeyError.
employed_data_new = employed_data_new.reindex(
    index=data_new.index, columns=data_new.columns, fill_value=0
)

# Employment rate per category, expressed in percent to match the axis label.
men_counts = (employed_data_new[1] / data_new[1] * 100).tolist()
women_counts = (employed_data_new[2] / data_new[2] * 100).tolist()

# Human-readable names for the relationship codes.
label_mapping = {
    1: 'Head',
    2: 'Spouse',
    3: 'Son/Daughter',
    4: 'Brother/Sister',
    5: 'Son/Daughter in Law',
    6: 'Grandchildren',
    7: 'Father/Mother',
    8: 'Other Relative',
    9: 'Boarder',
    10: 'Domestic Helper',
    11: 'Non Relative',
}

# Unmapped codes fall back to their raw value instead of raising KeyError.
custom_labels = [label_mapping.get(dt, str(dt)) for dt in data_to_list]

# Width of each bar
bar_width = 0.35

# Bar positions: men at the integer slots, women shifted by one bar width.
r1 = np.arange(len(data_to_list))
r2 = r1 + bar_width

# Plot the grouped bar graph
plt.bar(r1, men_counts, color='#648FFF', width=bar_width, label='Men')
plt.bar(r2, women_counts, color='#DC267F', width=bar_width, label='Women')

# Add labels and title; ticks sit midway between each pair of bars.
plt.xlabel('Relationship to Household Head')
plt.ylabel('Percentage')
plt.title('Percentage of Gender Employment by their Relationship to Household Head')
plt.xticks(r1 + bar_width / 2, custom_labels, rotation=45, ha='right')
plt.legend()

# Show the plot, then print the category codes and the underlying head counts.
plt.show()
print(data_to_list)
print(data_new)


In [None]:
# Employment rate of men and women by marital status.

# Head counts per marital-status code (rows) and sex code (columns).
data_new = data.pivot_table(index='C06-Marital Status', columns='C04-Sex', aggfunc='size', fill_value=0)
data_to_list = data_new.index.tolist()

# A row counts as employed when any of these indicator columns equals 1.
employment_flags = [
    'C10-Overseas Filipino Indicator',
    'C11-Work Indicator',
    'C12-Job Indicator',
    'New Employment Criteria (jul 05, 2005)',
]
employed_data = data[data[employment_flags].eq(1).any(axis=1)]
employed_data_new = employed_data.pivot_table(index='C06-Marital Status', columns='C04-Sex', aggfunc='size', fill_value=0)
# Align with the full table so a category (or sex) without any employed row
# counts as 0 instead of yielding NaN or a KeyError.
employed_data_new = employed_data_new.reindex(
    index=data_new.index, columns=data_new.columns, fill_value=0
)

# Employment rate per category, expressed in percent to match the axis label.
men_counts = (employed_data_new[1] / data_new[1] * 100).tolist()
women_counts = (employed_data_new[2] / data_new[2] * 100).tolist()

# Human-readable names for the marital-status codes.
label_mapping = {
    1: 'Single',
    2: 'Married',
    3: 'Widowed',
    4: 'Divorce/Seperate',
    5: 'Annulled',
    6: 'Unknown',
}

# Unmapped codes fall back to their raw value instead of raising KeyError.
custom_labels = [label_mapping.get(dt, str(dt)) for dt in data_to_list]

# Width of each bar
bar_width = 0.35

# Bar positions: men at the integer slots, women shifted by one bar width.
r1 = np.arange(len(data_to_list))
r2 = r1 + bar_width

# Plot the grouped bar graph
plt.bar(r1, men_counts, color='#648FFF', width=bar_width, label='Men')
plt.bar(r2, women_counts, color='#DC267F', width=bar_width, label='Women')

# Add labels and title; ticks sit midway between each pair of bars.
plt.xlabel('Marital Status')
plt.ylabel('Percentage')
plt.title('Percentage of Gender Employment by Marital Status')
plt.xticks(r1 + bar_width / 2, custom_labels, rotation=45, ha='right')
plt.legend()

# Show the plot
plt.show()

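The next chart compares the employment share of men and women by nature of employment (primary occupation).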

In [None]:
# Employment rate of men and women by nature of employment (primary occupation).

# Head counts per nature-of-employment code (rows) and sex code (columns).
data_new = data.pivot_table(index='C17-Nature of Employment (Primary Occupation)', columns='C04-Sex', aggfunc='size', fill_value=0)
data_to_list = data_new.index.tolist()

# A row counts as employed when any of these indicator columns equals 1.
employment_flags = [
    'C10-Overseas Filipino Indicator',
    'C11-Work Indicator',
    'C12-Job Indicator',
    'New Employment Criteria (jul 05, 2005)',
]
employed_data = data[data[employment_flags].eq(1).any(axis=1)]
employed_data_new = employed_data.pivot_table(index='C17-Nature of Employment (Primary Occupation)', columns='C04-Sex', aggfunc='size', fill_value=0)
# Align with the full table so a category (or sex) without any employed row
# counts as 0 instead of yielding NaN or a KeyError.
employed_data_new = employed_data_new.reindex(
    index=data_new.index, columns=data_new.columns, fill_value=0
)

# Employment rate per category, expressed in percent to match the axis label.
men_counts = (employed_data_new[1] / data_new[1] * 100).tolist()
women_counts = (employed_data_new[2] / data_new[2] * 100).tolist()

# Human-readable names for the nature-of-employment codes.
label_mapping = {1: 'Permanent Job', 2: 'Short-term', 3: 'Different Employer'}

# Unmapped codes fall back to their raw value instead of raising KeyError.
custom_labels = [label_mapping.get(dt, str(dt)) for dt in data_to_list]

# Width of each bar
bar_width = 0.35

# Bar positions: men at the integer slots, women shifted by one bar width.
r1 = np.arange(len(data_to_list))
r2 = r1 + bar_width

# Plot the grouped bar graph
plt.bar(r1, men_counts, color='#648FFF', width=bar_width, label='Men')
plt.bar(r2, women_counts, color='#DC267F', width=bar_width, label='Women')

# Add labels and title; ticks sit midway between each pair of bars.
plt.xlabel('Nature of Employment')
plt.ylabel('Percentage')
plt.title('Percentage of Gender Employment by Nature of Employment')
plt.xticks(r1 + bar_width / 2, custom_labels, rotation=45, ha='right')
plt.legend()

# Show the plot
plt.show()