### 1. Importing Libraries

In [1]:
# @title Libraries
import numpy as np
import pandas as pd
import geopandas as gpd

import matplotlib as mpl
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import seaborn as sns

import datetime

### 2. Loading the dataset

In [47]:
c14 = pd.read_csv('research_question_1_c14.csv')
c16 = pd.read_csv('research_question_1_c16.csv')
c23 = pd.read_csv('research_question_1_c23.csv')

### 3. Performing Exploratory Data Analysis (EDA)


We want to understand the shape of our data to know how many features and sample are present.

In [3]:
data.shape

(20, 3)

Getting the information about the data helps in identifying features and handling  values that are missing.

In [None]:
data.info()

We handle null values by filling it with the mean of columnusing the fillna() method.

We want to drop values that are deemed unnecessary and irrelevant. Removing these values simplifies the dataset.

In [None]:
data.head()

In [None]:
# @title Download fonts
!wget 'https://github.com/openmaptiles/fonts/raw/master/roboto/Roboto-Light.ttf'
!wget 'https://github.com/openmaptiles/fonts/raw/master/roboto/Roboto-Regular.ttf'
!wget 'https://github.com/openmaptiles/fonts/raw/master/roboto/Roboto-Medium.ttf'
!wget 'https://github.com/openmaptiles/fonts/raw/master/roboto/Roboto-Bold.ttf'

In [None]:
# @title Colors
colors = ["#648FFF", "#785EF0", "#DC267F", "#FE6100", "#FFB000", "#000000", "#FFFFFF"]
colors_grad = sns.color_palette('flare_r',  12)
colors_heat1 = sns.color_palette('flare_r', as_cmap=True)
colors_heat2 = sns.diverging_palette(315, 261, s=74, l=50, center='dark', as_cmap=True)

color_bg = "#1B181C"
color_text = "#FFFFFF"


sns.palplot(colors)
plt.gca().set_title('Basic Palette', fontsize=6, pad=10)

sns.palplot(colors_grad)
plt.gca().set_title('Gradient Palette', fontsize=10, pad=10)

plt.show()

In [14]:
# @title Plot settings
mpl.rcParams['figure.dpi'] = 600
mpl.rcParams["figure.figsize"] = 16,8

# Text
mpl.rcParams['font.family'] = 'Roboto'

# Title
mpl.rcParams['figure.titlesize'] = 32
mpl.rcParams['axes.titlesize'] = 32
mpl.rcParams['axes.titleweight'] = 'bold'

# Labels
mpl.rcParams['axes.labelsize'] = 22
mpl.rcParams['xtick.labelsize'] = 22
mpl.rcParams['ytick.labelsize'] = 22

# Spacing
mpl.rcParams['axes.titlepad'] = 72
mpl.rcParams['axes.labelpad'] = 10
mpl.rcParams['xtick.major.pad'] = 10
mpl.rcParams['ytick.major.pad'] = 10
mpl.rcParams['xtick.major.width'] = 0
mpl.rcParams['xtick.minor.width'] = 0
mpl.rcParams['ytick.major.width'] = 0
mpl.rcParams['ytick.minor.width'] = 0

# Spines and grids
mpl.rcParams['axes.spines.top'] = False
mpl.rcParams['axes.spines.right'] = False
mpl.rcParams['axes.grid'] = False

# Legends
mpl.rcParams['legend.title_fontsize'] = 18
mpl.rcParams['legend.fontsize'] = 18
mpl.rcParams['legend.frameon'] = False

# Bars
plt.rcParams['patch.linewidth'] = 0
plt.rcParams['patch.edgecolor'] = 'none'

# Colors
mpl.rcParams["figure.facecolor"] = color_bg
mpl.rcParams["axes.facecolor"] = color_bg
mpl.rcParams["savefig.facecolor"] = color_bg

# Text colors
mpl.rcParams['text.color'] = color_text
mpl.rcParams['axes.labelcolor'] = color_text
mpl.rcParams['xtick.color'] = color_text
mpl.rcParams['ytick.color'] = color_text

# Line colorsc
mpl.rcParams['axes.edgecolor'] = color_text

In [None]:
g = sns.catplot(x='Primary Occupation Type', y='Count', hue='Sex', data=c14, kind='bar',palette=colors[1:3], height=10, aspect=2)

g.ax.set(xlabel="Primary Occupation Type", ylabel="Count")
g.ax.set_title('Distribution of Men and Women Across Different Primary Occupation Types')
# @title Comparing categorical groups
sns.move_legend(g, "upper right", bbox_to_anchor=(0.80, 0.98), ncol=3, title='Sex\n')
plt.show()


In [None]:
g = sns.catplot(x='Primary Occupation Industry', y='Count', hue='Sex', data=c16, kind='bar',palette=colors[1:3], height=10, aspect=2)

g.ax.set(xlabel="Primary Occupation Type", ylabel="Count")
g.ax.set_title('Distribution of Men and Women Across Different Industry Types')
# @title Comparing categorical groups
sns.move_legend(g, "upper right", bbox_to_anchor=(0.80, 0.98), ncol=3, title='Sex\n')
plt.show()

In [None]:
g = sns.catplot(x='Worker Class', y='Count', hue='Sex', data=c23, kind='bar',palette=colors[1:3], height=10, aspect=2)

g.ax.set(xlabel="Primary Occupation Type", ylabel="Count")
g.ax.set_title('Distribution of Men and Women Across Different Worker Classes')
# @title Comparing categorical groups
sns.move_legend(g, "upper right", bbox_to_anchor=(0.80, 0.98), ncol=3, title='Sex\n')
plt.show()

### 4. Statistical Analysis Results 

See attached writeup.