In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [2]:
##Read the Labor Force Survey CSV into the raw dataframe `df`.
##The path is absolute (presumably a Drive folder mounted in Colab - adjust when running elsewhere).
csv_path = "/content/drive/MyDrive/Colab Notebooks/Labor Force/Labor Force Survey.csv"
df = pd.read_csv(csv_path)

In [None]:
##Display the raw dataframe for a first visual check.
##(A bare expression on the last line of a cell is rendered as the cell's output.)

df

In [None]:
##Check for null values: info() lists every column with its non-null count and dtype.

df.info()

In [None]:
##Check for wrong data types. On a frame with mixed dtypes describe() only
##summarises the numeric columns, so a column that was read in as text is
##missing from this output.

df.describe()

##Note: the rate columns were read in as objects (strings) rather than numeric data types.

In [None]:
##Investigation result: the 2021 record is the cause of the object dtypes.
##It holds the placeholder '..' instead of a recorded value (most likely due to
##the COVID pandemic). Show every row whose participation rate is that placeholder.

df[df['Labor Force Participation Rate'].eq('..')]

In [None]:

##Build the cleaned frame `newdf`: remove records without recorded values and
##convert the rate columns from objects (strings) to numeric dtypes.

##Columns that were read in as objects because of the '..' placeholder
rate_columns = [
    'Labor Force Participation Rate',
    'Employment Rate',
    'Unemployment Rate',
    'Underemployment Rate',
    'Visible Underemployment Rate',
]

##Drop rows by content rather than by a hard-coded position: any row holding
##the '..' placeholder in at least one rate column is removed. This keeps
##working if the file is re-ordered or gains further unrecorded rows, whereas a
##fixed positional drop would remove the wrong row and leave '..' behind for
##to_numeric to fail on.
has_placeholder = df[rate_columns].eq('..').any(axis=1)

##.copy() makes newdf an independent frame, so the column assignments below
##neither touch df nor trigger a SettingWithCopyWarning.
newdf = df.loc[~has_placeholder].copy()

##Changing column types from objects to numerics. to_numeric raises on any
##value it cannot parse, so unexpected junk is reported instead of hidden.
for column in rate_columns:
    newdf[column] = pd.to_numeric(newdf[column])

newdf.info()

In [None]:
##Per-year mean of every column in the cleaned dataset.

newdf.groupby('Year').mean()

In [None]:
##CREATING A LINE GRAPH FOR THE AVERAGE LABOR FORCE PARTICIPATION RATE OVER THE YEARS

##Apply the seaborn theme before the figure exists, so the figure and its axes
##are both created under the theme and a first run looks the same as a re-run.
sns.set_theme()
fig, ax = plt.subplots(figsize=(18, 5))

##Storing the mean Labor Participation Rate per year.
##The column is selected before aggregating: only the needed mean is computed,
##and an unrelated non-numeric column can no longer make the aggregation fail.
##The result is a one-column frame indexed by Year.
lfpr = newdf.groupby('Year')['Labor Force Participation Rate'].mean().to_frame()

##Setting up the line graph ('Year' is resolved from the frame's named index)
linechart = sns.lineplot(
    x='Year',
    y='Labor Force Participation Rate',
    data=lfpr,
    dashes=True,
    ax=ax,
)

##Customize the chart
linechart.set_title("Labor Force Participation decreased over the years.", fontdict={'fontsize' : 18})

plt.show()

In [None]:
##Triangular correlation heat map of every column except Year

##Frame without the Year column
dfvalues = newdf.drop(columns='Year')
plt.figure(figsize=(10,10))

##Pairwise correlations, computed once and reused for both the mask and the plot
corr_matrix = dfvalues.corr()

##Mask that is non-zero on and above the diagonal; masked cells are not drawn,
##which leaves only the lower triangle visible
mask = np.triu(np.ones_like(corr_matrix))

##Set up the annotated correlation heatmap
corrheat = sns.heatmap(corr_matrix, annot=True, mask=mask)

##Customize the chart
corrheat.set_title('Correlation between the variables', fontdict={'fontsize': 25})

##Save the figure to the current working directory
plt.savefig('corrheat.png')

In [None]:
##Creating a line graph for the Labor Force Participation Rate and Employment Rate

##Set the seaborn style before creating the figure so the axes are drawn with it
sns.set_style('white')
fig, ax = plt.subplots(figsize=(20, 10))

##Yearly means of just the two plotted columns, from a single groupby.
##Selecting the columns before aggregating keeps unrelated (possibly
##non-numeric) columns out of the mean.
yearly_rates = newdf.groupby('Year')[['Labor Force Participation Rate', 'Employment Rate']].mean()

##One-column frames indexed by Year, kept under their original names
lfpr = yearly_rates[['Labor Force Participation Rate']]
empr = yearly_rates[['Employment Rate']]

##Both lines share the index of yearly_rates, and each carries its own label,
##so the legend cannot get out of step with the plotting order.
ax.plot(
    yearly_rates.index,
    yearly_rates['Labor Force Participation Rate'],
    marker='v',
    label='Labor Force Participation Rate',
)
ax.plot(
    yearly_rates.index,
    yearly_rates['Employment Rate'],
    marker='o',
    label='Employment Rate',
)
ax.set_xlabel('Years')
ax.set_ylabel('Rate')
ax.set_title('Labor Force Participation Rate vs. Employment Rate over the years')
ax.legend()

##Remove all four spines (top/right by default, left/bottom requested explicitly)
sns.despine(ax=ax, left=True, bottom=True)

plt.show()