In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

layoffs_df= pd.read_excel("Data/layoffs.xlsx")

In [48]:
layoffs_df.head()

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
0,N26,Berlin,Finance,71.0,0.04,2023-04-28,Series E,United States,1700.0
1,Providoor,Melbourne,Food,,1.0,2023-04-28,Unknown,Australia,
2,Dropbox,SF Bay Area,Other,500.0,0.16,2023-04-27,Post-IPO,United States,1700.0
3,Vroom,New York City,Transportation,120.0,0.11,2023-04-27,Post-IPO,United States,1300.0
4,Greenhouse,New York City,Recruiting,100.0,0.12,2023-04-27,Private Equity,United States,110.0


In [49]:
layoffs_df.columns

Index(['company', 'location', 'industry', 'total_laid_off',
       'percentage_laid_off', 'date', 'stage', 'country', 'funds_raised'],
      dtype='object')

In [50]:
layoffs_df.dtypes

company                 object
location                object
industry                object
total_laid_off         float64
percentage_laid_off    float64
date                    object
stage                   object
country                 object
funds_raised           float64
dtype: object

In [51]:
layoffs_df.describe()

Unnamed: 0,total_laid_off,percentage_laid_off,funds_raised
count,1746.0,1694.0,2297.0
mean,256.022337,0.260577,814.143794
std,841.557354,0.258415,5448.104463
min,3.0,0.0,0.0
25%,38.0,0.1,50.0
50%,80.0,0.17,156.0
75%,180.0,0.3,442.0
max,12000.0,1.0,121900.0


In [53]:
layoffs_df.info

<bound method DataFrame.info of             company        location        industry  total_laid_off  \
0               N26          Berlin         Finance            71.0   
1         Providoor       Melbourne            Food             NaN   
2           Dropbox     SF Bay Area           Other           500.0   
3             Vroom   New York City  Transportation           120.0   
4        Greenhouse   New York City      Recruiting           100.0   
...             ...             ...             ...             ...   
2540    Panda Squad     SF Bay Area        Consumer             6.0   
2541  Tamara Mellon     Los Angeles          Retail            20.0   
2542       EasyPost  Salt Lake City       Logistics            75.0   
2543      Blackbaud      Charleston           Other           500.0   
2544           Open       Bengaluru         Finance            47.0   

      percentage_laid_off        date           stage        country  \
0                    0.04  2023-04-28      

# Handling Missing Values

In [None]:
average_funds_raised = layoffs_df['funds_raised'].mean()
layoffs_df['funds_raised'].fillna(value=average_funds_raised, inplace=True)

In [None]:
average_laid_off = layoffs_df['total_laid_off'].mean()
layoffs_df['total_laid_off'].fillna(value=average_laid_off, inplace=True)

# Check Count of Categorical Values in Data

In [None]:
layoffs_df['stage'].value_counts()

In [None]:
layoffs_df['industry'].value_counts()

In [None]:
layoffs_df['country'].value_counts()

# Identify Correlations

In [None]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
# convert the categorical columns to numerical
layoffs_df['industry_encoded'] = le.fit_transform(layoffs_df['industry'])
layoffs_df['stage_encoded'] = le.fit_transform(layoffs_df['stage'])

In [None]:
print("Correlation between industry and total_laid_off:", layoffs_df['industry_encoded'].corr(layoffs_df['total_laid_off']))
print("Correlation between stage and total_laid_off:", layoffs_df['stage_encoded'].corr(layoffs_df['total_laid_off']))

In [None]:
layoffs_df.corr()

In [None]:
# A heatmap to better represent and understand the correlations
corr_heatmap = layoffs_df.corr()
f, ax = plt.subplots(figsize=(15, 10))
cmap = sns.diverging_palette(220, 10, as_cmap=True)
sns.heatmap(corr_heatmap, cmap=cmap, vmax=.3, center=0, annot=True,
            square=True, linewidths=.5, cbar_kws={"shrink": .5})

plt.show()

In [None]:
correlation = layoffs_df['funds_raised'].corr(layoffs_df['total_laid_off'])
print('Correlation: %.3f' % correlation)
plt.figure(figsize=(15, 10))
sns.scatterplot(data=layoffs_df, x="funds_raised", y="total_laid_off")
plt.show()

# Average Layoffs per Industry

In [None]:
import matplotlib.cm as cm
# Calculating mean number of layoffs per industry
average_layoffs_per_industry = layoffs_df.groupby('industry')['total_laid_off'].mean().sort_values(ascending=False)
print("Average layoffs per industry:\n", average_layoffs_per_industry)

colors = cm.viridis(np.linspace(0, 1, len(average_layoffs_per_industry)))
plt.figure(figsize=(15,10))
plt.bar(average_layoffs_per_industry.index, average_layoffs_per_industry.values, color=colors)
plt.title('Average layoffs per industry')
plt.xlabel('Industry')
plt.ylabel('Average layoffs')
plt.xticks(rotation=90)
plt.show()

# Total layoffs per industry

In [None]:
industry_counts = layoffs_df['industry'].value_counts().sort_values(ascending=False)

plt.figure(figsize=(15,10))
chart = sns.barplot(
    x=industry_counts.index,
    y=industry_counts.values,
    palette='Set1'
)

chart.set_xticklabels(chart.get_xticklabels(), rotation=45, horizontalalignment='right')
plt.title('Total layoffs per industry')
plt.xlabel('Industry')
plt.ylabel('Average layoffs')
plt.show()

# Lay-Offs Chronology

In [None]:
layoffs_df['date'] = pd.to_datetime(layoffs_df['date'])
yearly_layoffs = layoffs_df.resample('Y', on='date')['total_laid_off'].sum()

plt.figure(figsize=(15,10))
sns.lineplot(x=yearly_layoffs.index.year, y=yearly_layoffs.values)
plt.xlabel('Year')
plt.ylabel('Number of Layoffs')
plt.title('Chronology of Layoffs Over Years')
plt.show()

In [None]:
layoffs_df['date'] = pd.to_datetime(layoffs_df['date'])
# Group by year and compute total layoffs and funds raised per year
yearly_data = layoffs_df.resample('Y', on='date').agg({'total_laid_off': 'sum', 'funds_raised': 'sum'})
# Create a wider figure and a set of subplots
fig, ax1 = plt.subplots(figsize=(15, 10))

# Create the first line plot for total layoffs
color = 'tab:blue'
ax1.set_xlabel('Year')
ax1.set_ylabel('Total Number of Layoffs', color=color)
ax1.plot(yearly_data.index.year, yearly_data['total_laid_off'], color=color)
ax1.tick_params(axis='y', labelcolor=color)
# Create a second y-axis for the total funds raised
ax2 = ax1.twinx()

color = 'tab:red'
ax2.set_ylabel('Total Funds Raised', color=color)
ax2.plot(yearly_data.index.year, yearly_data['funds_raised'], color=color)
ax2.tick_params(axis='y', labelcolor=color)
plt.title('Total Layoffs and Funds Raised Over Time')
plt.show()

# LayOffs Distribution Over Stages

In [None]:
mean_layoffs_per_stage = layoffs_df.groupby('stage')['total_laid_off'].mean()
mean_layoffs_per_stage = mean_layoffs_per_stage.sort_values(ascending=False)
print(mean_layoffs_per_stage)

plt.figure(figsize=(15, 10))
sns.barplot(x=mean_layoffs_per_stage.index, y=mean_layoffs_per_stage.values)
plt.xlabel('Stage')
plt.ylabel('Average Number of Layoffs')
plt.title('Average Number of Layoffs Across Different Stages of Companies')
plt.show()

# Top 10 Countries with Highest Lay-Offs

In [None]:
# Group by country and sum total_laid_off
grouped_layoffs = layoffs_df.groupby('country')['total_laid_off'].sum().sort_values(ascending=False).head(10)

plt.figure(figsize=(15, 10))
sns.barplot(x=grouped_layoffs.index, y=grouped_layoffs.values, palette='viridis')
plt.xticks(rotation=90) # Rotate the country labels for better visibility if they are long
plt.xlabel('Country')
plt.ylabel('Total Number of Layoffs')
plt.title('Total Layoffs by Country')
plt.show()

# Top 15 Companies wise Lay-Offs

In [None]:
# Group by company and sum total_laid_off
grouped_layoffs = layoffs_df.groupby('company')['total_laid_off'].sum().sort_values(ascending=False).head(15)

plt.figure(figsize=(15, 10))
sns.barplot(x=grouped_layoffs.index, y=grouped_layoffs.values, palette='viridis')
plt.xticks(rotation=90)
plt.xlabel('Country')
plt.ylabel('Total Number of Layoffs')
plt.title('Total Layoffs by Country')
plt.show()

# Box Plotting the lay-offs across different stages

In [None]:
plt.figure(figsize=(15, 8))
sns.boxplot(x='stage', y='total_laid_off', data=layoffs_df)
plt.title('Distribution of Total Layoffs Across Different Stages')
plt.xlabel('Stage')
plt.ylabel('Total Laid Off')
plt.show()

# LayOffs Over time: Month/Year Format

In [None]:
layoffs_df['date'] = pd.to_datetime(layoffs_df['date'])
grouped = layoffs_df.groupby('date')['total_laid_off'].sum()

fig, ax = plt.subplots(figsize=(12,6))
grouped.plot(kind='area', ax=ax)
plt.title('Total Layoffs Over Time')
plt.xlabel('Date')
plt.ylabel('Total Layoffs')
plt.show()

# Lay-Offs Map

In [None]:
import plotly.express as px

# aggregate data by country
country_layoffs = layoffs_df.groupby('country')['total_laid_off'].sum().reset_index()
fig = px.choropleth(country_layoffs,
                    locations="country",
                    color="total_laid_off",
                    locationmode = 'country names',
                    color_continuous_scale=px.colors.sequential.Plasma,
                    title='Total Layoffs by Country')
fig.show()