### In this notebook, I have explored the data, transformed the data to required format using Pandas, visualized the data using Plotly & found the below observations: 
1. Top 10 companies that laid off
1. Top 3 companies that laid off year-wise
1. Top 3 locations where most layoffs happened year-wise
1. Top 20 companies that laid off x% of employees
1. Top 10 countries where most layoffs happened
1. Top 10 locations where most layoffs happened in USA
1. Top locations where most layoffs happened in India
1. Relationship between funds received and layoffs
1. In which stage of the company had the most lay offs?
1. Which industry had the most layoffs?
1. Total layoffs year-wise
1. Year wise layoffs according to country

# Importing libraries

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px 

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Let`s dive into the data 

In [33]:
df = pd.read_csv('layoffs.csv')
df

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
0,Northvolt,Stockholm,Energy,1600.0,0.20,2025-09-23,Unknown,Sweden,13800.0
1,Drata,San Diego,Security,40.0,0.09,2024-09-26,Series C,United States,328.0
2,Moov,Cedar Falls,Finance,50.0,,2024-09-25,Series B,United States,77.0
3,FreightWaves,Chattanooga,Logistics,16.0,,2024-09-24,Private Equity,United States,92.0
4,F5,Seattle,Security,,,2024-09-24,Post-IPO,United States,
...,...,...,...,...,...,...,...,...,...
3801,Service,Los Angeles,Travel,,1.00,2020-03-16,Seed,United States,5.1
3802,HopSkipDrive,Los Angeles,Transportation,8.0,0.10,2020-03-13,Unknown,United States,45.0
3803,Panda Squad,SF Bay Area,Consumer,6.0,0.75,2020-03-13,Seed,United States,1.0
3804,Tamara Mellon,Los Angeles,Retail,20.0,0.40,2020-03-12,Series C,United States,90.0


In [34]:
# Total companies
df.company.nunique()
len(df.company.unique())

2681

2681

In [35]:
# But shape is 1634
df.shape

(3806, 9)

### Total company count and the Shape is 3806. Let's see duplicate companies

In [36]:
df.company.value_counts().sort_values(ascending=False)

Google           12
Amazon           12
Microsoft         9
Rivian            8
Spotify           7
                 ..
Lacework          1
Dazn              1
Truck It In       1
VTEX              1
Tamara Mellon     1
Name: company, Length: 2681, dtype: int64

In [37]:
df.company.value_counts() > 1

Google            True
Amazon            True
Microsoft         True
Rivian            True
Spotify           True
                 ...  
Color Health     False
iFood            False
Truckstop.com    False
Kandela          False
Tamara Mellon    False
Name: company, Length: 2681, dtype: bool

In [38]:
df[df.company == "Uber"]

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
1036,Uber,SF Bay Area,Transportation,200.0,,2023-06-21,Post-IPO,United States,25200.0
2449,Uber,Vilnius,Transportation,60.0,,2022-09-07,Post-IPO,Lithuania,24700.0
3282,Uber,Amsterdam,Transportation,225.0,0.25,2020-06-12,Post-IPO,Netherlands,24700.0
3314,Uber,Bengaluru,Transportation,600.0,0.23,2020-05-26,Post-IPO,India,24700.0
3333,Uber,SF Bay Area,Transportation,3000.0,0.13,2020-05-18,Post-IPO,United States,24700.0
3379,Uber,SF Bay Area,Transportation,3700.0,0.14,2020-05-06,Post-IPO,United States,24700.0


### Company can lay off on different dates. So, there are duplicate company values

In [39]:
df.groupby('company').sum()

Unnamed: 0_level_0,total_laid_off,percentage_laid_off,funds_raised
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E Inc.,0.0,0.00,0.0
F-Secure,70.0,0.00,0.0
Included Health,0.0,0.06,272.0
#Paid,19.0,0.17,21.0
&Open,9.0,0.09,35.0
...,...,...,...
kununu,0.0,0.00,0.0
mPharma,150.0,0.00,90.0
nCino,100.0,0.07,1100.0
tZero,0.0,0.00,472.0


In [40]:
df.groupby('company').sum().loc['Uber',:]

total_laid_off           7785.00
percentage_laid_off         0.75
funds_raised           148700.00
Name: Uber, dtype: float64

### Find NaNs

In [41]:
df.head(2)
df.isnull().sum()

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
0,Northvolt,Stockholm,Energy,1600.0,0.2,2025-09-23,Unknown,Sweden,13800.0
1,Drata,San Diego,Security,40.0,0.09,2024-09-26,Series C,United States,328.0


company                   0
location                  1
industry                  1
total_laid_off         1319
percentage_laid_off    1369
date                      0
stage                     7
country                   0
funds_raised            415
dtype: int64

### Fill NaNs with 0s

In [14]:
df.fillna(value=0,inplace=True)
df.isnull().sum()

company                0
location               0
industry               0
total_laid_off         0
percentage_laid_off    0
date                   0
stage                  0
country                0
funds_raised           0
dtype: int64

In [42]:
df.head()

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
0,Northvolt,Stockholm,Energy,1600.0,0.2,2025-09-23,Unknown,Sweden,13800.0
1,Drata,San Diego,Security,40.0,0.09,2024-09-26,Series C,United States,328.0
2,Moov,Cedar Falls,Finance,50.0,,2024-09-25,Series B,United States,77.0
3,FreightWaves,Chattanooga,Logistics,16.0,,2024-09-24,Private Equity,United States,92.0
4,F5,Seattle,Security,,,2024-09-24,Post-IPO,United States,


## Top 10 companies that laid off 

In [45]:
top10_idx = df['total_laid_off'].sort_values(ascending=False)[:10].index
top10_idx
df.iloc[top10_idx,:]
px.bar(df.iloc[top10_idx,:],x='company',y='total_laid_off', text='total_laid_off',title='Top 10 companies that laid off')

Int64Index([76, 241, 1707, 2191, 1382, 2123, 1740, 1365, 1452, 1882], dtype='int64')

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
76,Intel,SF Bay Area,Hardware,15000.0,0.15,2024-08-01,Post-IPO,United States,12.0
241,Tesla,Austin,Transportation,14000.0,0.1,2024-04-15,Post-IPO,United States,20200.0
1707,Google,SF Bay Area,Consumer,12000.0,0.06,2023-01-20,Post-IPO,United States,26.0
2191,Meta,SF Bay Area,Consumer,11000.0,0.13,2022-11-09,Post-IPO,United States,26000.0
1382,Meta,SF Bay Area,Consumer,10000.0,,2023-03-14,Post-IPO,United States,26000.0
2123,Amazon,Seattle,Retail,10000.0,0.03,2022-11-16,Post-IPO,United States,108.0
1740,Microsoft,Seattle,Other,10000.0,0.05,2023-01-18,Post-IPO,United States,1.0
1365,Amazon,Seattle,Retail,9000.0,,2023-03-20,Post-IPO,United States,108.0
1452,Ericsson,Stockholm,Other,8500.0,0.08,2023-02-24,Post-IPO,Sweden,663.0
1882,Amazon,Seattle,Retail,8000.0,0.02,2023-01-04,Post-IPO,United States,108.0


### This is wrong, Meta and Amazon are repeated. Need to clean the data such that there are no duplicate companies.

In [46]:
df_no_dup = df.copy()

In [48]:
# Top 10 companies that laid off 
df_no_dup.groupby('company').total_laid_off.sum().sort_values(ascending=False)[:10]
px.bar(df_no_dup.groupby('company').total_laid_off.sum().sort_values(ascending=False)[:10],text_auto=True,title='Top 10 companies that laid off',
      labels={"x":"Company","y":"Layoffs"})

company
Amazon        27840.0
Meta          21000.0
Intel         16057.0
Microsoft     14708.0
Tesla         14500.0
Cisco         14300.0
Google        13472.0
Dell          12650.0
Salesforce    11140.0
SAP           11000.0
Name: total_laid_off, dtype: float64

### This is correct :)

# Top 3 companies that laid off year-wise

In [55]:
df.head()

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised,Year
1,Drata,San Diego,Security,40.0,0.09,2024-09-26,Series C,United States,328.0,2024
2,Moov,Cedar Falls,Finance,50.0,,2024-09-25,Series B,United States,77.0,2024
3,FreightWaves,Chattanooga,Logistics,16.0,,2024-09-24,Private Equity,United States,92.0,2024
4,F5,Seattle,Security,,,2024-09-24,Post-IPO,United States,,2024
5,Olo,New York City,Food,,0.09,2024-09-23,Post-IPO,United States,184.0,2024


In [56]:
df = df[df['date'] != '2025-09-23'] # 2025 delete

In [57]:
df.head()

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised,Year
1,Drata,San Diego,Security,40.0,0.09,2024-09-26,Series C,United States,328.0,2024
2,Moov,Cedar Falls,Finance,50.0,,2024-09-25,Series B,United States,77.0,2024
3,FreightWaves,Chattanooga,Logistics,16.0,,2024-09-24,Private Equity,United States,92.0,2024
4,F5,Seattle,Security,,,2024-09-24,Post-IPO,United States,,2024
5,Olo,New York City,Food,,0.09,2024-09-23,Post-IPO,United States,184.0,2024


### Need to filter only year from date column

In [58]:
df["Year"] = df["date"].map(lambda x : x[:4])
df

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised,Year
1,Drata,San Diego,Security,40.0,0.09,2024-09-26,Series C,United States,328.0,2024
2,Moov,Cedar Falls,Finance,50.0,,2024-09-25,Series B,United States,77.0,2024
3,FreightWaves,Chattanooga,Logistics,16.0,,2024-09-24,Private Equity,United States,92.0,2024
4,F5,Seattle,Security,,,2024-09-24,Post-IPO,United States,,2024
5,Olo,New York City,Food,,0.09,2024-09-23,Post-IPO,United States,184.0,2024
...,...,...,...,...,...,...,...,...,...,...
3801,Service,Los Angeles,Travel,,1.00,2020-03-16,Seed,United States,5.1,2020
3802,HopSkipDrive,Los Angeles,Transportation,8.0,0.10,2020-03-13,Unknown,United States,45.0,2020
3803,Panda Squad,SF Bay Area,Consumer,6.0,0.75,2020-03-13,Seed,United States,1.0,2020
3804,Tamara Mellon,Los Angeles,Retail,20.0,0.40,2020-03-12,Series C,United States,90.0,2020


In [62]:
df.groupby(["Year"],sort=False)["total_laid_off"].max()

Year
2024    15000.0
2023    12000.0
2022    11000.0
2021     2434.0
2020     4375.0
Name: total_laid_off, dtype: float64

In [63]:
df.groupby(['Year','company']).total_laid_off.agg([max])

Unnamed: 0_level_0,Unnamed: 1_level_0,max
Year,company,Unnamed: 2_level_1
2020,1stdibs,70.0
2020,Acko,45.0
2020,Acorns,50.0
2020,Actifio,54.0
2020,AdRoll,210.0
...,...,...
2024,Zuora,
2024,Zwift,
2024,eBay,1000.0
2024,iCIMS,69.0


In [87]:
top_3_year_wise = df.groupby(['Year','company']).total_laid_off.agg([max])
top_3_year_wise

Unnamed: 0_level_0,Unnamed: 1_level_0,max
Year,company,Unnamed: 2_level_1
2020,1stdibs,70.0
2020,Acko,45.0
2020,Acorns,50.0
2020,Actifio,54.0
2020,AdRoll,210.0
...,...,...
2024,Zuora,
2024,Zwift,
2024,eBay,1000.0
2024,iCIMS,69.0


In [88]:
top_3_year_wise["max"]
top_3_year_wise["max"].groupby(['Year'],group_keys=False)

Year  company
2020  1stdibs      70.0
      Acko         45.0
      Acorns       50.0
      Actifio      54.0
      AdRoll      210.0
                  ...  
2024  Zuora         NaN
      Zwift         NaN
      eBay       1000.0
      iCIMS        69.0
      iRobot      350.0
Name: max, Length: 3351, dtype: float64

<pandas.core.groupby.generic.SeriesGroupBy object at 0x0000023B908E0DF0>

In [89]:
# Got it from https://stackoverflow.com/questions/27842613/pandas-groupby-then-sort-within-groups 
g = top_3_year_wise["max"].groupby(['Year'],group_keys=False)
top_3_year_wise2 = g.apply(lambda x : x.sort_values(ascending=False).head(3)) # Selecting each group, sorting & taking top 3
top_3_year_wise2

Year  company    
2020  Booking.com     4375.0
      Uber            3700.0
      Groupon         2800.0
2021  Katerra         2434.0
      Zillow          2000.0
      Instacart       1877.0
2022  Meta           11000.0
      Amazon         10000.0
      Cisco           4100.0
2023  Google         12000.0
      Meta           10000.0
      Microsoft      10000.0
2024  Intel          15000.0
      Tesla          14000.0
      SAP             8000.0
Name: max, dtype: float64

In [90]:
g.nlargest(3) # Get top 3 in each group ( inbuilt method )

Year  company    
2020  Booking.com     4375.0
      Uber            3700.0
      Groupon         2800.0
2021  Katerra         2434.0
      Zillow          2000.0
      Instacart       1877.0
2022  Meta           11000.0
      Amazon         10000.0
      Cisco           4100.0
2023  Google         12000.0
      Meta           10000.0
      Microsoft      10000.0
2024  Intel          15000.0
      Tesla          14000.0
      SAP             8000.0
Name: max, dtype: float64

In [91]:
top_3_year_wise2.index
top_3_year_wise2.values

MultiIndex([('2020', 'Booking.com'),
            ('2020',        'Uber'),
            ('2020',     'Groupon'),
            ('2021',     'Katerra'),
            ('2021',      'Zillow'),
            ('2021',   'Instacart'),
            ('2022',        'Meta'),
            ('2022',      'Amazon'),
            ('2022',       'Cisco'),
            ('2023',      'Google'),
            ('2023',        'Meta'),
            ('2023',   'Microsoft'),
            ('2024',       'Intel'),
            ('2024',       'Tesla'),
            ('2024',         'SAP')],
           names=['Year', 'company'])

array([ 4375.,  3700.,  2800.,  2434.,  2000.,  1877., 11000., 10000.,
        4100., 12000., 10000., 10000., 15000., 14000.,  8000.])

In [92]:
top_3_year_wise3 = pd.DataFrame()
top_3_year_wise3["total_laid_off"] = top_3_year_wise2.values
top_3_year_wise3

Unnamed: 0,total_laid_off
0,4375.0
1,3700.0
2,2800.0
3,2434.0
4,2000.0
5,1877.0
6,11000.0
7,10000.0
8,4100.0
9,12000.0


In [93]:
y = []
c = []
for i,j in top_3_year_wise2.index:
    y.append(i)
    c.append(j)
top_3_year_wise3["Year"] = y
top_3_year_wise3["Company"] = c
top_3_year_wise3

Unnamed: 0,total_laid_off,Year,Company
0,4375.0,2020,Booking.com
1,3700.0,2020,Uber
2,2800.0,2020,Groupon
3,2434.0,2021,Katerra
4,2000.0,2021,Zillow
5,1877.0,2021,Instacart
6,11000.0,2022,Meta
7,10000.0,2022,Amazon
8,4100.0,2022,Cisco
9,12000.0,2023,Google


In [94]:
px.bar(top_3_year_wise3,x='Year',y='total_laid_off',color='Company', title='Top 3 companies that laid off year-wise',text_auto=True)

## Top 3 locations where most layoffs happened year-wise

In [73]:
df.head()
top_3_loction_year_wise = df.groupby(["Year","location"]).total_laid_off.agg([max])
top_3_loction_year_wise

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised,Year
1,Drata,San Diego,Security,40.0,0.09,2024-09-26,Series C,United States,328.0,2024
2,Moov,Cedar Falls,Finance,50.0,,2024-09-25,Series B,United States,77.0,2024
3,FreightWaves,Chattanooga,Logistics,16.0,,2024-09-24,Private Equity,United States,92.0,2024
4,F5,Seattle,Security,,,2024-09-24,Post-IPO,United States,,2024
5,Olo,New York City,Food,,0.09,2024-09-23,Post-IPO,United States,184.0,2024


Unnamed: 0_level_0,Unnamed: 1_level_0,max
Year,location,Unnamed: 2_level_1
2020,Ahmedabad,500.0
2020,Amsterdam,4375.0
2020,Ann Arbor,40.0
2020,Atlanta,150.0
2020,Austin,451.0
...,...,...
2024,Washington D.C.,50.0
2024,Waterloo,1200.0
2024,Winnipeg,800.0
2024,Xi'an,


In [76]:
g1 = top_3_loction_year_wise.groupby(["Year"],group_keys=False)
top_3_loction_year_wise2 = g1.apply(lambda x : x.sort_values(["max"], ascending=False).head(3)) 
top_3_loction_year_wise2

Unnamed: 0_level_0,Unnamed: 1_level_0,max
Year,location,Unnamed: 2_level_1
2020,Amsterdam,4375.0
2020,SF Bay Area,3700.0
2020,Chicago,2800.0
2021,SF Bay Area,2434.0
2021,Seattle,2000.0
2021,Mumbai,1800.0
2022,SF Bay Area,11000.0
2022,Seattle,10000.0
2022,Amsterdam,4000.0
2023,SF Bay Area,12000.0


In [78]:
top_3_loction_year_wise2.values.reshape(15,).tolist()

[4375.0,
 3700.0,
 2800.0,
 2434.0,
 2000.0,
 1800.0,
 11000.0,
 10000.0,
 4000.0,
 12000.0,
 10000.0,
 8500.0,
 15000.0,
 14000.0,
 8000.0]

In [80]:
top_3_loction_year_wise3 = pd.DataFrame()
top_3_loction_year_wise3["total_laid_off"] = top_3_loction_year_wise2.values.reshape(15,).tolist()
top_3_loction_year_wise3

Unnamed: 0,total_laid_off
0,4375.0
1,3700.0
2,2800.0
3,2434.0
4,2000.0
5,1800.0
6,11000.0
7,10000.0
8,4000.0
9,12000.0


In [95]:
y = []
l = []
for i,j in top_3_loction_year_wise2.index:
    y.append(i)
    l.append(j)
top_3_loction_year_wise3["Year"] = y
top_3_loction_year_wise3["Location"] = l
top_3_loction_year_wise3

Unnamed: 0,total_laid_off,Year,Location
0,4375.0,2020,Amsterdam
1,3700.0,2020,SF Bay Area
2,2800.0,2020,Chicago
3,2434.0,2021,SF Bay Area
4,2000.0,2021,Seattle
5,1800.0,2021,Mumbai
6,11000.0,2022,SF Bay Area
7,10000.0,2022,Seattle
8,4000.0,2022,Amsterdam
9,12000.0,2023,SF Bay Area


In [96]:
px.bar(top_3_loction_year_wise3,x='Year',y='total_laid_off',color='Location', title='Top 3 locations year-wise where layoffs happened the most',text_auto=True)

## Top 20 companies that laid off x% of employees

In [84]:
(df_no_dup.groupby('company').percentage_laid_off.sum().sort_values(ascending=False)[:20])*100
px.bar((df_no_dup.groupby('company').percentage_laid_off.sum().sort_values(ascending=False)[:20])*100,text_auto=True,title='Top 20 companies that laid off x% of employees')

company
FrontRow        295.0
Zeus Living     226.0
Cue Health      222.0
Joonko          200.0
Service         200.0
ReshaMandi      180.0
Airy Rooms      170.0
Zume            167.0
Medly           166.0
Pollen          164.0
Shift           164.0
Rubius          157.0
RenoRun         155.0
IronNet         152.0
Propzy          150.0
The Wing        150.0
InVision        150.0
Kenko Health    140.0
Dunzo           138.0
Nori            137.0
Name: percentage_laid_off, dtype: float64

In [97]:
# Country wise lay offs percentage
country_group  = df.groupby(['country']).sum().sort_values(['total_laid_off'],ascending=False)
country_group.head(10)

Unnamed: 0_level_0,total_laid_off,percentage_laid_off,funds_raised
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
United States,448358.0,433.3225,1893594.0
India,55759.0,59.05,225884.6
Germany,28153.0,18.875,82532.98
United Kingdom,20769.0,29.06,75770.44
Netherlands,18705.0,3.29,30944.0
Sweden,13659.0,4.5,28394.0
Canada,12422.0,25.13,24795.3
Brazil,11323.0,12.43,36698.7
China,8190.0,1.51,102067.0
Singapore,7440.0,10.05,53470.98


## Top 10 countries where most layoffs happened

In [98]:
px.bar(country_group.iloc[:10,0],text_auto=True,title='Top 10 countries where most layoffs happened')

## Top 10 locations where most layoffs happened in USA

In [99]:
# Only USA location wise
location_usa_group  = df[df.country == "United States"].groupby(['location']).sum().sort_values(['total_laid_off'],ascending=False)
location_usa_group

Unnamed: 0_level_0,total_laid_off,percentage_laid_off,funds_raised
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SF Bay Area,209618.0,165.5575,1189757.3
Seattle,54554.0,23.6900,43151.2
New York City,37457.0,73.5100,156248.2
Austin,33583.0,11.3500,72654.3
Boston,19372.0,29.7900,60667.0
...,...,...,...
Huntsville,0.0,0.0000,0.0
Providence,0.0,0.1500,946.0
Houston,0.0,1.0000,22.0
Richmond,0.0,0.0000,221.0


In [100]:
# Top 10 places in the US
px.bar(location_usa_group.iloc[:10,:1], text_auto=True,title='Top 10 locations where most layoffs happened in USA')

## Top 10 locations where most layoffs happened in India

In [101]:
px.bar(df[df.country == "India"].groupby(['location']).sum().sort_values(['total_laid_off'],ascending=False).iloc[:10,:1], text_auto=True,title='Top locations where most layoffs happened in India')

## Relationship between funds and layoffs


In [102]:
df["funds_raised"].corr(df["total_laid_off"])
px.line(df,x='total_laid_off',y='funds_raised',hover_name='company')
px.line(df,x='funds_raised', y='total_laid_off',hover_name='company')

0.11213809375286488

In [103]:
px.scatter(df,x='funds_raised', y='total_laid_off',hover_name='company')
px.scatter(df,x='total_laid_off',y='funds_raised',hover_name='company')

### As we can see in the above plot, there is no dependency b/w funds & lay offs

## In which stage of the company had the most lay offs?

In [104]:
df["stage"].unique()

array(['Series C', 'Series B', 'Private Equity', 'Post-IPO', 'Series D',
       'Acquired', 'Seed', 'Unknown', 'Series A', 'Series E', 'Series F',
       'Subsidiary', 'Series H', 'Series G', 'Series J', 'Series I', nan],
      dtype=object)

In [105]:
df.groupby(["stage"]).total_laid_off.sum()

stage
Acquired           63970.0
Post-IPO          370999.0
Private Equity     11015.0
Seed                2159.0
Series A            8255.0
Series B           29306.0
Series C           25311.0
Series D           25186.0
Series E           22187.0
Series F           13613.0
Series G            4452.0
Series H            8197.0
Series I            2855.0
Series J            3750.0
Subsidiary          7214.0
Unknown            65125.0
Name: total_laid_off, dtype: float64

In [106]:
px.bar(df.groupby(["stage"]).total_laid_off.sum().sort_values(ascending=False),title='Layoffs & company stage',
       text_auto=True,orientation='h'
      )

## Which industry had the most layoffs?

In [113]:
df['industry'] = df['industry'].replace('https://www.calcalistech.com/ctechnews/article/rysmrkfua', 'calcalistech')

In [114]:
top_industry_wise = df.groupby(['industry']).total_laid_off.sum().sort_values(ascending=False)
top_industry_wise

industry
Retail            71556.0
Consumer          69941.0
Transportation    60668.0
Other             60636.0
Hardware          53870.0
Finance           49042.0
Food              48023.0
Healthcare        38011.0
Travel            23235.0
Infrastructure    20614.0
Education         19642.0
Real Estate       19450.0
Sales             15248.0
Crypto            13683.0
Marketing         12654.0
Media             10581.0
HR                10397.0
Data              10055.0
Security           9948.0
Fitness            9841.0
Manufacturing      7350.0
Support            6381.0
Logistics          5372.0
Recruiting         5170.0
Construction       3863.0
Energy             3816.0
Product            2108.0
Aerospace          1472.0
Legal               966.0
AI                  288.0
calcalistech          0.0
Name: total_laid_off, dtype: float64

In [115]:
top_industry_wise.shape

(31,)

In [116]:
px.bar(x=top_industry_wise.index,y=top_industry_wise.values,text_auto=True,labels={'x': 'Industry', 'y':'Layoffs'},
      title='Industry vs Layoffs')

## Total layoffs year-wise

In [117]:
total_laid_year_wise = df.groupby(["Year"]).total_laid_off.sum()
total_laid_year_wise

Year
2020     80998.0
2021     15823.0
2022    165269.0
2023    264220.0
2024    137606.0
Name: total_laid_off, dtype: float64

In [118]:
px.bar(x=total_laid_year_wise.index,y=total_laid_year_wise.values,
      labels={"x":"Year","y":"Layoffs"},
       text_auto=True,
       title="Year-wise layoffs"
      )

## Year wise layoffs according to country

In [119]:
total_laid_year_country_wise = df.groupby(["Year","country"]).total_laid_off.sum()
total_laid_year_country_wise

Year  country       
2020  Australia           126.0
      Brazil             3341.0
      Bulgaria            120.0
      Canada             1211.0
      China                 0.0
                         ...   
2024  Spain                76.0
      Sweden              200.0
      United Kingdom     5390.0
      United States     98607.0
      Uruguay             104.0
Name: total_laid_off, Length: 160, dtype: float64

In [120]:
len(df.country.unique()) # We have 55 countries

63

In [121]:
total_laid_year_country_wise_year = []
total_laid_year_country_wise_country = []
for i,j in total_laid_year_country_wise.index:
    total_laid_year_country_wise_year.append(i)
    total_laid_year_country_wise_country.append(j)

In [122]:
total_laid_year_country_wise2 = pd.DataFrame({
    "Year": total_laid_year_country_wise_year,
    "Country": total_laid_year_country_wise_country,
    "total_laid_off": total_laid_year_country_wise.values 
})

In [123]:
total_laid_year_country_wise2

Unnamed: 0,Year,Country,total_laid_off
0,2020,Australia,126.0
1,2020,Brazil,3341.0
2,2020,Bulgaria,120.0
3,2020,Canada,1211.0
4,2020,China,0.0
...,...,...,...
155,2024,Spain,76.0
156,2024,Sweden,200.0
157,2024,United Kingdom,5390.0
158,2024,United States,98607.0


In [124]:
total_laid_year_country_wise2.sort_values(["Year","total_laid_off"],ascending=False,inplace=True)

In [125]:
px.bar(total_laid_year_country_wise2,x='Year',y='total_laid_off',color='Country',text='Country',
      title='Year wise layoffs according to country'
      )