# Tree Map of Tech Layoffs by State (CA, NY, TX, WA)
This notebook visualizes the number of tech layoffs in California, New York, Texas, and Washington using a tree map. Data is loaded from the respective CSV files for each state.

## Standardize Data Formats and Clean Data

In [37]:
# first standardize the data - for each dataset, we are only interested in 
# the company, city, the number of workers, and the effective date / layoff start date for now
import pandas as pd

# --- New York: load, keep tracked tech employers, normalise types, export ---

# Employer names to retain. The match below is exact (case-insensitive): a row
# survives only when its Company value equals one of these entries.
tech_keywords = ['Google', 'Amazon Corporate LLC - My Habits Business', 'Meta', 'Microsoft Corporation', 'Apple Inc.', 'Twitter']
ny_wanted = {name.lower() for name in tech_keywords}

ny_df = (
    pd.read_csv('../data/tech_layoffs_ny.csv')
    # columns that are not carried into the cleaned file
    .drop(columns=['State', 'Union', 'Notes', 'County', 'Region'])
    # rows without a company name cannot be matched against the list
    .dropna(subset=['Company'])
    .loc[lambda frame: frame['Company'].str.lower().isin(ny_wanted)]
    .drop(columns=['Closure/Layoff', 'Temporary/Permanent', 'Industry'])
    .assign(**{
        # shorten the long Amazon entity name (this changes values, not a column name)
        'Company': lambda frame: frame['Company'].replace(
            {'Amazon Corporate LLC - My Habits Business': 'Amazon'}
        ),
        # unparseable dates become NaT
        'WARN Received Date': lambda frame: pd.to_datetime(
            frame['WARN Received Date'], errors='coerce'
        ),
        # non-numeric worker counts become NaN
        'Number of Workers': lambda frame: pd.to_numeric(
            frame['Number of Workers'], errors='coerce'
        ),
    })
)

# Persist the cleaned NY data for the treemap cells.
ny_df.to_csv('../data/tech_layoffs_ny_cleaned.csv', index=False)



In [38]:
# --- California: load, collapse site-specific employer names, normalise types, export ---
ca_df = pd.read_csv('../data/tech_layoffs_ca_2025.csv').drop(
    columns=['State', 'Closure / Layoff', 'County', 'Industry', 'Temporary/Permanent']
)

# Company values of the form "<brand> - <anything>" are reduced to the bare
# brand so that all of a brand's rows share one name.
ca_brand_patterns = {
    r'^Microsoft -.*': 'Microsoft',
    r'^Google -.*': 'Google',
}
ca_company = ca_df['Company']
for brand_regex, brand in ca_brand_patterns.items():
    ca_company = ca_company.str.replace(brand_regex, brand, regex=True)
ca_df['Company'] = ca_company

ca_df = ca_df.assign(**{
    # unparseable dates become NaT
    'WARN Received Date': pd.to_datetime(ca_df['WARN Received Date'], errors='coerce'),
    # non-numeric worker counts become NaN
    'Number of Workers': pd.to_numeric(ca_df['Number of Workers'], errors='coerce'),
})

# Persist the cleaned CA data for the treemap cells.
ca_df.to_csv('../data/tech_layoffs_ca_cleaned.csv', index=False)

In [3]:
# data cleaning for TX
# NOTE(review): the input file name ends in "_te" while the cleaned output
# below uses "_tx" — confirm this is the intended source file.
tx_df = pd.read_csv('../data/tech_layoffs_te.csv')
tx_df = tx_df.drop(['State','Closure/Layoff', 'County', 'Industry','Temporary/Permanent', 'Union', 'Region','Notes'], axis=1)

# Regex fragments for the employers to keep; they are OR-ed into one pattern
# and matched anywhere in the Company value, case-insensitively.
# NOTE(review): the '.' in 'Apple Inc.' is unescaped, so it matches any
# character rather than a literal dot — confirm that is acceptable.
tech_keywords_tx = ['Google', 'Amazon Corporate LLC - My Habits Business', r'Meta Platforms, Inc\..*', 'Microsoft Corporation', 'Apple Inc.', 'Twitter']
pattern = '|'.join(tech_keywords_tx)

# na=False already excludes rows with a missing Company, so no separate
# notna() filter is needed. .copy() makes the result an independent frame so
# the column assignments below do not write into a slice of the unfiltered
# data (which triggers SettingWithCopyWarning).
tx_df = tx_df[tx_df['Company'].str.contains(pattern, case=False, na=False, regex=True)].copy()

# convert WARN Received Date to datetime format; unparseable values become NaT
tx_df['WARN Received Date'] = pd.to_datetime(tx_df['WARN Received Date'], errors='coerce')
# convert the number of workers to numeric; non-numeric values become NaN
tx_df['Number of Workers'] = pd.to_numeric(tx_df['Number of Workers'], errors='coerce')
# save tx data to csv
tx_df.to_csv('../data/tech_layoffs_tx_cleaned.csv', index=False)
tx_df.head()


Unnamed: 0,Company,City,Number of Workers,WARN Received Date,Effective Date
563,"Meta Platforms, Inc. (West 3rd)",Austin,154,2022-11-16,01/13/2023
565,"Meta Platforms, Inc. (Mcallen Pass)",Austin,11,2022-11-16,01/13/2023
566,"Meta Platforms, Inc.(W,6th)",Austin,50,2022-11-16,01/13/2023


In [40]:
# data cleaning for WA
wa_df = pd.read_csv('../data/tech_layoffs_wa.csv')
wa_df = wa_df.drop(['Closure Layoff', 'Type of Layoff'], axis=1)

# Align the WA column names with the ones used by the other state datasets.
wa_df = wa_df.rename(columns={
    'Location': 'City',
    '# of Workers': 'Number of Workers',
    'Received Date': 'WARN Received Date',
    'Layoff Start Date': 'Effective Date',
})

# Type conversions are done BEFORE the column selection below: assigning into
# the result of a column-subset selection can raise SettingWithCopyWarning.
# unparseable dates become NaT
wa_df['WARN Received Date'] = pd.to_datetime(wa_df['WARN Received Date'], errors='coerce')
wa_df['Effective Date'] = pd.to_datetime(wa_df['Effective Date'], errors='coerce')
# non-numeric worker counts become NaN
wa_df['Number of Workers'] = pd.to_numeric(wa_df['Number of Workers'], errors='coerce')

# keep only the shared columns, in the same order as the other datasets
wa_df = wa_df[['Company', 'City', 'Number of Workers', 'WARN Received Date', 'Effective Date']]

# save wa data to csv
wa_df.to_csv('../data/tech_layoffs_wa_cleaned.csv', index=False)
wa_df.head()


Unnamed: 0,Company,City,Number of Workers,WARN Received Date,Effective Date
0,"Meta Platforms, Inc",Seattle,419,2022-11-11,2023-01-13
1,"Meta Platforms, Inc",Bellevue,307,2022-11-11,2023-01-13
2,Amazon,Tukwila,172,2024-05-31,2024-08-01
3,Amazon,"Seattle, Bellevue",2320,2023-01-18,2023-03-19
4,Amazon Health Services,Seattle,159,2022-09-07,2022-12-01


## Visualize Cleaned Data as Treemap

### Treemap with States
This view is less informative because a single state, Washington (mostly Seattle-area layoffs), clearly dominates the map.

In [4]:
import pandas as pd
import plotly.express as px

# Cleaned per-state CSV files, keyed by the state label shown in the treemap.
state_files = {
    'California': '../data/tech_layoffs_ca_cleaned.csv',
    'New York': '../data/tech_layoffs_ny_cleaned.csv',
    'Texas': '../data/tech_layoffs_tx_cleaned.csv',
    'Washington': '../data/tech_layoffs_wa_cleaned.csv',
}

# Total per state: the sum of 'Number of Workers' (missing values count as 0)
# when that column exists, otherwise the number of rows in the file.
# A state whose file cannot be processed is reported and recorded as 0.
state_layoffs = {}
for state, path in state_files.items():
    try:
        df = pd.read_csv(path)
        layoffs = (
            df['Number of Workers'].fillna(0).astype(int).sum()
            if 'Number of Workers' in df.columns
            else df.shape[0]
        )
    except Exception as e:
        print(f"Error reading {state}: {e}")
        state_layoffs[state] = 0
    else:
        state_layoffs[state] = layoffs

# One row per state for plotting.
states = [*state_layoffs]
layoff_counts = [*state_layoffs.values()]
tree_df = pd.DataFrame({'State': states, 'Layoffs': layoff_counts})

# Single-level treemap: tile area and colour both encode the layoff total.
# NOTE(review): the title says 2025, but this cell applies no year filter —
# confirm the cleaned inputs only cover 2025.
fig = px.treemap(
    tree_df,
    path=['State'],
    values='Layoffs',
    title='Tech Layoffs by State (2025): CA, NY, TX, WA',
    color='Layoffs',
    color_continuous_scale=px.colors.sequential.Blues,
)
fig.show()

### Treemap with Companies

In [5]:
# Treemap of total layoffs per company, summed across all state files.
# The totals are computed with a single vectorised groupby instead of
# accumulating row by row with iterrows().

# Collect the two relevant columns from every state file that has them.
company_frames = []
for state, path in state_files.items():
    try:
        df = pd.read_csv(path)
    except Exception as e:
        # best effort: report the unreadable file and carry on with the rest
        print(f"Error reading {state}: {e}")
        continue
    if 'Company' in df.columns and 'Number of Workers' in df.columns:
        company_frames.append(df[['Company', 'Number of Workers']])

if company_frames:
    company_df = (
        pd.concat(company_frames, ignore_index=True)
        # missing or non-numeric worker counts contribute 0 to the total
        .assign(Layoffs=lambda frame: pd.to_numeric(
            frame['Number of Workers'], errors='coerce').fillna(0))
        # sort=False keeps companies in order of first appearance;
        # rows with a missing company name are left out by groupby
        .groupby('Company', sort=False, as_index=False)['Layoffs']
        .sum()
    )
else:
    # no usable input: keep the expected columns so the code below still runs
    company_df = pd.DataFrame(columns=['Company', 'Layoffs'])

# Company -> total mapping, kept for code that expects this dict.
company_layoffs = dict(zip(company_df['Company'], company_df['Layoffs']))

# Create company-level treemap: tile area and colour both encode the total.
# NOTE(review): the title says 2025, but this cell applies no year filter —
# confirm the cleaned inputs only cover 2025.
fig_company = px.treemap(company_df, path=['Company'], values='Layoffs',
                         title='Tech Layoffs by Company (2025)',
                         color='Layoffs', color_continuous_scale=px.colors.sequential.Reds)
fig_company.show()
