In [28]:
import pandas as pd
import numpy as np
import json

In [2]:
layoffs = pd.read_csv('layoffs.csv')
layoffs

Unnamed: 0,company,location,industry,total_laid_off,percentage_laid_off,date,stage,country,funds_raised
0,N26,Berlin,Finance,71.0,0.04,2023-04-28,Series E,United States,1700.0
1,Providoor,Melbourne,Food,,1.00,2023-04-28,Unknown,Australia,
2,Dropbox,SF Bay Area,Other,500.0,0.16,2023-04-27,Post-IPO,United States,1700.0
3,Vroom,New York City,Transportation,120.0,0.11,2023-04-27,Post-IPO,United States,1300.0
4,Greenhouse,New York City,Recruiting,100.0,0.12,2023-04-27,Private Equity,United States,110.0
...,...,...,...,...,...,...,...,...,...
2540,Panda Squad,SF Bay Area,Consumer,6.0,0.75,2020-03-13,Seed,United States,1.0
2541,Tamara Mellon,Los Angeles,Retail,20.0,0.40,2020-03-12,Series C,United States,90.0
2542,EasyPost,Salt Lake City,Logistics,75.0,,2020-03-11,Series A,United States,12.0
2543,Blackbaud,Charleston,Other,500.0,0.14,,Post-IPO,United States,


In [3]:
# Table-Based Visualization - Total Layoffs Per Industry
layoffs_per_industry = pd.DataFrame(layoffs.groupby('industry')['total_laid_off'].sum())
layoffs_per_industry.to_csv('layoffs_per_industry.csv')
layoffs_per_industry

Unnamed: 0_level_0,total_laid_off
industry,Unnamed: 1_level_1
Aerospace,661.0
Construction,3863.0
Consumer,56340.0
Crypto,10785.0
Data,5899.0
Education,14169.0
Energy,802.0
Finance,30629.0
Fitness,8898.0
Food,33962.0


In [29]:
# Network-Based Visualization - Top 5 Companies with Most Layoffs Per Industry
companies_total = layoffs.groupby('company')['total_laid_off'].sum()
unique_companies = layoffs.drop_duplicates(subset='company')
unique_companies['total_laid_off'] = unique_companies['company'].apply(lambda x: companies_total[x])
sorted_df = unique_companies.sort_values(['industry', 'total_laid_off'], ascending=[True, False])
sorted_df = sorted_df[['company', 'industry', 'total_laid_off']]
top_5 = sorted_df.groupby('industry').head(5)

companies = top_5['company'].unique().tolist()
companies_dict = {}
idx = 1
for company in companies:
    companies_dict[company] = idx
    idx += 1

industries = top_5['industry'].unique().tolist()
industries_dict = {}
for industry in industries:
    industries_dict[industry] = idx
    idx += 1

source = [companies_dict[company] for company in top_5['company']]
target = [industries_dict[industry] for industry in top_5['industry']]
quantity = top_5['total_laid_off']
network_df = pd.DataFrame({'Source ID': source,
                          'Target ID': target,
                          'Quantity': quantity})

nodes = []
jsons = {}
links = []

for company, idx in companies_dict.items():
    tmp = {}
    tmp['id'] = idx
    tmp['name'] = company
    nodes.append(tmp)

for industry, idx in industries_dict.items():
    tmp = {}
    tmp['id'] = idx
    tmp['name'] = industry
    nodes.append(tmp)

for idx, row in network_df.iterrows():
    tmp = {}
    tmp['source'] = int(row['Source ID'])
    tmp['target'] = int(row['Target ID'])
    tmp['value'] = int(row['Quantity'])
    links.append(tmp)
    
jsons['nodes'] = nodes
jsons['links'] = links

with open('network.json', 'w') as f:
    json.dump(jsons, f)
jsons

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unique_companies['total_laid_off'] = unique_companies['company'].apply(lambda x: companies_total[x])


{'nodes': [{'id': 1, 'name': 'OneWeb'},
  {'id': 2, 'name': 'Kitty Hawk'},
  {'id': 3, 'name': 'Astra'},
  {'id': 4, 'name': 'AirMap'},
  {'id': 5, 'name': 'Katerra'},
  {'id': 6, 'name': 'RenoRun'},
  {'id': 7, 'name': 'Procore'},
  {'id': 8, 'name': 'Yojak'},
  {'id': 9, 'name': 'Tul'},
  {'id': 10, 'name': 'Meta'},
  {'id': 11, 'name': 'Google'},
  {'id': 12, 'name': 'Twitter'},
  {'id': 13, 'name': 'Bytedance'},
  {'id': 14, 'name': 'Yahoo'},
  {'id': 15, 'name': 'Crypto.com'},
  {'id': 16, 'name': 'Coinbase'},
  {'id': 17, 'name': 'Kraken'},
  {'id': 18, 'name': 'Bybit'},
  {'id': 19, 'name': 'Huobi'},
  {'id': 20, 'name': 'NetApp'},
  {'id': 21, 'name': 'UiPath'},
  {'id': 22, 'name': 'Informatica'},
  {'id': 23, 'name': 'Splunk'},
  {'id': 24, 'name': 'Rackspace'},
  {'id': 25, 'name': "Byju's"},
  {'id': 26, 'name': 'WhiteHat Jr'},
  {'id': 27, 'name': 'Unacademy'},
  {'id': 28, 'name': 'Vedantu'},
  {'id': 29, 'name': 'Skill Lync'},
  {'id': 30, 'name': 'Workrise'},
  {'id': 3

In [None]:
# Geometry-Based Visualization - Layoffs in the United States

def city_to_state(city):
    us_states = {
        'Albany': 'New York',
        'Ann Arbor': 'Michigan',
        'Atlanta': 'Georgia',
        'Austin': 'Texas',
        'Baltimore': 'Maryland',
        'Baton Rouge': 'Louisiana',
        'Beijing': None,  # Not in the US
        'Bend': 'Oregon',
        'Berlin': None,  # Not in the US
        'Birmingham': 'Alabama',
        'Bismarck': 'North Dakota',
        'Boise': 'Idaho',
        'Boston': 'Massachusetts',
        'Boulder': 'Colorado',
        'Brisbane': None,  # Not in the US
        'Burlington': 'Vermont',
        'Charleston': 'South Carolina',
        'Charlotte': 'North Carolina',
        'Chennai': None,  # Not in the US
        'Chicago': 'Illinois',
        'Cincinnati': 'Ohio',
        'Cleveland': 'Ohio',
        'Columbus': 'Ohio',
        'Copenhagen': None,  # Not in the US
        'Dallas': 'Texas',
        'Davenport': 'Iowa',
        'Denver': 'Colorado',
        'Detroit': 'Michigan',
        'Dover': 'Delaware',
        'Dubai': None,  # Not in the US
        'Durham': 'North Carolina',
        'Eindhoven': None,  # Not in the US
        'Fayetteville': 'Arkansas',
        'Grand Rapids': 'Michigan',
        'Houston': 'Texas',
        'Huntsville': 'Alabama',
        'Indianapolis': 'Indiana',
        'Jersey City': 'New Jersey',
        'Kansas City': 'Missouri',
        'Las Vegas': 'Nevada',
        'Lehi': 'Utah',
        'Lexington': 'Kentucky',
        'Little Rock': 'Arkansas',
        'Logan': 'Utah',
        'London': None,  # Not in the US
        'Los Angeles': 'California',
        'Louisville': 'Kentucky',
        'Madison': 'Wisconsin',
        'Melbourne': None,  # Not in the US
        'Mexico City': None,  # Not in the US
        'Miami': 'Florida',
        'Milwaukee': 'Wisconsin',
        'Minneapolis': 'Minnesota',
        'Missoula': 'Montana',
        'Nashua': 'New Hampshire',
        'Nashville': 'Tennessee',
        'Nebraska City': 'Nebraska',
        'New Delhi': None,  # Not in the US
        'New Haven': 'Connecticut',
        'New Hope': 'Pennsylvania',
        'New Orleans': 'Louisiana',
        'New York City': 'New York',
        'Norwalk': 'Connecticut',
        'Orlando': 'Florida',
        'Oxford': None,  # Not in the US
        'Philadelphia': 'Pennsylvania',
        'Phoenix': 'Arizona',
        'Pittsburgh': 'Pennsylvania',
        'Portland': 'Oregon',
        'Providence': 'Rhode Island',
        'Raleigh': 'North Carolina',
        'Reno': 'Nevada',
        'Richmond': 'Virginia',
        'SF Bay Area': 'California',
        'Sacramento': 'California',
        'Salt Lake City': 'Utah',
        'San Antonio': 'Texas',
        'San Diego': 'California',
        'San Luis Obispo': 'California',
        'Santa Barbara': 'California',
        'Santa Fe': 'New Mexico',
        'Sao Paulo': None,  # Not in the US
        'Seattle': 'Washington',
        'Selangor': None,  # Not in the US
        'Singapore': None,  # Not in the US
        'Spokane': 'Washington',
        'St. Louis': 'Missouri',
        'Stamford': 'Connecticut',
        'Stockholm': None,  # Not in the US
        'Sydney': None,  # Not in the US
        'Tampa Bay': 'Florida',
        'Tel Aviv': None,  # Not in the US
        'Tokyo': None,  # Not in the US
        'Toronto': None,  # Not in the US
        'Vancouver': None,  # Not in the US
        'Washington D.C.': 'District of Columbia',
        'Wilmington': 'Delaware'
    }
    
    return us_states[city]

all_states = [
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware',
    'District of Columbia', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa',
    'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
    'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
    'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
    'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
    'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'
]

us_layoffs = layoffs[(layoffs['country'] == 'United States')]
us_layoffs['location'] = us_layoffs['location'].apply(city_to_state)
us_layoffs = us_layoffs[pd.isna(us_layoffs['location']) != True]
us_layoffs = pd.DataFrame(us_layoffs.groupby('location')['total_laid_off'].sum()).reset_index()
us_layoffs = us_layoffs.set_index('location').reindex(all_states).fillna(0).reset_index()
us_layoffs.to_csv('us_layoffs.csv', index=False)
us_layoffs