In [30]:
## Load Libraries

import pandas as pd
import os
import re

In [31]:
## Load and Clean Data

# Read the raw export and keep only rows that have a ticker symbol.
raw = pd.read_csv("data/wilshire_052924.csv")
rawn = raw.copy().dropna(subset=["Ticker"])

# The market-cap column is text containing "$" and "," characters; strip them
# literally before casting to float. regex=False is passed explicitly: under
# regex matching "$" is an end-of-string anchor, so the dollar sign would be
# left in place and the float conversion would fail.
rawn["Market Cap ($M)"] = (
    rawn["Market Cap ($M)"]
    .str.replace("$", "", regex=False)
    .str.replace(",", "", regex=False)
    .astype(float)
)

# Keep companies with a market cap of at least 20 (column unit: $M), one row
# per company name, with the cap column shortened to "Cap".
corps = (
    rawn.loc[rawn["Market Cap ($M)"] >= 20, ["Ticker", "Name", "Sector", "Market Cap ($M)"]]
    .drop_duplicates(subset="Name")
    .rename(columns={"Market Cap ($M)": "Cap"})
    .reset_index(drop=True)
)

In [32]:
## Cleaned Company Names

# Pre-clean every company name:
#   1. drop any parenthesised note,
#   2. remove commas and periods, and turn backticks into apostrophes,
#   3. append one trailing space so that space-delimited suffix patterns
#      (e.g. " inc ") can still match at the very end of the name.
_paren_note = re.compile(r"\([^)]*\)")
_punct_table = str.maketrans({",": None, ".": None, "`": "'"})

companies = []
for raw_name in corps["Name"].values:
    without_note = _paren_note.sub("", str(raw_name))
    companies.append(without_note.translate(_punct_table) + " ")

# Legal-form suffixes to strip from company names.
# The long forms are matched anywhere in the name; the short forms carry a
# leading and trailing space so they only match as stand-alone words.
_long_forms = [
    "incorporated",
    "corporation",
    "company",
    "companies",
    "holdings",
    "holding",
]
_short_forms = [
    " inc ",
    " corp ",
    " & co ",
    " co ",
    " plc ",
    " llc ",
    " ltd ",
]
patterns = _long_forms + _short_forms

# One alternation of the escaped literals, tried in list order.
regex_pattern = "|".join(map(re.escape, patterns))

# Build the cleaned-name list: delete every suffix match (case-insensitive)
# and trim the surrounding whitespace that is left behind.
_suffix_matcher = re.compile(regex_pattern, flags=re.IGNORECASE)
cleaned = [_suffix_matcher.sub("", company).strip() for company in companies]

# Handle exceptions: hand-corrected names, applied in order.
# Each pair is (name produced by the automatic cleaning, name to use instead).
# Only the first occurrence of each name is replaced, and a name that is not
# present in `cleaned` raises ValueError.
_manual_fixes = [
    ("Church & DwightInc", "Church & Dwight"),
    ("American Water WorksInc", "American Water Works"),
    ("Best BuyInc", "Best Buy"),
    ("TREXInc", "TREX"),
    ("DonaldsonInc", "Donaldson"),
    ("Simpson ManufacturingInc", "Simpson Manufacturing"),
    ("MSC Industrial DirectInc", "MSC Industrial Direct"),
    ("Franklin ElectricInc", "Franklin Electric"),
    ("Boston BeerInc", "Boston Beer"),
    ("GannettInc", "Gannett"),
    ("ManitowocInc", "Manitowoc"),
    ("Maui Land & PineappleInc", "Maui Land & Pineapple"),
    ("LannettInc", "Lannett"),
    ("MerckInc", "Merck"),
    ("KKRInc", "KKR"),
    ("McCormickInc", "McCormick"),
    ("Sturm RugerInc", "Sturm Ruger"),
    ("GreenhillInc", "Greenhill"),
    ("Comstock Inc", "Comstock"),
    ("Team", "Team Inc"),
    ("Dow", "Dow Inc"),
    ("Visa", "Visa Inc"),
    ("Amazoncom", "Amazon"),
    ("Alarmcom", "Alarm.com"),
    ("Carscom", "Cars.com"),
    ("1-800 Flowerscom", "1-800 Flowers.com"),
    ("CarPartscom", "CarParts.com"),
    ("Lilly", "Eli Lilly"),
    ("Meta Platforms", "Meta"),
    ("Uber Technologies", "Uber"),
    ("Chipotle Mexican Grill", "Chipotle"),
    ("Skechers U S A", "Skechers USA"),
    ("Sanfilippo  & Son", "John B Sanfilippo & Son"),
    ("Lowe's Cos", "Lowe's"),
    ("Marsh & McLennan Cos", "Marsh & McLennan"),
    ("Williams Cos", "Williams"),
    ("Estee Lauder Cos", "Estee Lauder"),
    ("Greenbrier Cos", "Greenbrier"),
    ("Haverty Furniture Cos", "Haverty Furniture"),
    ("Kingstone Cos", "Kingstone"),
    ("Noodles &", "Noodles & Company"),
    ("Superior Group of", "Superior Group of Companies"),
]

for auto_name, fixed_name in _manual_fixes:
    cleaned[cleaned.index(auto_name)] = fixed_name

# Attach the cleaned names as a new column; `cleaned` was built from
# corps["Name"] in row order, so positions line up with the rows of corps.
corps["NameCln"] = cleaned
# Persist the table without the index column; a later cell reads data/corps.csv back in.
corps.to_csv("data/corps.csv", index=False)

In [33]:
## Manually Search Output

# Dump the cleaned names, one per line, so they can be searched by hand.
# The encoding is fixed to UTF-8 so the file does not depend on the platform's
# default locale encoding (which can fail on non-ASCII company names).
with open("cleaning.txt", "w", encoding="utf-8") as outfile:
    outfile.write("\n".join(cleaned))

In [34]:
## Group by Sectors and Save

# Output group -> the "Sector" values pooled into that group.
sector_groups = {
    "industrials": ["Industrials", "Basic Materials"],
    "healthcare": ["Healthcare"],
    "finance": ["Financial Services", "Real Estate"],
    "tech": ["Technology", "Communication Services"],
    "consumer": ["Consumer Cyclical", "Consumer Defensive"],
    "energy": ["Energy", "Utilities"],
}
names = list(sector_groups)

# One re-indexed frame per group, held in a dict instead of being looked up
# by name through globals().
sector_frames = {
    group: corps[corps["Sector"].isin(sectors)].reset_index(drop=True)
    for group, sectors in sector_groups.items()
}

# Keep the per-group variables available under their existing names.
industrials = sector_frames["industrials"]
healthcare = sector_frames["healthcare"]
finance = sector_frames["finance"]
tech = sector_frames["tech"]
consumer = sector_frames["consumer"]
energy = sector_frames["energy"]

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs("data/corps", exist_ok=True)

for corp, df in sector_frames.items():
    df.to_csv(f"data/corps/{corp}.csv", index=False)

In [35]:
## Load Data Frames

names = ["industrials", "healthcare", "finance", "tech", "consumer", "energy"]

# Read every sector file into a dict, then bind the frames to explicit
# variables. This replaces building assignment statements as strings and
# running them through exec(), which hides the variable names from readers
# and tooling.
sector_frames = {corp: pd.read_csv(f"data/corps/{corp}.csv") for corp in names}

industrials = sector_frames["industrials"]
healthcare = sector_frames["healthcare"]
finance = sector_frames["finance"]
tech = sector_frames["tech"]
consumer = sector_frames["consumer"]
energy = sector_frames["energy"]

# Full company table, including the cleaned-name column used by full_name().
corps = pd.read_csv("data/corps.csv")

In [45]:
## Create Batches of size 100

# Sector frame whose tickers are batched below (here: the consumer group).
df = consumer

def full_name(ticker, company=True):
    """Return the cleaned company name for ``ticker``.

    The name is taken from the ``NameCln`` column of the module-level
    ``corps`` frame, using the first row whose ``Ticker`` equals ``ticker``.

    Args:
        ticker: Ticker symbol to look up.
        company: When true (the default), append the word " company" to the
            name; when false, return the cleaned name unchanged.

    Returns:
        The cleaned name, optionally followed by " company".

    Raises:
        IndexError: If no row of ``corps`` has the given ticker.
    """
    matching_names = corps.loc[corps["Ticker"] == ticker, "NameCln"]
    cleaned_name = matching_names.values[0]
    if not company:
        return cleaned_name
    return cleaned_name + " company"

batch_size = 100

# Alert query for every ticker in the selected sector frame.
var = df["Ticker"].apply(full_name).values.tolist()

# Split the queries into consecutive chunks of at most `batch_size`,
# keyed by a batch number that starts at 1.
batch = {
    number: var[start:start + batch_size]
    for number, start in enumerate(range(0, len(var), batch_size), start=1)
}

# Number of batches produced.
len(batch)

5

In [None]:
// Create a test alert in Google Alerts, find the request whose name starts with "create"
// in the browser's Network tab, copy it as fetch, and paste it below.
// NOTE(review): the `x` query parameter, the `authuser` index, the `x-client-data` header and
// the token near the end of `body` were copied from one captured browser request; presumably
// they are specific to that session/account and must be re-captured before reuse - confirm,
// and avoid committing live values.

fetch("https://www.google.com/alerts/create?x=AMJHsmV1xlyG7JX9o9uu46zrhRu03iKOFg%3A1722636416881&authuser=6", {
  "headers": {
    "accept": "*/*",
    "accept-language": "en-US,en;q=0.9",
    "content-type": "application/x-www-form-urlencoded;charset=UTF-8",
    "priority": "u=1, i",
    "sec-ch-ua": "\"Not/A)Brand\";v=\"8\", \"Chromium\";v=\"126\", \"Google Chrome\";v=\"126\"",
    "sec-ch-ua-arch": "\"x86\"",
    "sec-ch-ua-bitness": "\"64\"",
    "sec-ch-ua-full-version": "\"126.0.6478.183\"",
    "sec-ch-ua-full-version-list": "\"Not/A)Brand\";v=\"8.0.0.0\", \"Chromium\";v=\"126.0.6478.183\", \"Google Chrome\";v=\"126.0.6478.183\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-model": "\"\"",
    "sec-ch-ua-platform": "\"macOS\"",
    "sec-ch-ua-platform-version": "\"14.5.0\"",
    "sec-ch-ua-wow64": "?0",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "x-client-data": "CJS2yQEIpbbJAQipncoBCNGgygEIj+nKAQiTocsBCIagzQEIucjNAQivns4BCMGezgEI3qfOAQierM4BCOSvzgEIn7HOAQikss4BGI/OzQEYoJ3OARjup84BGLquzgEY642lFw=="
  },
  "referrer": "https://www.google.com/alerts?authuser=6",
  "referrerPolicy": "strict-origin-when-cross-origin",
  // URL-encoded `params` payload; the quoted value "test 2" (%22test%202%22) sits in the
  // position that createGoogleAlerts fills with the alert name.
  "body": "params=%5Bnull%2C%5Bnull%2Cnull%2Cnull%2C%5Bnull%2C%22test%202%22%2C%22com%22%2C%5Bnull%2C%22en%22%2C%22US%22%5D%2Cnull%2Cnull%2Cnull%2C0%2C1%5D%2Cnull%2C2%2C%5B%5Bnull%2C2%2C%22%22%2C%5B%5D%2C1%2C%22en-US%22%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2C%220%22%2Cnull%2Cnull%2C%22AB2Xq4hcilCERh73EFWJVHXx-io2lhh1EhC8UD8%22%5D%5D%2Cnull%2Cnull%2C%5B1%2C3%5D%5D%5D",
  "method": "POST",
  "mode": "cors",
  // Send the browser's cookies with the request.
  "credentials": "include"
});

In [49]:
## Copy Batch Here

# Display batch number 4 (batch keys start at 1) so the list can be copied
# into the JavaScript cell that creates the alerts.
batch[4]

['United Natural Foods company',
 'SpartanNash company',
 'Miller Industries company',
 'Standard Motor Products company',
 'Nu Skin Enterprises company',
 'Hain Celestial Group company',
 'Marinemax company',
 'Biglari company',
 'Dine Brands Global company',
 'Liquidity Services company',
 'Douglas Dynamics company',
 'Turning Point Brands company',
 'Myers Industries company',
 'Legacy Housing company',
 'Everi company',
 'SmartRent company',
 'Natural Grocers by Vitamin Cottage company',
 'Designer Brands company',
 'Calavo Growers company',
 'Beyond Meat company',
 'Build A Bear Workshop company',
 'Stoneridge company',
 'PlayAGS company',
 'Funko company',
 "Chuy's company",
 'Haverty Furniture company',
 'RCI Hospitality company',
 "Lands' End company",
 'Lovesac company',
 'Seneca Foods company',
 'Movado Group company',
 'Lindblad Expeditions company',
 'Carriage Services company',
 'Americas Car Mart company',
 'Zumiez company',
 'Lincoln Educational Services company',
 'Cheg

In [None]:
// Paste into Google Alerts console window; execute in batches of 100

/**
 * Create one Google Alert per entry of `companies` by replaying a captured
 * "create" request, one request at a time, pausing between requests.
 *
 * A failure for one company is logged and does not stop the remaining ones.
 *
 * NOTE(review): the endpoint's `x` token, the `authuser` index, the
 * `x-client-data` header and the token inside the body template come from a
 * captured browser request; presumably they must be refreshed per session.
 *
 * @param {string[]} companies - Alert queries, processed in order.
 * @returns {Promise<void>} Resolves after every request has been attempted.
 */
async function createGoogleAlerts(companies) {
    const endpoint = "https://www.google.com/alerts/create?x=AMJHsmV1xlyG7JX9o9uu46zrhRu03iKOFg%3A1722636416881&authuser=6";

    for (let i = 0; i < companies.length; i++) {
        const company = companies[i];

        // The name is placed inside a JSON string literal in the payload, so
        // JSON-escape it first (quotes, backslashes), drop the surrounding
        // quotes that the template already supplies, then URL-encode it.
        const alertName = encodeURIComponent(JSON.stringify(String(company)).slice(1, -1));
        const body = `params=%5Bnull%2C%5Bnull%2Cnull%2Cnull%2C%5Bnull%2C%22${alertName}%22%2C%22com%22%2C%5Bnull%2C%22en%22%2C%22US%22%5D%2Cnull%2Cnull%2Cnull%2C0%2C1%5D%2Cnull%2C2%2C%5B%5Bnull%2C2%2C%22%22%2C%5B%5D%2C1%2C%22en-US%22%2Cnull%2Cnull%2Cnull%2Cnull%2Cnull%2C%220%22%2Cnull%2Cnull%2C%22AB2Xq4hcilCERh73EFWJVHXx-io2lhh1EhC8UD8%22%5D%5D%2Cnull%2Cnull%2C%5B1%2C3%5D%5D%5D`;

        try {
            const response = await fetch(endpoint, {
                "headers": {
                    "accept": "*/*",
                    "accept-language": "en-US,en;q=0.9",
                    "content-type": "application/x-www-form-urlencoded;charset=UTF-8",
                    "priority": "u=1, i",
                    "sec-ch-ua": "\"Not/A)Brand\";v=\"8\", \"Chromium\";v=\"126\", \"Google Chrome\";v=\"126\"",
                    "sec-ch-ua-arch": "\"x86\"",
                    "sec-ch-ua-bitness": "\"64\"",
                    "sec-ch-ua-full-version": "\"126.0.6478.183\"",
                    "sec-ch-ua-full-version-list": "\"Not/A)Brand\";v=\"8.0.0.0\", \"Chromium\";v=\"126.0.6478.183\", \"Google Chrome\";v=\"126.0.6478.183\"",
                    "sec-ch-ua-mobile": "?0",
                    "sec-ch-ua-model": "\"\"",
                    "sec-ch-ua-platform": "\"macOS\"",
                    "sec-ch-ua-platform-version": "\"14.5.0\"",
                    "sec-ch-ua-wow64": "?0",
                    "sec-fetch-dest": "empty",
                    "sec-fetch-mode": "cors",
                    "sec-fetch-site": "same-origin",
                    "x-client-data": "CJS2yQEIpbbJAQipncoBCNGgygEIj+nKAQiTocsBCIagzQEIucjNAQivns4BCMGezgEI3qfOAQierM4BCOSvzgEIn7HOAQikss4BGI/OzQEYoJ3OARjup84BGLquzgEY642lFw=="
                },
                "referrer": "https://www.google.com/alerts?authuser=6",
                "referrerPolicy": "strict-origin-when-cross-origin",
                "body": body,
                "method": "POST",
                "mode": "cors",
                "credentials": "include"
            });
            const text = await response.text();

            // fetch() only rejects on network failure; an HTTP error status
            // still resolves, so check it before reporting success.
            if (response.ok) {
                console.log(`Alert created for: ${company}`);
            } else {
                console.error(`Error creating alert for: ${company} (HTTP ${response.status})`);
            }
            console.log(text);
        } catch (error) {
            console.error(`Error creating alert for: ${company}`, error);
        }

        // Nothing follows the final request, so there is nothing to wait for.
        if (i === companies.length - 1) {
            break;
        }

        // Linearly increase the delay (2 seconds per request sent so far),
        // capped at a random value between 60 and 120 seconds.
        const maxDelay = 60 + Math.floor(Math.random() * 61);
        const delay = Math.min((i + 1) * 2, maxDelay) * 1000;
        console.log(`Waiting ${delay / 1000} seconds before the next request...`);
        await new Promise(resolve => setTimeout(resolve, delay));
    }
}

// Paste the batch list copied from the Python cell as the argument below.
// The returned promise is not awaited here; progress and errors are reported
// through the console messages logged inside createGoogleAlerts.
createGoogleAlerts(['United Natural Foods company',
'SpartanNash company',
'Miller Industries company',
'Standard Motor Products company',
'Nu Skin Enterprises company',
'Hain Celestial Group company',
'Marinemax company',
'Biglari company',
'Dine Brands Global company',
'Liquidity Services company',
'Douglas Dynamics company',
'Turning Point Brands company',
'Myers Industries company',
'Legacy Housing company',
'Everi company',
'SmartRent company',
'Natural Grocers by Vitamin Cottage company',
'Designer Brands company',
'Calavo Growers company',
'Beyond Meat company',
'Build A Bear Workshop company',
'Stoneridge company',
'PlayAGS company',
'Funko company',
"Chuy's company",
'Haverty Furniture company',
'RCI Hospitality company',
"Lands' End company",
'Lovesac company',
'Seneca Foods company',
'Movado Group company',
'Lindblad Expeditions company',
'Carriage Services company',
'Americas Car Mart company',
'Zumiez company',
'Lincoln Educational Services company',
'Chegg company',
'GP Strategies company',
'Limoneira company',
"Denny's company",
'MasterCraft Boat company',
'Marine Products company',
'JJill company',
'1-800 Flowers.com company',
'Superior Group of Companies company',
'El Pollo Loco company',
'Village Super Market company',
'Johnson Outdoors company',
'American Public Education company',
'Qurate Retail company',
'Genesco company',
'Sleep Number company',
"Nature's Sunshine Products company",
'Weyco Group company',
"Nathan's Famous company",
'Medifast company',
'Potbelly company',
'Rocky Brands company',
'Clarus company',
'Vera Bradley company',
'Lifetime Brands company',
'Cooper-Standard company',
'Lifeway Foods company',
'Stitch Fix company',
'Revlon company',
'J Alexanders company',
'Destination XL Group company',
'Citi Trends company',
'Alico company',
'Jakks Pacific company',
'Virco Manufacturing company',
'Escalade company',
'Hooker Furnishings company',
'Flexsteel Industries company',
'Commercial Vehicle Group company',
'Full House Resorts company',
'Childrens Place company',
'ONE Group Hospitality company',
'New Home company',
'Lakeland Industries company',
'Acme United company',
'Tillys company',
"Sportsman's Warehouse company",
'Bassett Furniture Industries company',
'Duluth company',
'UNIFI company',
'WW International company',
'Canterbury Park company',
'Kewaunee Scientific company',
'Cato company',
'Strattec Security company',
'Red Robin Gourmet Burgers company',
'Motorcar Parts of America company',
'Travelzoo company',
'Superior Industries International company',
'Big Lots company',
'Lifevantage company',
'Serve Robotics company',
'Bridgford Foods company',
'Red Lion Hotels company']);