In [None]:
!pip install rapidfuzz yfinance pandas numpy

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting yfinance
  Downloading yfinance-0.2.66-py2.py3-none-any.whl.metadata (6.0 kB)
Collecting pandas>=1.3.0 (from yfinance)
  Using cached pandas-2.3.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting numpy>=1.16.5 (from yfinance)
  Using cached numpy-2.3.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting requests>=2.31 (from yfinance)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.12.tar.gz (19 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting pytz>=2022.5 (from yfinance)
  Using cached pytz-2

In [39]:
from rapidfuzz import process, fuzz
import yfinance as yf
import pandas as pd
import numpy as np

import json

In [None]:
# SEC company tickers (source: https://www.sec.gov/files/company_tickers.json),
# read here from the local copy "tickers.json".
with open("tickers.json", "r") as f:
    tickers = json.load(f)

# Every value of the JSON object contributes its "title" and "ticker" fields.
companies = [entry["title"] for entry in tickers.values()]
abbrs = [entry["ticker"] for entry in tickers.values()]

# Title -> ticker lookup; if a title occurs more than once, the later entry wins.
comp_tckr = dict(zip(companies, abbrs))

# Initial recall data
df = pd.read_csv("recalls.csv")

In [None]:
import re
from collections import Counter

# Common words to ignore: legal-form suffixes and filler words that are
# stripped from names before matching.
stop_words = {
    "inc", "corp", "llc", "co", "ltd", "corporation",
    "company", "limited", "plc", "gmbh", "sa", "ag", "pte", "bv", "kg", "kgaa",
    "industries", "us", "of",
}

def tokenize(name):
    """Split a company name into lowercase tokens with stop words removed.

    Parameters
    ----------
    name : str
        Raw company / manufacturer name. Any non-string value (for example
        the NaN pandas uses for a missing cell, or None) is treated as having
        no tokens instead of raising.

    Returns
    -------
    list of str
        Tokens in their original order. Every character other than a-z, 0-9
        and space is replaced by a space before splitting, so punctuation
        acts as a token separator.
    """
    if not isinstance(name, str):
        # Missing cells arrive as float NaN / None and have no .lower().
        return []
    cleaned = re.sub(r'[^a-z0-9 ]', ' ', name.lower())  # punctuation -> space
    return [w for w in cleaned.split() if w not in stop_words]

# Tokenize every company name once, then derive two lookups from the result:
#   company_word_map: company name -> set of its tokens
#   word_counts:      token -> number of occurrences across all company names
tokenized = [(c, tokenize(c)) for c in companies]
company_word_map = {c: set(tokens) for c, tokens in tokenized}
word_counts = Counter(tok for _, tokens in tokenized for tok in tokens)


def match_by_unique_words(name):
    """Pick the known company whose tokens best overlap with ``name``.

    Each token of ``name`` that also appears in a company's token set adds
    ``1 / word_counts[token]`` to that company's score, so tokens that are
    rare across all company names weigh more than common ones.

    Returns the company name with the highest score (the first one
    encountered in ``company_word_map`` order when scores tie), or ``None``
    when ``name`` yields no tokens or no company shares a token with it.
    """
    query = tokenize(name)
    if not query:
        return None

    best_company = None
    best_score = 0
    for company, vocab in company_word_map.items():
        total = sum(1 / word_counts[tok] for tok in query if tok in vocab)
        # Strict comparison keeps the earliest company among equal scores
        # and ignores companies with no overlap (total == 0).
        if total > best_score:
            best_company, best_score = company, total

    return best_company

# Only the rows up to and including label 20 are matched (.loc slices are
# end-inclusive); no other rows are assigned by this statement.
manufacturers_subset = df.loc[:20, "Manufacturer"]
df.loc[:20, "fuzzy_company"] = manufacturers_subset.apply(match_by_unique_words)

In [136]:
# Show the DataFrame as this cell's output.
df

Unnamed: 0,Report Received Date,NHTSA ID,Recall Link,Manufacturer,Subject,Component,Mfr Campaign Number,Recall Type,Potentially Affected,Recall Description,Consequence Summary,Corrective Action,Park Outside Advisory,Do Not Drive Advisory,Completion Rate % (Blank - Not Reported),Stock Abbreviation,Opening Stock Value,Closing Stock Value,fuzzy_company,new
0,10/01/2025,25V656000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,Toyota Motor Engineering & Manufacturing,Driveshaft May Deform and Break,POWER TRAIN,25TB11 / 25TA11,Vehicle,5960,Toyota Motor Engineering & Manufacturing (Toyo...,A broken driveshaft can impair steering. Vehic...,Dealers will inspect and replace both front dr...,No,No,,NYSE:TM,192.39,191.83,TOYOTA MOTOR CORP/,toyota
1,10/01/2025,25V655000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"Nissan North America, Inc.",Fire Risk from Quick Charging Battery,ELECTRICAL SYSTEM,R25C8,Vehicle,19077,"Nissan North America, Inc. (Nissan) is recalli...",A quick charging battery that overheats increa...,Owners are advised not to use Level 3 quick ch...,No,No,,NYSE:NSANY,,,Central North Airport Group,nissan
2,09/30/2025,25V654000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"Volvo Car USA, LLC",Power Operated Tailgate May Drop Suddenly,STRUCTURE,R10342,Vehicle,1119,"Volvo Car USA, LLC (Volvo) is recalling certai...",A tailgate that suddenly drops can hit a perso...,Owners are advised not to use the POT function...,No,No,,NYSE:,,,"Mister Car Wash, Inc.",volvo
3,09/30/2025,25V653000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,Thor Motor Coach,LP Tank May Detach and Become a Road Hazard,EQUIPMENT,RC000331,Vehicle,23,Thor Motor Coach (TMC) is recalling certain 20...,A detached tank can create a road hazard for o...,The remedy is currently under development. Own...,No,No,,NYSE:,,,THOR INDUSTRIES INC,thor coach
4,09/26/2025,25E062000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"MEDIX SPECIALTY VEHICLES, INC.",Quick Liner Wheel Covers May Detach and Become...,WHEELS,,Equipment,396,"Medix Specialty Vehicles, Inc. (Medix) is reca...",A detached cover can create a road hazard for ...,"Medix will replace the covers, free of charge....",No,No,,NYSE:,,,"Envirotech Vehicles, Inc.",medix specialty vehicles
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29369,09/29/1966,66V003000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,Honda (American Honda Motor Co.),POWER TRAIN:TRANSMISSION:STANDARD:MANUAL,POWER TRAIN,NR (Not Reported),Vehicle,18572,,,,No,No,,NYSE:,,,,
29370,09/19/1966,66V178000,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"RENAULT, INCORPORATED",BRAKES:HYDRAULIC:LINES:HOSE:NON-METALLIC,"SERVICE BRAKES, HYDRAULIC",NR (Not Reported),Vehicle,125,POSSIBILITY THAT THE FRONT BRAKE HOSE END FITT...,IF SUCH LEAKAGE OCCURS AND IF ENOUGH BRAKE FLU...,(REPLACE FRONT BRAKE HOSES AND SEALS.),No,No,,NYSE:,,,,
29371,01/19/1966,66V032001,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"General Motors, LLC",STEERING:COLUMN,STEERING,NR (Not Reported),Vehicle,138878,,,,No,No,,NYSE:GM,,,,
29372,01/19/1966,66V032003,Go to Recall (https://www.nhtsa.gov/recalls?nh...,"General Motors, LLC",STEERING:COLUMN,STEERING,NR (Not Reported),Vehicle,70644,,,,No,No,,NYSE:GM,,,,


In [None]:
# GET STOCK TICKER
# Companies that are missing from comp_tckr (including rows with no matched
# company) map to NaN.
df["ticker"] = df["fuzzy_company"].map(comp_tckr)

# ENSURE DATETIME
# The recall dates are written month-first (e.g. 09/30/2025); stating the
# format avoids any day/month guessing. An already-converted column is
# left as it is.
df["Report Received Date"] = pd.to_datetime(df["Report Received Date"], format="%m/%d/%Y")

# GET ALL PRICING DATA
# Drop NaN first: rows without a ticker would otherwise put a NaN entry into
# the symbol list handed to yfinance.
unique_tickers = df["ticker"].dropna().unique().tolist()
start_date = df["Report Received Date"].min()
# yfinance treats `end` as exclusive, hence the extra day to include the latest date.
end_date = df["Report Received Date"].max() + pd.Timedelta(days=1)
if unique_tickers:
    price_data = yf.download(unique_tickers, start=start_date, end=end_date, group_by="ticker", progress=False)
else:
    # No ticker was resolved: keep an empty frame so that price lookups
    # simply find nothing instead of requesting a download of zero symbols.
    price_data = pd.DataFrame()

# HELPER TO GET PRICE DATA
def get_price(ticker, date):
    """Look up the open and close for ``ticker`` on ``date`` in ``price_data``.

    ``date`` is formatted as ``YYYY-MM-DD`` and used as a ``.loc`` key on the
    ticker's slice of the module-level ``price_data`` frame.

    Returns a two-element ``pd.Series`` ``[Open, Close]``. Any exception
    raised during the lookup (unknown ticker, date not present, value that
    cannot be formatted, ...) results in ``pd.Series([None, None])``.
    """
    try:
        quotes = price_data[ticker]
        row = quotes.loc[date.strftime("%Y-%m-%d")]
        result = pd.Series([row["Open"], row["Close"]])
    except Exception:
        result = pd.Series([None, None])
    return result

# OBTAIN OPENING AND CLOSING PRICE
# One get_price call per row; the two returned values become "open" and "close".
report_day_prices = df.apply(
    lambda row: get_price(row["ticker"], row["Report Received Date"]),
    axis=1,
)
df[["open", "close"]] = report_day_prices

# GRAB CURRENT INFO
# Output column -> key read from yfinance's `.info` dict.
# "shares_outstanding" is needed by the market-cap estimate at the end of
# this block.
info_fields = {
    "market_cap": "marketCap",
    "eps": "trailingEps",
    "full_name": "longName",
    "shares_outstanding": "sharesOutstanding",
}
numeric_info_cols = ["market_cap", "eps", "shares_outstanding"]

info_cache = {}
failed_tickers = []
for ticker in unique_tickers:
    # The ticker list can contain NaN for rows without a match; skip anything
    # that is not a symbol string.
    if not isinstance(ticker, str):
        continue
    try:
        info = yf.Ticker(ticker).info
        info_cache[ticker] = {col: info.get(key) for col, key in info_fields.items()}
    except Exception as exc:
        # Best-effort: one bad symbol must not stop the loop, but record it
        # so the failure is visible instead of silently dropped.
        failed_tickers.append((ticker, repr(exc)))

if failed_tickers:
    print(f"Could not fetch info for {len(failed_tickers)} ticker(s): {failed_tickers}")

# CREATE AND MERGE INFO
# reindex guarantees all expected columns exist even when nothing was fetched.
info_df = pd.DataFrame(info_cache).T.reindex(columns=list(info_fields))
for col in numeric_info_cols:
    # The transpose leaves object dtype; coerce so arithmetic below is numeric.
    info_df[col] = pd.to_numeric(info_df[col], errors="coerce")
info_df.index.name = "ticker"

# Drop info columns left over from a previous run of this cell so the merge
# does not produce "_x"/"_y" duplicates.
df = df.drop(columns=list(info_fields), errors="ignore").merge(info_df, on="ticker", how="left")

# ADJUST CAP
# Estimate only: close on the report date multiplied by the share count
# fetched now, not the share count on that date.
df["market_cap_on_date"] = pd.to_numeric(df["close"], errors="coerce") * df["shares_outstanding"]