In [1]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from pathlib import Path
from typing import List, Dict, Any, Tuple, Union

animals = {
    'category': ['cat', 'dog', 'fish', 'bird', 'reptile', 'insect'],
    'description': ['CIA/US This animal has whiskers and a tail, folklore says it hasd nine lives. It is a popular animal to have as a pet and they\'re known for their independence and solidarity. ', 
                    'NSA This animal is known as man\'s best friend. They are loyal and protective. They are known for their barking and wagging tails. They are also known for their keen sense of smell. They are also known for their ability to learn tricks. They are also known for their ability to learn tricks.',
                    'USA This animal lives in water and has gills. They are known for their scales and fins. They are also known for their ability to swim and breathe underwater. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.',
                    'UAE This animal has feathers and wings. They are known for their ability to fly. They are also known for their ability to sing. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.', 
                    'This animal has scales and is cold-blooded. They are known for their ability to shed their skin. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.',
                    'KGB This animal has six legs and is known for their ability to crawl. They are also known for their ability to fly. They are also known for their ability to sting. They are also known for their ability to sting. They are also known for their ability to sting.'
                    ]
}

df_animals = pd.DataFrame(animals)
pd.set_option('display.max_colwidth', None)
# display(df_animals)

class Acronym:
    def __init__(self, df_in:pd.DataFrame, colname:str) -> None:
        self.list_in = df_in[colname].to_list()
        self.path_csv = Path('/Users/adamkurth/Documents/vscode/code/nlp-demos/known_acronyms.csv')
        self.known_acronyms_df = pd.read_csv(self.path_csv)
        self.known_acronyms = self.known_acronyms_df['acronym'].to_dict()
        self.exceptions = {'pre', 'post', 'mid', 'non', 'anti', 'pro', 'trans',}
        self.main()
        
    
    def is_acronym(self, token:str) -> bool:
        num_caps = 2
        if self.is_known_acronym(token): return True
        if len(token) > 2 and sum(1 for c in token if c.isupper()) >= num_caps: return True
        return False
    
    def is_known_acronym(self, token:str) -> bool:
        return token in self.known_acronyms_df['acronym'].values
    
    def get_definition(self, acronym:str) -> str:
        try:
            return self.known_acronyms_df[self.known_acronyms_df['acronym'] == acronym]['definition'].values[0]
        except:
            Warning(f'{acronym} is not defined in the known acronyms list.')

    def is_unique(self, token:str, definition:str) -> bool:
        return token not in self.known_acronyms_df['definition'].values

    def split_token(self,token:str) -> Tuple[str,str]:
        return token.split('/')
    
    def filter(self, input_list) -> List[str]:
        caught, unique, undefined = [], [], 0
        words = word_tokenize(input_list)
        for token in words:
            if self.is_acronym(token): # checks is_known
                caught.append(token)
                definition = self.get_definition(token) 
                if definition is None:
                    continue
                else:
                    input_list = input_list.replace(token, definition)

                if self.is_unique(token, definition):
                    unique.append((token, definition)) 

        return [input_list, caught, unique]         
    
    def main(self) -> None:
        filtered_list, caught, unique = [self.filter(item)[0] for item in self.list_in], [self.filter(item)[1] for item in self.list_in], [self.filter(item)[2] for item in self.list_in]
        df_out = pd.DataFrame({
            'original': self.list_in,
            'filtered': filtered_list,
            'unique': unique, 
            'caught': caught})
        display(df_out)


# prep(df_animals, 'description')
acronym = Acronym(df_in=df_animals, colname='description')
acronym.main()


Unnamed: 0,original,filtered,unique,caught
0,"CIA/US This animal has whiskers and a tail, folklore says it hasd nine lives. It is a popular animal to have as a pet and they're known for their independence and solidarity.","CIA/US This animal has whiskers and a tail, folklore says it hasd nine lives. It is a popular animal to have as a pet and they're known for their independence and solidarity.",[],[CIA/US]
1,NSA This animal is known as man's best friend. They are loyal and protective. They are known for their barking and wagging tails. They are also known for their keen sense of smell. They are also known for their ability to learn tricks. They are also known for their ability to learn tricks.,National Security Agency This animal is known as man's best friend. They are loyal and protective. They are known for their barking and wagging tails. They are also known for their keen sense of smell. They are also known for their ability to learn tricks. They are also known for their ability to learn tricks.,"[(NSA, National Security Agency)]",[NSA]
2,USA This animal lives in water and has gills. They are known for their scales and fins. They are also known for their ability to swim and breathe underwater. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,United States of America This animal lives in water and has gills. They are known for their scales and fins. They are also known for their ability to swim and breathe underwater. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,"[(USA, United States of America)]",[USA]
3,UAE This animal has feathers and wings. They are known for their ability to fly. They are also known for their ability to sing. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,UAE This animal has feathers and wings. They are known for their ability to fly. They are also known for their ability to sing. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,[],[UAE]
4,This animal has scales and is cold-blooded. They are known for their ability to shed their skin. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,This animal has scales and is cold-blooded. They are known for their ability to shed their skin. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,[],[]
5,KGB This animal has six legs and is known for their ability to crawl. They are also known for their ability to fly. They are also known for their ability to sting. They are also known for their ability to sting. They are also known for their ability to sting.,Komitet Gosudarstvennoy Bezopasnosti This animal has six legs and is known for their ability to crawl. They are also known for their ability to fly. They are also known for their ability to sting. They are also known for their ability to sting. They are also known for their ability to sting.,"[(KGB, Komitet Gosudarstvennoy Bezopasnosti)]",[KGB]


Unnamed: 0,original,filtered,unique,caught
0,"CIA/US This animal has whiskers and a tail, folklore says it hasd nine lives. It is a popular animal to have as a pet and they're known for their independence and solidarity.","CIA/US This animal has whiskers and a tail, folklore says it hasd nine lives. It is a popular animal to have as a pet and they're known for their independence and solidarity.",[],[CIA/US]
1,NSA This animal is known as man's best friend. They are loyal and protective. They are known for their barking and wagging tails. They are also known for their keen sense of smell. They are also known for their ability to learn tricks. They are also known for their ability to learn tricks.,National Security Agency This animal is known as man's best friend. They are loyal and protective. They are known for their barking and wagging tails. They are also known for their keen sense of smell. They are also known for their ability to learn tricks. They are also known for their ability to learn tricks.,"[(NSA, National Security Agency)]",[NSA]
2,USA This animal lives in water and has gills. They are known for their scales and fins. They are also known for their ability to swim and breathe underwater. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,United States of America This animal lives in water and has gills. They are known for their scales and fins. They are also known for their ability to swim and breathe underwater. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,"[(USA, United States of America)]",[USA]
3,UAE This animal has feathers and wings. They are known for their ability to fly. They are also known for their ability to sing. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,UAE This animal has feathers and wings. They are known for their ability to fly. They are also known for their ability to sing. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,[],[UAE]
4,This animal has scales and is cold-blooded. They are known for their ability to shed their skin. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,This animal has scales and is cold-blooded. They are known for their ability to shed their skin. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs. They are also known for their ability to lay eggs.,[],[]
5,KGB This animal has six legs and is known for their ability to crawl. They are also known for their ability to fly. They are also known for their ability to sting. They are also known for their ability to sting. They are also known for their ability to sting.,Komitet Gosudarstvennoy Bezopasnosti This animal has six legs and is known for their ability to crawl. They are also known for their ability to fly. They are also known for their ability to sting. They are also known for their ability to sting. They are also known for their ability to sting.,"[(KGB, Komitet Gosudarstvennoy Bezopasnosti)]",[KGB]
