In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm
import time
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.snowball import SnowballStemmer
from nltk.stem import WordNetLemmatizer

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, KFold, cross_val_score
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn import preprocessing, model_selection, pipeline
from sklearn.metrics import f1_score, roc_auc_score
from pathlib import Path
import sys


In [2]:
BASE_DIR = str(Path().cwd().parent.resolve())
sys.path.insert(0, BASE_DIR)

In [3]:
df = pd.read_csv(f'{BASE_DIR}/data/fake_job_postings.csv', engine = 'python', encoding = 'utf8')
df.head()

Unnamed: 0,job_id,title,location,department,salary_range,company_profile,description,requirements,benefits,telecommuting,has_company_logo,has_questions,employment_type,required_experience,required_education,industry,function,fraudulent
0,1,Marketing Intern,"US, NY, New York",Marketing,,"We're Food52, and we've created a groundbreaki...","Food52, a fast-growing, James Beard Award-winn...",Experience with content management systems a m...,,0,1,0,Other,Internship,,,Marketing,0
1,2,Customer Service - Cloud Video Production,"NZ, , Auckland",Success,,"90 Seconds, the worlds Cloud Video Production ...",Organised - Focused - Vibrant - Awesome!Do you...,What we expect from you:Your key responsibilit...,What you will get from usThrough being part of...,0,1,0,Full-time,Not Applicable,,Marketing and Advertising,Customer Service,0
2,3,Commissioning Machinery Assistant (CMA),"US, IA, Wever",,,Valor Services provides Workforce Solutions th...,"Our client, located in Houston, is actively se...",Implement pre-commissioning and commissioning ...,,0,1,0,,,,,,0
3,4,Account Executive - Washington DC,"US, DC, Washington",Sales,,Our passion for improving quality of life thro...,THE COMPANY: ESRI – Environmental Systems Rese...,"EDUCATION: Bachelor’s or Master’s in GIS, busi...",Our culture is anything but corporate—we have ...,0,1,0,Full-time,Mid-Senior level,Bachelor's Degree,Computer Software,Sales,0
4,5,Bill Review Manager,"US, FL, Fort Worth",,,SpotSource Solutions LLC is a Global Human Cap...,JOB TITLE: Itemization Review ManagerLOCATION:...,QUALIFICATIONS:RN license in the State of Texa...,Full Benefits Offered,0,1,1,Full-time,Mid-Senior level,Bachelor's Degree,Hospital & Health Care,Health Care Provider,0


In [4]:
textdf = df.iloc[:, [1,5,6,7,8,17]]
textdf = textdf.fillna('Unspecified')
textdf.head()

Unnamed: 0,title,company_profile,description,requirements,benefits,fraudulent
0,Marketing Intern,"We're Food52, and we've created a groundbreaki...","Food52, a fast-growing, James Beard Award-winn...",Experience with content management systems a m...,Unspecified,0
1,Customer Service - Cloud Video Production,"90 Seconds, the worlds Cloud Video Production ...",Organised - Focused - Vibrant - Awesome!Do you...,What we expect from you:Your key responsibilit...,What you will get from usThrough being part of...,0
2,Commissioning Machinery Assistant (CMA),Valor Services provides Workforce Solutions th...,"Our client, located in Houston, is actively se...",Implement pre-commissioning and commissioning ...,Unspecified,0
3,Account Executive - Washington DC,Our passion for improving quality of life thro...,THE COMPANY: ESRI – Environmental Systems Rese...,"EDUCATION: Bachelor’s or Master’s in GIS, busi...",Our culture is anything but corporate—we have ...,0
4,Bill Review Manager,SpotSource Solutions LLC is a Global Human Cap...,JOB TITLE: Itemization Review ManagerLOCATION:...,QUALIFICATIONS:RN license in the State of Texa...,Full Benefits Offered,0


In [85]:
def clean_text(text):
    text = text.lower()
    
    text = text.replace(r'&amp;', '&')
    text = text.replace(r'&nbsp;', ' ')
    text = text.replace(r'&lt;', '<')
    text = text.replace(r'&gt;', '>')
    text = text.replace(r'&quot;', '"')
    text = text.replace(r'\u00a0', ' ')
    
    text = re.sub('\'re', ' are', text)
    text = re.sub('\'ve', ' have', text)
    text = re.sub('\'m', ' am', text)
    text = re.sub('\'t', ' not', text)
    text = re.sub('\'s', ' ', text)
    
    sublist = '\[.*?\]|https\:\/\/\S+|www\.\S+|<.*?>+|\n|\w*\d\w*'
    text = re.sub(sublist, '', text)
    
    text = re.sub('[^a-zA-Z0-9]', ' ', text)
    
    return text

아래 키워드에 특수문자 나와서 그거 제거하는거 추가함

In [6]:
import warnings
warnings.filterwarnings('ignore')

In [86]:
text_cleaned = textdf.copy(deep = True)

for col in text_cleaned.columns[:-1]:
    text_cleaned[col] = text_cleaned[col].astype(str).apply(lambda x : clean_text(x))
    
text_cleaned.head()

Unnamed: 0,title,company_profile,description,requirements,benefits,fraudulent
0,marketing intern,we are and we have created a groundbreaking ...,a fast growing james beard award winning on...,experience with content management systems a m...,unspecified,0
1,customer service cloud video production,seconds the worlds cloud video production se...,organised focused vibrant awesome do you...,what we expect from you your key responsibilit...,what you will get from usthrough being part of...,0
2,commissioning machinery assistant cma,valor services provides workforce solutions th...,our client located in houston is actively se...,implement pre commissioning and commissioning ...,unspecified,0
3,account executive washington dc,our passion for improving quality of life thro...,the company esri environmental systems rese...,education bachelor s or master s in gis busi...,our culture is anything but corporate we have ...,0
4,bill review manager,spotsource solutions llc is a global human cap...,job title itemization review managerlocation ...,qualifications rn license in the state of texa...,full benefits offered,0


# TF-IDF stemming

In [8]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

def sort_coo(coo_matrix):
    tuples = zip(coo_matrix.col, coo_matrix.data)
    return sorted(tuples, key = lambda x: (x[1], x[0]), reverse=True)

def extract_topn_from_vector(feature_names, sorted_items, topn=10):
    sorted_items = sorted_items[:topn]

    score_vals = []
    feature_vals = []
    
    for idx, score in sorted_items:
        score_vals.append(round(score, 3))
        feature_vals.append(feature_names[idx])

    results = {}
    for idx in range(len(feature_vals)):
        results[feature_vals[idx]]=score_vals[idx]
    
    return results

def get_keywords_tfidf(vectorizer, feature_names, doc):
    tf_idf_vector = vectorizer.transform([doc])
    
    sorted_items = sort_coo(tf_idf_vector.tocoo())

    keywords = extract_topn_from_vector(feature_names,sorted_items)
    
    return list(keywords.keys())


def tfidf_stemmed_keywords(DATA, col, topn, target_is_fake = True, sub_na = True, is_stem = True):
    corp = DATA[col][DATA.fraudulent != target_is_fake].tolist()
    
    sw = stopwords.words('english')
    cv = CountVectorizer(max_df = 0.85, stop_words = sw)
    word_cv = cv.fit_transform(corp)
    
    tfidf_trans = TfidfTransformer(smooth_idf = True, use_idf = True)
    tfidf_trans.fit(word_cv)
    
    corp_test = DATA[col][DATA.fraudulent == target_is_fake].tolist()
    
    feature_names = cv.get_feature_names()
    
    k_dict = {}
    for doc in corp_test:
        tfidf_vec = tfidf_trans.transform(cv.transform([doc]))
        sorted_items = sort_coo(tfidf_vec.tocoo())
        keywords = extract_topn_from_vector(feature_names, sorted_items, 20)
        if sub_na and 'unspecified' in keywords:
            continue
        k_dict.update(keywords)
    
    k_list = list(dict(sorted(k_dict.items(), key = lambda item : item[1], reverse = True)).keys())[:topn]
    
    if is_stem:
        ss = SnowballStemmer('english')
        k_list = [ss.stem(x) for x in k_list]
    
    return k_list

In [165]:
for col in text_cleaned.columns[:-1]:
    print('20 TF-IDF stemmed keywords in {} of FAKE'.format(col))
    print(tfidf_stemmed_keywords(text_cleaned, col, 20))
    print()

20 TF-IDF stemmed keywords in title of FAKE
['admin', 'incom', 'personnel', 'bookkeep', 'administr', 'awesom', 'dispatch', 'optician', 'writer', 'ux', 'rn', 'success', 'csr', 'club', 'model', 'asset', 'instal', 'optic', 'drill', 'divis']

20 TF-IDF stemmed keywords in company_profile of FAKE
['instant', 'sti', 'notif', 'well', 'reward', 'realti', 'gari', 'system', 'secur', 'recruit', 'netgear', 'financ', 'properti', 'import', 'earn', 'resourc', 'welldesign', 'rail', 'entrylevel', 'invest']

20 TF-IDF stemmed keywords in description of FAKE
['de', 'fill', 'church', 'graphit', 'websit', 'webcam', 'colorado', 'vessel', 'bin', 'freelanc', 'food', 'taipei', 'bw', 'dyson', 'pipe', 'ae', 'build', 'claim', 'control', 'project']

20 TF-IDF stemmed keywords in requirements of FAKE
['word', 'cvresum', 'de', 'necessari', 'requir', 'start', 'account', 'typist', 'paypal', 'glass', 'church', 'clean', 'equivalentgood', 'degreegood', 'attitud', 'forc', 'fee', 'requiredprofici', 'exp', 'hat']

20 TF-IDF

In [166]:
for col in text_cleaned.columns[:-1]:
    print('20 TF-IDF unstemmed keywords in {} of FAKE'.format(col))
    print(tfidf_stemmed_keywords(text_cleaned, col, 20, is_stem = False))
    print()

20 TF-IDF unstemmed keywords in title of FAKE
['admin', 'income', 'personnel', 'bookkeeper', 'administration', 'awesome', 'dispatcher', 'optician', 'writers', 'ux', 'rn', 'success', 'csr', 'club', 'model', 'asset', 'installation', 'optical', 'drilling', 'division']

20 TF-IDF unstemmed keywords in company_profile of FAKE
['instant', 'sti', 'notification', 'wellness', 'reward', 'realty', 'gary', 'systems', 'secured', 'recruits', 'netgear', 'financing', 'properties', 'imports', 'earnings', 'resource', 'welldesigned', 'rail', 'entrylevel', 'investment']

20 TF-IDF unstemmed keywords in description of FAKE
['de', 'fill', 'church', 'graphite', 'website', 'webcam', 'colorado', 'vessel', 'bin', 'freelance', 'food', 'taipei', 'bw', 'dyson', 'piping', 'ae', 'build', 'claims', 'controlled', 'project']

20 TF-IDF unstemmed keywords in requirements of FAKE
['word', 'cvresume', 'de', 'necessary', 'requirement', 'start', 'accounts', 'typist', 'paypal', 'glasses', 'church', 'clean', 'equivalentgood',

In [167]:
for col in text_cleaned.columns[:-1]:
    print('20 TF-IDF stemmed keywords in {} of REAL'.format(col))
    print(tfidf_stemmed_keywords(text_cleaned, col, 20, target_is_fake = False))
    print()

20 TF-IDF stemmed keywords in title of REAL
['product', 'head', 'leader', 'design', 'level', 'financ', 'web', 'expert', 'field', 'coordin', 'care', 'driver', 'oil', 'center', 'appli', 'need', 'tester', 'remot', 'geologist', 'electrician']

20 TF-IDF stemmed keywords in company_profile of REAL
['di', 'luxuri', 'billion', 'receiv', 'protect', 'ltd', 'franchis', 'advoc', 'reliabl', 'real', 'develop', 'analyst', 'line', 'much', 'environment', 'research', 'secur', 'build', 'locat', 'rental']

20 TF-IDF stemmed keywords in description of REAL
['name', 'scada', 'alpha', 'fraud', 'crew', 'pipe', 'engineerloc', 'er', 'claim', 'solar', 'archiv', 'content', 'librari', 'javascript', 'wadur', 'christma', 'amet', 'percent', 'modur', 'ink']

20 TF-IDF stemmed keywords in requirements of REAL
['doctor', 'de', 'beauti', 'transport', 'dermatolog', 'wood', 'support', 'amet', 'cnc', 'sas', 'io', 'estat', 'extens', 'peoplesoft', 'lawson', 'scada', 'icon', 'demonstr', 'valid', 'maid']

20 TF-IDF stemmed key

In [168]:
for col in text_cleaned.columns[:-1]:
    print('20 TF-IDF unstemmed keywords in {} of REAL'.format(col))
    print(tfidf_stemmed_keywords(text_cleaned, col, 20, target_is_fake = False, is_stem = False))
    print()

20 TF-IDF unstemmed keywords in title of REAL
['production', 'head', 'leader', 'designer', 'level', 'finance', 'web', 'expert', 'field', 'coordinator', 'care', 'driver', 'oil', 'center', 'apply', 'needed', 'tester', 'remote', 'geologist', 'electrician']

20 TF-IDF unstemmed keywords in company_profile of REAL
['di', 'luxurious', 'billion', 'receive', 'protection', 'ltd', 'franchise', 'advocate', 'reliability', 'real', 'develops', 'analyst', 'lines', 'much', 'environmental', 'research', 'security', 'building', 'location', 'rental']

20 TF-IDF unstemmed keywords in description of REAL
['name', 'scada', 'alpha', 'fraud', 'crew', 'pipe', 'engineerlocation', 'ers', 'claims', 'solar', 'archiving', 'content', 'library', 'javascript', 'waduration', 'christmas', 'amet', 'percent', 'moduration', 'ink']

20 TF-IDF unstemmed keywords in requirements of REAL
['doctors', 'de', 'beauty', 'transport', 'dermatology', 'wood', 'support', 'amet', 'cnc', 'sas', 'ios', 'estate', 'extensive', 'peoplesoft', '

di, de, io, et 이런 문자는 왜 나오는지 잘 모르겠음;

# Rake stemming

In [9]:
from rake_nltk import Rake
from collections import Counter

def get_keyword_rake(text, length):
    r = Rake()
    r.extract_keywords_from_text(text)
    full_keyword = r.get_ranked_phrases_with_scores()
    
    result = []
    for sc, k in full_keyword:
        if len(k.split()) == length:
            result.append((k, sc))
    return dict(result[:20])

def multi_stem(phrase):
    tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
    ph_t = tokenizer.tokenize(phrase)
    
    ss = SnowballStemmer('english')
    ph_s = [ss.stem(x) for x in ph_t]
    
    return ' '.join(ph_s)

def rake_stemmed_keywords(DATA, col, length, topn, target_is_fake = True, sub_na = True, is_stem = True):
    k_dict = {}
    
    for x in DATA[col][DATA.fraudulent == target_is_fake]:
        kw = get_keyword_rake(x, length)
        if sub_na and 'unspecified' in kw:
            continue
        k_dict.update(kw)
    
    k_list = list(dict(sorted(k_dict.items(), key = lambda item : item[1], reverse = True)).keys())[:topn]
    
    if is_stem:
        k_list = [multi_stem(x) for x in k_list]
        
    return k_list

In [191]:
for col in text_cleaned.columns[:-1]:
    print('20 Rake stemmed keywords in {} of FAKE'.format(col))
    print(rake_stemmed_keywords(text_cleaned, col, 2, 20))
    print()

20 Rake stemmed keywords in title of FAKE
['ice technician', 'forward cap', 'sale execut', 'servic associ', 'executivehead chef', 'administr assist', 'job vacanc', 'hse manag', 'administr assist', 'structur engin', 'mechan technician', 'machin depart', 'data entri', 'hardwareelectr engin', 'java develop', 'mainten supervisor', 'industri mechan', 'cad oper', 'beverag director', 'product market']

20 Rake stemmed keywords in company_profile of FAKE
['mission critic', 'refin resourc', 'park lane', 'azco inc', 'real estat', 'dyncorp intern', 'general construct', 'unmatch save', 'new compani', 'defens technolog', 'denver colorado', 'inctor consult', 'luxuri comfort', 'servic necessari', 'invest servic', 'storey hotel', 'cell phone', 'reliabl incom', 'produc work', 'intellig agenc']

20 Rake stemmed keywords in description of FAKE
['salari account', 'guest experi', 'web applic', 'cash receipt', 'manag peopl', 'full knowledg', 'project engin', 'requir minimum', 'project deliver', 'system arch

In [192]:
for col in text_cleaned.columns[:-1]:
    print('20 Rake unstemmed keywords in {} of FAKE'.format(col))
    print(rake_stemmed_keywords(text_cleaned, col, 2, 20, is_stem = False))
    print()

20 Rake unstemmed keywords in title of FAKE
['ice technician', 'forward cap', 'sales executive', 'service associate', 'executivehead chef', 'administrative assistant', 'job vacancies', 'hse manager', 'administrative assistance', 'structural engineer', 'mechanical technician', 'machining department', 'data entry', 'hardwareelectrical engineer', 'java developer', 'maintenance supervisor', 'industrial mechanic', 'cad operator', 'beverage director', 'product marketing']

20 Rake unstemmed keywords in company_profile of FAKE
['mission critical', 'refined resources', 'park lane', 'azco inc', 'real estate', 'dyncorp international', 'general construction', 'unmatched savings', 'new company', 'defense technology', 'denver colorado', 'inctor consulting', 'luxury comfort', 'services necessary', 'investment services', 'storey hotel', 'cell phones', 'reliable income', 'produce work', 'intelligence agencies']

20 Rake unstemmed keywords in description of FAKE
['salary account', 'guests experience', 

In [195]:
for col in text_cleaned.columns[:-1]:
    print('20 Rake stemmed keywords in {} of REAL'.format(col))
    print(rake_stemmed_keywords(text_cleaned, col, 2, 20, target_is_fake = False))
    print()

20 Rake stemmed keywords in title of REAL
['pipe engin', 'multipl posit', 'hr advisor', 'fulltim open', 'print design', 'mobil app', 'saa sale', 'chief execut', 'account execut', 'back end', 'groom salon', 'market intern', 'account clerk', 'content mf', 'southendonsea traineeship', 'visual design', 'market assist', 'engag manag', 'sponsorship businessfriendcom', 'custom servic']

20 Rake stemmed keywords in company_profile of REAL
['en vertu', 'famili violenc', 'quantit analysi', 'staf compani', 'window server', 'net framework', 'current effort', 'consum media', 'oracl technolog', 'fallarrest system', 'que simplifiquen', 'friend servic', 'medic insur', 'skyplan group', 'servic offer', 'greek market', 'technic compani', 'south delhi', 'maintain applic', 'web mobil']

20 Rake stemmed keywords in description of REAL
['de vacatur', 'de wereldleid', 'ci od', 'met ervar', 'het boekjaar', 'follow school', 'frisco tx', 'retail dealer', 'comput engin', 'teach english', 'london digit', 'braven i

In [196]:
for col in text_cleaned.columns[:-1]:
    print('20 Rake unstemmed keywords in {} of REAL'.format(col))
    print(rake_stemmed_keywords(text_cleaned, col, 2, 20, target_is_fake = False, is_stem = False))
    print()

20 Rake unstemmed keywords in title of REAL
['piping engineer', 'multiple positions', 'hr advisor', 'fulltime opening', 'print designer', 'mobile app', 'saas sales', 'chief executive', 'account executives', 'back end', 'grooming salon', 'marketing intern', 'accounting clerk', 'content mf', 'southendonsea traineeships', 'visual designer', 'marketing assistant', 'engagement manager', 'sponsorship businessfriendcom', 'customer service']

20 Rake unstemmed keywords in company_profile of REAL
['en vertu', 'family violence', 'quantitative analysis', 'staffing company', 'windows server', 'net framework', 'current efforts', 'consumer media', 'oracle technologies', 'fallarrest system', 'que simplifiquen', 'friendly service', 'medical insurance', 'skyplan group', 'service offerings', 'greek market', 'technical companies', 'south delhi', 'maintain applications', 'web mobile']

20 Rake unstemmed keywords in description of REAL
['de vacature', 'de wereldleider', 'ci od', 'met ervaring', 'het boekja

# keywords와 본문 text 관계 -> counting

In [10]:
def text_c_to_j(text):
    tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
    ss = SnowballStemmer('english')
    stop_words = stopwords.words('english')
    
    text_list = tokenizer.tokenize(text)
    text_list = [ss.stem(x) for x in text_list if x not in stop_words]
    result = ' '.join(text_list)
    
    return result

In [88]:
text_df = text_cleaned.copy(deep = True)

for col in text_df.columns[:-1]:
    text_df[col] = text_df[col].apply(lambda x : text_c_to_j(x))

text_df.head()

Unnamed: 0,title,company_profile,description,requirements,benefits,fraudulent
0,market intern,creat groundbreak award win cook site support ...,fast grow jame beard award win onlin food comm...,experi content manag system major plus blog co...,unspecifi,0
1,custom servic cloud video product,second world cloud video product servic second...,organis focus vibrant awesom passion custom se...,expect key respons communic client second team...,get usthrough part second team gain experi wor...,0
2,commiss machineri assist cma,valor servic provid workforc solut meet need c...,client locat houston activ seek experienc comm...,implement pre commiss commiss procedur rotari ...,unspecifi,0
3,account execut washington dc,passion improv qualiti life geographi heart ev...,compani esri environment system research insti...,educ bachelor master gis busi administr relat ...,cultur anyth corpor collabor creativ environ p...,0
4,bill review manag,spotsourc solut llc global human capit manag c...,job titl item review managerloc fort worth tx ...,qualif rn licens state texasdiploma bachelor s...,full benefit offer,0


In [89]:
F_RATIO = (df.fraudulent == 1).sum() / df.shape[0]
T_RATIO = (df.fraudulent == 0).sum() / df.shape[0]

In [90]:
# TF-IDF counting
def tfidf_keyword_count(df, col, topn, target_is_fake = True):
    k_list = tfidf_stemmed_keywords(df, col, topn, target_is_fake = target_is_fake)
    
    f_count = []
    r_count = []
    for keyword in k_list:
        f_count.append(df[col][df.fraudulent == 1].apply(lambda x : x.count(keyword)).sum())
        r_count.append(df[col][df.fraudulent == 0].apply(lambda x : x.count(keyword)).sum())
    
    result = pd.DataFrame({'keyword' : k_list})
    f_c_ratio = f_count / F_RATIO
    r_c_ratio = r_count / T_RATIO
    
    result['fake_count'] = f_c_ratio / (f_c_ratio + r_c_ratio)
    result['real_count'] = r_c_ratio / (f_c_ratio + r_c_ratio)
    
    return result

def tfidf_select_eff_keywords(df, col, topn, percent = 0.9, target_is_fake = True):
    k_count = tfidf_keyword_count(df, col, topn, target_is_fake)
    tar_col = 'fake_count' if target_is_fake else 'real_count'
    
    eff_k_list = []
    for i, count in enumerate(k_count[tar_col]):
        if count >= percent:
            eff_k_list.append(k_count.keyword[i])
    
    return eff_k_list

In [91]:
for col in text_df.columns[:-1]:
    print('efficient keywords of FAKE in {}'.format(col))
    print(tfidf_select_eff_keywords(text_df, col, 30))
    print()

efficient keywords of FAKE in title
['optician', 'optic', 'geologist']

efficient keywords of FAKE in company_profile
['accion', 'netgear', 'thame']

efficient keywords of FAKE in description
['webcam', 'taipei', 'scaffold', 'hyatt', 'paycheck']

efficient keywords of FAKE in requirements
['typist', 'church', 'glass', 'cfr', 'maritim', 'teachabl', 'abilitymust', 'culinari']

efficient keywords of FAKE in benefits
['fulltim', 'paybonus', 'sharingbonus', 'offvac', 'zero', 'femal', 'gbp', 'male']



In [92]:
for col in text_df.columns[:-1]:
    print('efficient keywords of REAL in {}'.format(col))
    print(tfidf_select_eff_keywords(text_df, col, 30, target_is_fake = False))
    print()

efficient keywords of REAL in title
[]

efficient keywords of REAL in company_profile
['medic', 'research', 'technic', 'environ', 'privat']

efficient keywords of REAL in description
[]

efficient keywords of REAL in requirements
['mso']

efficient keywords of REAL in benefits
[]

