In [1]:
import nltk

import pandas as pd
import numpy as np

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel


In [2]:
# Fetch the NLTK resources used below: sentence/word tokenizer, POS tagger,
# WordNet (for lemmatization) and the stop-word lists.
for resource in ("punkt", "averaged_perceptron_tagger", "wordnet", "stopwords"):
    nltk.download(resource)

# POS tags that mark a token as a verb form; tokens carrying one of these
# tags are lemmatized as verbs in preprocess_sentences.
verb_codes = {"VB", "VBD", "VBG", "VBN", "VBP", "VBZ"}

# English stop words as a set for O(1) membership tests.
stop_words = set(stopwords.words("english"))

lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package punkt to /home/vscode/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/vscode/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package wordnet to /home/vscode/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package stopwords to /home/vscode/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [6]:
# Load the conference table from a pickled pandas object.
# NOTE(review): this is an absolute, environment-specific path — consider a
# configurable data directory so the notebook runs elsewhere.
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
cs_conferences = pd.read_pickle("/workspaces/VRA/conference_rec/wikicfp_cs.pkl")


In [9]:
# Keep only the three fields used for recommendations, give them short
# names, and key the frame by conference title.
cs_rec = (
    cs_conferences[["Conference Title", "WikiCFP Tags", "Conference Description"]]
    .rename(
        columns={
            "Conference Title": "title",
            "WikiCFP Tags": "tags",
            "Conference Description": "description",
        }
    )
    .set_index("title")
)

# "soup" is the single text field that gets preprocessed and vectorized:
# the tags, one space, then the description.
cs_rec["soup"] = cs_rec["tags"] + " " + cs_rec["description"]

In [11]:
def preprocess_sentences(text):
    """Normalize free text into a space-separated string of lemmas.

    The text is lower-cased, tokenized and POS-tagged with NLTK. Tokens whose
    tag is in ``verb_codes`` are lemmatized as verbs; all other tokens use the
    lemmatizer's default part of speech. Stop words and tokens that are not
    purely alphabetic (numbers, punctuation, contraction fragments such as
    "n't" or "'s") are dropped.

    Parameters
    ----------
    text : str
        Raw text. Any non-string value (for example NaN from a missing
        field) is treated as empty text instead of raising.

    Returns
    -------
    str
        The kept lemmas joined by single spaces; "" if nothing survives.
    """
    if not isinstance(text, str):
        return ""

    kept = []
    for word, tag in nltk.pos_tag(nltk.word_tokenize(text.lower())):
        # Test the raw token as well as the lemma: lemmatization can turn a
        # stop word into a different string that is no longer in the list
        # (the default noun lemmatization is known to shorten e.g. "us").
        if word in stop_words:
            continue
        if tag in verb_codes:
            lemmatized = lemmatizer.lemmatize(word, 'v')
        else:
            lemmatized = lemmatizer.lemmatize(word)
        if lemmatized not in stop_words and lemmatized.isalpha():
            kept.append(lemmatized)

    # No contraction expansion ("n't" -> " not", "'s" -> " is", ...) is done
    # here: every kept token is purely alphabetic, so the joined string can
    # never contain an apostrophe and such replacements would be no-ops.
    return ' '.join(kept)

# Clean every row's combined tags + description text; the resulting column is
# what the TF-IDF vectorizer is fit on.
cs_rec["processed_soup"] = cs_rec["soup"].apply(preprocess_sentences)


In [15]:
# Vectorize the cleaned text: one TF-IDF row per conference.
tfidfvec = TfidfVectorizer()
tfidf_model = tfidfvec.fit_transform(cs_rec["processed_soup"])

# Pairwise dot products between all rows (with the second argument omitted,
# the matrix is compared against itself). With TfidfVectorizer's default L2
# row normalization these dot products are cosine similarities.
# NOTE(review): the result is a dense n x n array — fine for a few thousand
# rows, worth revisiting if the dataset grows.
cos_sim = linear_kernel(tfidf_model)

In [16]:
# Position -> title lookup. It carries the default 0..n-1 index, so a label
# found here doubles as a row position into the similarity matrix.
indices = pd.Series(cs_rec.index)

def recommendations(title, cosine_sim = cos_sim, top_n = 10):
    """Return the titles of the conferences most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title as it appears in ``cs_rec.index`` (the match is a plain
        equality test, so whitespace matters). If the title occurs more than
        once, the first occurrence is used as the query.
    cosine_sim : array-like
        Square similarity matrix whose rows and columns follow the row order
        of ``cs_rec``. The default is the ``cos_sim`` object that existed when
        this function was defined; re-run this cell after recomputing it.
    top_n : int
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    list of str
        Titles ordered from most to least similar, excluding the query row.

    Raises
    ------
    IndexError
        If ``title`` is not present in ``cs_rec.index``.
    """
    matches = indices[indices == title].index
    if len(matches) == 0:
        raise IndexError(f"no conference titled {title!r} in cs_rec")
    index = matches[0]

    # Remove the query row explicitly instead of assuming it sorts first:
    # an entry with identical text ties with the query's own score, and an
    # unstable sort may then place the query anywhere among the ties.
    # mergesort is stable, which makes the order of tied scores reproducible.
    similarity_scores = (
        pd.Series(cosine_sim[index])
        .drop(index)
        .sort_values(ascending = False, kind = "mergesort")
    )
    top_positions = similarity_scores.iloc[:max(top_n, 0)].index

    # Look titles up on the index directly; no per-item list rebuild.
    titles = cs_rec.index
    return [titles[i] for i in top_positions]

In [17]:
# Example query: conferences most similar to a cloud-computing conference.
recommendations("CLOUD 2021 : 10th International Conference on Cloud Computing: Services and Architecture")

['IJCCSA 2021 : International Journal on Cloud Computing: Services and Architecture ',
 'CLBD 2021 : 2nd International Conference on Cloud and Big Data',
 'CBIoT 2021 : 2nd International Conference on Cloud, Big Data and IoT ',
 'CLSB  2021 : 2nd International Conference on Cloud Computing, Security and Blockchain ',
 'CBW  2021 : 2nd International Conference on Cloud, Big Data and Web Services ',
 'IBCOM  2021 : 2nd International Conference on IoT, Blockchain & Cloud Computing',
 'CCSEA 2021 : 11th International Conference on Computer Science, Engineering and Applications',
 'EMSA  2021 : 10th International Conference on Embedded Systems and Applications ',
 'ICCSEA 2021 : 11th International Conference on Computer Science, Engineering and Applications ',
 'CSIT 2021 : 8th International Conference on Computer Science and Information Technology ']

In [18]:
# Example query using a journal title.
recommendations("ECIJ 2021 : Electrical & Computer Engineering: An International Journal")

['EEIEJ 2021 : Emerging Trends in Electrical, Electronics & Instrumentation Engineering: An international Journal',
 'ELELIJ 2021 : Electrical and Electronics Engineering: An International Journal ',
 'ADEIJ 2021 : Advances in Engineering: an International Journal ',
 'CSEIJ 2021 : Computer Science & Engineering: An International Journal',
 'IJCSEIT 2021 : International Journal of Computer Science, Engineering and Information Technology',
 'MLAIJ 2021 : Machine Learning and Applications: An International Journal ',
 'IJSEA 2021 : International Journal of Software Engineering & Applications - ERA 2018 Indexed',
 'IJCSEA 2021 : International Journal of Computer Science, Engineering and Applications ',
 'IJACEEE 2021 :  International Journal of Applied Control, Electrical and Electronics Engineering ',
 'IJCTCM 2021 : International Journal of Control Theory and Computer Modelling ']