In [1]:
import pandas as pd

In [2]:
class qualis:
    def __init__(self, categoria:str, df:pd.DataFrame):
        self.categoria = categoria
        self.qualis = df

    def get_url(self):
        url = ['http://www.wikicfp.com/cfp/servlet/tool.search?q=', '&year=t']
        cat = self.categoria
        url.insert(1, cat)
        url = ''.join(url)
        return url.replace(' ', '++')

    def wikiCFP(self) -> None:
        full_path = self.get_url()
        # planilha wikiCFP
        df = pd.read_html(full_path, header=[0])
        # somente conferencias 2023
        indice = df[2][df[2].Event=='Expired CFPs'].index
        
        if indice.empty:
            pass
        else:
            df[2] = df[2][df[2].index < indice[0]] 
        # dataframe com nomes das coferências
        df1 = df[2].iloc[::2]
        df1 = df1.drop(columns=['Where', 'Deadline'])
        df1.columns = ['Event', 'Conference']
        # exclui linhas pares e a ultima coluna
        df2 = df[2].iloc[1::2]
        # adicionando os nomes das conferencias
        df_wiki = df1.merge(df2, on='Event', how='left')

        df_wiki.loc[:,'Event'] = df_wiki.Event.replace('2023','', regex=True)
        df_wiki.reset_index(drop=True, inplace=True)
        # renomeando colunas
        df_wiki.columns = ['Sigla', 'Conferência', 'Quando', 'Onde', 'Deadline']
        # removing spaces 
        df_wiki.Sigla = df_wiki.Sigla.apply(lambda x:x.strip())

        return df_wiki

    def wikiQualis(self):
        df_wiki = self.wikiCFP()
        #df_qualis = self.Qualis()

        df_merged = df_wiki.merge(self.qualis, on='Sigla', how='left')
        
        df_merged = df_merged.fillna('-')
        # criando colunas com as data de inicio e fim das conferencias
        df_merged.insert(2, 'Início', df_merged.Quando.apply(lambda x: x.split('-')[0]))
        df_merged.insert(3, 'Fim', df_merged.Quando.apply(lambda x: x.split('-')[1]))
        # Limpando as colunas do deadline, remove data em parênteses
        df_merged.Deadline = df_merged.Deadline.apply(lambda x:x.split('(')[0].strip())
        # removendo coluna das datas compostas 
        df_merged.drop(columns='Quando', inplace=True)
        # removendo linhas sem deadline
        df_merged = df_merged[df_merged.Deadline.apply(lambda x: len(str(x)))>=11]
        # datetime formato
        df_merged[['Início', 'Fim', 'Deadline']] = df_merged[['Início', 'Fim', 'Deadline']].apply(pd.to_datetime, errors='coerce')
        # deixando as datas no formato d-m-y
        df1 = df_merged.select_dtypes('datetime64')
        df_merged[df1.columns] = df1.apply(lambda x: x.dt.strftime('%d-%m-%Y'))
        
        df_merged = df_merged.fillna('-')
        return df_merged
    


## Planilha Qualis


In [3]:
url_qualis = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vTZsntDnttAWGHA8NZRvdvK5A_FgOAQ_tPMzP7UUf-CHwF_3PHMj_TImyXN2Q_Tmcqm2MqVknpHPoT2/pubhtml?gid=0&single=true'
            
df2 = pd.read_html(url_qualis, header=[1])
df_qualis = df2[0].drop(columns=['1', 'evento'])
# remove 1 linha (contem apenas NaN)
df_qualis = df_qualis.iloc[1:]
# renomeando colunas
df_qualis.columns = ['Sigla', 'Qualis']

## Fazendo Busca de conferências por categoria

In [4]:
categoria = ['security','cyber security', 'cybersecurity','network security', 
            'information security','computer security', 'artificial inteligence',             
            'data mining', 'internet applications', 'big data', 'computing', 'internet', 
            'computer science', 'data analytics', 'informatics', 'internet of things',
            'IOT', 'computer science', 'privacy', 'data',  'cloud', 'cloud security',
            'cloud architectures', 'cloud computing security','cloud services']

## Gerando os dataframes

In [5]:
dataframes = []
for cat in categoria:
    print(cat)
    print('-'*25)
    busca = qualis(categoria=cat, df=df_qualis)
    dataframe = busca.wikiQualis()
    dataframes.append(dataframe)

qualis_df = pd.concat(dataframes, ignore_index=True)

security
-------------------------
cyber security
-------------------------
cybersecurity
-------------------------
network security
-------------------------
information security
-------------------------
computer security
-------------------------
artificial inteligence
-------------------------
data mining
-------------------------
internet applications
-------------------------
big data
-------------------------
computing
-------------------------
internet
-------------------------
computer science
-------------------------
data analytics
-------------------------
informatics
-------------------------
internet of things
-------------------------
IOT
-------------------------
computer science
-------------------------
privacy
-------------------------
data
-------------------------
cloud
-------------------------
cloud security
-------------------------
cloud architectures
-------------------------
cloud computing security
-------------------------
cloud services
-------------------

## Salvando dataframe para formato xlsx (excel)

In [None]:
qualis_df.shape
qualis_df.drop_duplicates(subset='Sigla', keep='first', inplace=True)
qualis_df.to_excel("planilha_qualis.xlsx", index=False)