In [1]:
#Parsing tools
from lxml import etree
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import re

#Data handling:
import pandas as pd
import json
import numpy as np
from collections import defaultdict

#System
import os
from io import StringIO, BytesIO

#Connection
import mysql.connector as my
import pyodbc
import requests

In [37]:
class mysql_connector:
    """Thin wrapper around a pyodbc connection and cursor.

    Despite the class name, the connection string selects
    "ODBC Driver 17 for SQL Server" with ``Trusted_Connection=yes``; no MySQL
    API is used.

    Table and column names are interpolated directly into the SQL text
    (identifiers cannot be bound as ``?`` parameters), so they must come from
    trusted code, never from user input.

    Instances can be used as context managers; the connection is closed on exit.
    """

    def __init__(self, server, database):
        """Open a connection to ``database`` on ``server`` and create a cursor."""
        self.server = server
        self.database = database

        self.cnxn_str = ("Driver={ODBC Driver 17 for SQL Server};"
                    f"Server={server};"
                    f"Database={database};"
                    "Trusted_Connection=yes;")
        self.cnxn = pyodbc.connect(self.cnxn_str)
        self.cursor = self.cnxn.cursor()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never suppress the caller's exception

    def close(self):
        """Close the cursor and the underlying connection."""
        try:
            self.cursor.close()
        finally:
            self.cnxn.close()

    def readtable_from_mysql(self, tablename):
        """Return every row of ``dbo.<tablename>`` as a pandas DataFrame."""
        db_df = pd.read_sql(f"SELECT * FROM dbo.{tablename};", self.cnxn)
        self.cnxn.commit()
        return db_df

    def create_table_in_sql(self, tablename, col_list_with_types):
        """Create ``tablename`` from column definitions such as ``'CELEX text'``."""
        col_string = ', '.join(col_list_with_types)
        self.cursor.execute(f"CREATE TABLE [{tablename}] ({col_string})")
        self._commit()

    def upload_to_mysql(self, table_name, col_list, val_list):
        """Insert the rows of ``val_list`` into ``table_name`` and commit.

        Args:
            table_name: Target table (interpolated into the statement).
            col_list: Column names, in the same order as the values of each row.
            val_list: Sequence of rows; each row is a sequence of values.

        Raises:
            Exception: If the insert fails. The driver's original error is
                attached as ``__cause__`` so the root cause stays visible.
        """
        # Nothing to insert; pyodbc rejects an empty parameter sequence.
        if hasattr(val_list, '__len__') and len(val_list) == 0:
            return

        col_string = ', '.join(col_list)
        placeholders = ', '.join(['?'] * len(col_list))
        sql = f"INSERT INTO {table_name} ({col_string}) VALUES ({placeholders})"

        try:
            self.cursor.fast_executemany = False
            self.cursor.executemany(sql, val_list)
        except Exception as exc:
            # Same exception type and message as before, but chained to the real error.
            raise Exception('Executefast failed"') from exc

        self._commit()

    def does_table_exist(self, tablename):
        """Return True if ``information_schema.tables`` lists ``tablename``."""
        # The name is compared as a value here, so it can be bound as a parameter.
        sql = "SELECT table_name FROM information_schema.tables WHERE table_name = ?;"
        self.cursor.execute(sql, (tablename,))
        return self.cursor.fetchone() is not None

    def _commit(self):
        """Commit the current transaction on the connection."""
        self.cnxn.commit()

def db_upload(df, col_list, col_list_with_types, table_name, database_name,
              server='DK2CPHTIS01\\TIS'):
    """Insert the rows of ``df`` into ``table_name``, creating the table if missing.

    Args:
        df: DataFrame whose columns are ordered like ``col_list``; values are
            sent positionally, so the DataFrame's own column labels are not used.
        col_list: Column names for the INSERT statement.
        col_list_with_types: Column definitions (e.g. ``'CELEX text'``) used
            only when the table has to be created.
        table_name: Target table.
        database_name: Database to connect to.
        server: Server/instance name; defaults to the previously hard-coded host.
    """
    db_obj = mysql_connector(server, database_name)
    try:
        # Missing values must reach the driver as None (SQL NULL). Casting to
        # object first keeps pandas from coercing None back to NaN in numeric columns.
        val_list = df.astype(object).where(df.notna(), None).values.tolist()

        # Create the table on first use.
        if not db_obj.does_table_exist(table_name):
            db_obj.create_table_in_sql(table_name, col_list_with_types)

        db_obj.upload_to_mysql(table_name, col_list, val_list)
    finally:
        # Always release the connection, even when the upload fails.
        db_obj.cnxn.close()

def db_read(table_name, database_name, server='DK2CPHTIS01\\TIS'):
    """Return the full contents of ``table_name`` in ``database_name`` as a DataFrame.

    Args:
        table_name: Table to read.
        database_name: Database to connect to.
        server: Server/instance name; defaults to the previously hard-coded host.
    """
    db_obj = mysql_connector(server, database_name)
    try:
        return db_obj.readtable_from_mysql(table_name)
    finally:
        # The DataFrame is fully loaded by now, so the connection can be released.
        db_obj.cnxn.close()

In [3]:
database_name = 'testDatabase'
REQUEST_TIMEOUT_S = 30  # seconds; without a timeout requests.get() can block indefinitely


def mark_elements(soup, css_class, prefix, suffix=''):
    """Replace the content of each element with class ``css_class`` by its
    own text wrapped in ``prefix`` / ``suffix``, so it stays identifiable
    after the markup is stripped."""
    for element in soup.find_all(class_=css_class):
        element.string = prefix + element.get_text() + suffix


def fetch_article_text(link):
    """Download ``link`` and return the text of its ``<p>`` elements joined by spaces.

    Returns an empty string when the page cannot be fetched (invalid URL,
    network error, timeout, or an HTTP 4xx/5xx response).
    """
    try:
        response = requests.get(link, timeout=REQUEST_TIMEOUT_S)
        # Do not store an HTTP error page as if it were the article.
        response.raise_for_status()
    except requests.RequestException:
        return ''

    # Name the parser explicitly; lxml is what bs4 picks by default when it is installed.
    soup = BeautifulSoup(response.text, 'lxml')

    # Elements with class "super" (footnote references, per the original note) get a "##" prefix.
    mark_elements(soup, 'super', '##')
    # Elements with class "sti-art" (bold text, per the original note) are wrapped in "%#".
    mark_elements(soup, 'sti-art', '%#', '%#')

    return ' '.join(paragraph.get_text() for paragraph in soup.find_all('p'))


# Read CELEX identifiers and links from the database
df = db_read('VerdictsSOAP', database_name)
celex = df['CELEX'].values.tolist()
links = df['Link'].values.tolist()

# Parse articles (one entry per link; '' when the page could not be fetched)
articles = [fetch_article_text(link) for link in links]

# Column labels match col_list below; the upload itself is positional.
df_with_articles = pd.DataFrame({'CELEX': celex, 'Link': links, 'Articles': articles})
assert len(df_with_articles) == len(links)

# Upload to database
col_list = ['CELEX', 'Link', 'Articles']
# NOTE(review): SQL Server's `text` type is deprecated and non-Unicode;
# nvarchar(max) may be a better fit for these columns. Confirm before changing the schema.
col_list_with_types = ['CELEX text', 'Link text', 'Articles text']
db_upload(df_with_articles, col_list, col_list_with_types, 'VerdictsRequest', database_name)


  db_df = pd.read_sql(f"SELECT * FROM dbo.{tablename};", self.cnxn)


Exception: Executefast failed"