### Base class for SQL writer

In [None]:
# ##### MySQL Writer Class
import pandas as pd
import pymysql
from datetime import datetime, timezone
import pytz

class MySQLWriter:
    """
    Persistence layer over a single PyMySQL connection.

    On construction the target database and the tables 'volumes',
    'archives', 'articles', 'contents', 'authors' and 'authors_articles'
    are created if they do not exist yet.  The public methods read tables
    into pandas DataFrames, insert/update rows from DataFrames and export
    tables to Excel files under 'data/'.

    Error handling is best-effort: database errors inside the read, write
    and export methods are printed and swallowed rather than raised, so a
    failing row does not abort a long import.
    """

    # Column order of the parameter tuple expected by _ARCHIVE_UPSERT.
    _ARCHIVE_COLUMNS = (
        'archive_url', 'volume_number', 'archive_title',
        'archive_publication_date', 'editor', 'import_date', 'status',
    )

    # Shared by insert_archives() and update_archives().
    _ARCHIVE_UPSERT = """
        INSERT INTO archives (archive_url, volume_number, archive_title, archive_publication_date, editor, import_date, status)
        VALUES (%s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            volume_number = VALUES(volume_number),
            archive_title = VALUES(archive_title),
            archive_publication_date = VALUES(archive_publication_date),
            editor = VALUES(editor),
            import_date = VALUES(import_date),
            status = VALUES(status)
    """

    # Column order of the parameter tuple expected by _ARTICLE_UPSERT.
    _ARTICLE_COLUMNS = (
        'article_url', 'article_title', 'doi', 'article_publication_date',
        'author', 'keyword', 'abstract', 'archive_url', 'content_url',
        'import_date', 'status',
    )

    # Shared by insert_articles() and update_articles().
    _ARTICLE_UPSERT = """
        INSERT INTO articles (article_url, article_title, doi, article_publication_date, author, keyword, abstract, archive_url, content_url, import_date, status)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            article_title = VALUES(article_title),
            doi = VALUES(doi),
            article_publication_date = VALUES(article_publication_date),
            author = VALUES(author),
            keyword = VALUES(keyword),
            abstract = VALUES(abstract),
            archive_url = VALUES(archive_url),
            content_url = VALUES(content_url),
            import_date = VALUES(import_date),
            status = VALUES(status)
    """

    def __init__(self, host, user, password, database):
        """
        Connect to MySQL, select `database` (creating it if needed) and
        make sure all tables used by this class exist.

        Args:
            host: MySQL server host name.
            user: MySQL user name.
            password: Password for `user`.
            database: Name of the database to create/select.

        Raises:
            Exception: whatever PyMySQL raises when connecting, creating
                the database or selecting it fails.
        """
        self.connection = pymysql.connect(
            host=host,
            user=user,
            password=password,
            charset="utf8mb4",
            cursorclass=pymysql.cursors.DictCursor,
        )

        try:
            self.create_database_if_not_exists(database)
            self.connection.select_db(database)
        except Exception:
            # Do not leak the open socket when the database is unusable.
            self.connection.close()
            raise

        # Table creation reports its own errors; it never raises.
        self.create_table_volumes()
        self.create_table_archives()
        self.create_table_articles()
        self.create_table_contents()
        self.create_table_authors()
        self.create_table_authors_articles()

    # -----------------------------------------------------------------------------------
    # Private helpers
    # -----------------------------------------------------------------------------------

    def _create_table(self, table_name, ddl):
        """Run a CREATE TABLE statement and report the outcome on stdout."""
        try:
            with self.connection.cursor() as cursor:
                cursor.execute(ddl)
                self.connection.commit()
                print(f"Table '{table_name}' is ready.")
        except Exception as e:
            print(f"Error creating table '{table_name}':", e)

    def _fetch_dataframe(self, query, label):
        """
        Run a SELECT and return all rows as a DataFrame.

        On any error a message naming `label` is printed and an empty
        DataFrame is returned.
        """
        try:
            with self.connection.cursor() as cursor:
                cursor.execute(query)
                result = cursor.fetchall()
            return pd.DataFrame(result)
        except Exception as e:
            print(f"Error reading '{label}' table: {e}")
            return pd.DataFrame()

    def _row_exists(self, table_name, criteria):
        """
        Return True when `table_name` holds a row matching every
        column/value pair in the `criteria` dict.

        Table and column names are interpolated into the SQL text, so they
        must come from trusted code; values are passed as parameters.
        Returns False when the lookup itself fails.
        """
        where = " AND ".join(f"{column} = %s" for column in criteria)
        query = f"SELECT COUNT(1) AS count FROM {table_name} WHERE {where}"
        try:
            with self.connection.cursor() as cursor:
                cursor.execute(query, tuple(criteria.values()))
                result = cursor.fetchone()
                return result['count'] > 0
        except Exception as e:
            print(f"Error checking record existence in table '{table_name}':", e)
            return False

    def _write_row(self, query, row, columns, ok_message, error_message):
        """
        Execute `query` with the values of `columns` taken from `row` and
        commit.  Prints `ok_message` on success; on any failure rolls back
        and prints `error_message` followed by the exception.
        """
        try:
            params = tuple(row[column] for column in columns)
            with self.connection.cursor() as cursor:
                cursor.execute(query, params)
                self.connection.commit()
            print(ok_message)
        except Exception as e:
            self.connection.rollback()
            print(error_message, e)

    def _update_rows(self, query, df, columns):
        """
        Execute `query` once per row of `df` (parameters taken from
        `columns`, in order), committing after every row.  Exceptions
        propagate to the caller, which stops at the first failing row.
        """
        with self.connection.cursor() as cursor:
            for _, row in df.iterrows():
                cursor.execute(query, tuple(row[column] for column in columns))
                self.connection.commit()

    def _export_query_to_excel(self, query, excel_file_path, success_label, error_prefix):
        """
        Run `query` and write the result to `excel_file_path`.

        The target directory is created when missing.  Failures are
        printed as '<error_prefix>: <exception>' and not raised.
        """
        import os

        try:
            with self.connection.cursor() as cursor:
                cursor.execute(query)
                result = cursor.fetchall()
            df = pd.DataFrame(result)

            os.makedirs(os.path.dirname(excel_file_path) or '.', exist_ok=True)
            df.to_excel(excel_file_path, index=False)
            print(f"{success_label} has been exported to '{excel_file_path}' successfully.")
        except Exception as e:
            print(f"{error_prefix}: {e}")

    # -----------------------------------------------------------------------------------
    # General Methods
    # -----------------------------------------------------------------------------------

    def create_database_if_not_exists(self, database):
        """
        Create `database` when it does not exist yet.

        The name is backtick-quoted (embedded backticks doubled) because
        identifiers cannot be sent as query parameters.
        """
        quoted = "`" + str(database).replace("`", "``") + "`"
        with self.connection.cursor() as cursor:
            cursor.execute(f"CREATE DATABASE IF NOT EXISTS {quoted}")
        self.connection.commit()

    def add_import_timestamp(self, df):
        """
        Add an 'import_date' column holding the current local time.

        The DataFrame is modified in place and also returned.
        """
        df['import_date'] = datetime.now()
        return df

    def record_exists(self, table_name, primary_key_column, primary_key_value):
        """
        Return True when `table_name` has a row whose `primary_key_column`
        equals `primary_key_value`.

        Returns False (after printing the error) when the lookup fails.
        `table_name` and `primary_key_column` are interpolated into the SQL
        text and must not come from untrusted input.
        """
        return self._row_exists(table_name, {primary_key_column: primary_key_value})

    def close_connection(self):
        """Close the MySQL connection; calling it again is a no-op."""
        if self.connection:
            self.connection.close()
            # Forget the handle so a second call does not close twice.
            self.connection = None
            print("MySQL connection closed.")

    # -----------------------------------------------------------------------------------
    # Archives Table Methods
    # -----------------------------------------------------------------------------------

    def create_table_archives(self):
        """Create the 'archives' table if it does not already exist."""
        self._create_table("archives", """
        CREATE TABLE IF NOT EXISTS archives (
            archive_url VARCHAR(500) PRIMARY KEY,
            volume_number VARCHAR(255),
            archive_title VARCHAR(500),
            archive_title_clean VARCHAR(500),
            archive_publication_date DATE,
            editor TEXT,
            import_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            status VARCHAR(10)
        );
        """)

    def read_table_archives(self, status=None):
        """
        Return the 'archives' table ordered by archive_url.

        Args:
            status: pass 'PENDING' to return only pending rows; any other
                value returns every row.
        """
        if status == 'PENDING':
            query = "SELECT * FROM archives WHERE STATUS ='PENDING' order by archive_url;"
        else:
            query = "SELECT * FROM archives order by archive_url;"
        return self._fetch_dataframe(query, "archives")

    def insert_archives(self, archives_df):
        """
        Insert the rows of `archives_df` whose archive_url is not stored
        yet; existing URLs are skipped.  Expects the columns listed in
        _ARCHIVE_COLUMNS.
        """
        for _, row in archives_df.iterrows():
            archive_url = row['archive_url']
            if self.record_exists("archives", "archive_url", archive_url):
                print(f"Archive URL '{archive_url}' already exists. Skipping.")
                continue
            self._write_row(
                self._ARCHIVE_UPSERT, row, self._ARCHIVE_COLUMNS,
                f"Inserted archive URL: {archive_url}",
                f"Error inserting archive URL '{archive_url}':",
            )

    def update_archives(self, archives_df):
        """
        Insert or overwrite every row of `archives_df` (upsert keyed on
        archive_url).  Expects the columns listed in _ARCHIVE_COLUMNS.
        """
        for _, row in archives_df.iterrows():
            archive_url = row['archive_url']
            self._write_row(
                self._ARCHIVE_UPSERT, row, self._ARCHIVE_COLUMNS,
                f"Updated archive_url: {archive_url}",
                f"Error updating archive_url '{archive_url}':",
            )

    def update_archives_status(self, df):
        """
        Set archives.status from `df` (columns 'status', 'archive_url').
        Stops at the first failing row; earlier rows stay committed.
        """
        update_query = """
        UPDATE archives
        SET status = %s WHERE archive_url = %s;
        """
        try:
            self._update_rows(update_query, df, ('status', 'archive_url'))
            print("Archives status is updated")
        except Exception as e:
            self.connection.rollback()
            print("Error updating archives status:", e)

    # -----------------------------------------------------------------------------------
    # Articles Table Methods
    # -----------------------------------------------------------------------------------

    def create_table_articles(self):
        """Create the 'articles' table if it does not already exist."""
        self._create_table("articles", """
        CREATE TABLE IF NOT EXISTS articles (
            article_url VARCHAR(600) PRIMARY KEY,
            article_title VARCHAR(500),
            article_title_clean VARCHAR(500),
            doi VARCHAR(500),
            article_publication_date DATE,
            author TEXT,
            author_clean TEXT,
            keyword TEXT,
            abstract TEXT,
            abstract_clean TEXT,
            archive_url VARCHAR(600),
            content_url VARCHAR(600),
            import_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            status VARCHAR(10),
            lang VARCHAR(10),
            abstract_clean_en TEXT
        );
        """)

    def read_table_articles(self, status=None):
        """
        Return the 'articles' table.

        Args:
            status: pass 'PENDING' to return only pending rows; any other
                value (including the default None) returns every row.
        """
        if status == "PENDING":
            query = "SELECT * FROM articles where status='PENDING';"
        else:
            query = "SELECT * FROM articles;"
        return self._fetch_dataframe(query, "articles")

    def insert_articles(self, articles_df):
        """
        Insert the rows of `articles_df` whose article_url is not stored
        yet; existing URLs are skipped.  Expects the columns listed in
        _ARTICLE_COLUMNS.
        """
        for _, row in articles_df.iterrows():
            article_url = row['article_url']
            if self.record_exists("articles", "article_url", article_url):
                print(f"Article URL '{article_url}' already exists. Skipping.")
                continue
            self._write_row(
                self._ARTICLE_UPSERT, row, self._ARTICLE_COLUMNS,
                f"Inserted article URL: {article_url}",
                f"Error inserting article URL '{article_url}':",
            )

    def update_articles(self, articles_df):
        """
        Insert or overwrite every row of `articles_df` (upsert keyed on
        article_url).  Expects the columns listed in _ARTICLE_COLUMNS.
        """
        for _, row in articles_df.iterrows():
            article_url = row['article_url']
            self._write_row(
                self._ARTICLE_UPSERT, row, self._ARTICLE_COLUMNS,
                f"Updated article_url: {article_url}",
                f"Error updating article_url '{article_url}':",
            )

    # -----------------------------------------------------------------------------------
    # Contents Table Methods
    # -----------------------------------------------------------------------------------

    def create_table_contents(self):
        """Create the 'contents' table if it does not already exist."""
        self._create_table("contents", """
        CREATE TABLE IF NOT EXISTS contents (
            content_url VARCHAR(600) PRIMARY KEY,
            iframe_url VARCHAR(600),
            content LONGTEXT,
            content_clean LONGTEXT,
            lang VARCHAR(10),
            content_clean_en LONGTEXT
        );
        """)

    def read_table_contents(self):
        """Return the whole 'contents' table as a DataFrame."""
        return self._fetch_dataframe("SELECT * FROM contents;", "contents")

    def read_table_articles_wo_contents(self):
        """
        Return the non-empty content_url values of 'articles' that have no
        matching row in 'contents'.
        """
        query = "SELECT content_url FROM articles where content_url not in (SELECT content_url from contents) and content_url <> '' and content_url is not null;"
        return self._fetch_dataframe(query, "contents")

    def insert_contents(self, contents_df):
        """
        Insert the rows of `contents_df` (columns 'content_url',
        'iframe_url', 'content') whose content_url is not stored yet.
        """
        query = """
        INSERT INTO contents (content_url, iframe_url, content)
        VALUES (%s, %s, %s)
        ON DUPLICATE KEY UPDATE
            content_url = VALUES(content_url),
            iframe_url = VALUES(iframe_url),
            content = VALUES(content)
        """
        for _, row in contents_df.iterrows():
            content_url = row['content_url']
            if self.record_exists("contents", "content_url", content_url):
                print(f"Article URL '{content_url}' already exists. Skipping.")
                continue
            self._write_row(
                query, row, ('content_url', 'iframe_url', 'content'),
                f"Inserted article URL: {content_url}",
                f"Error inserting article URL '{content_url}':",
            )

    def update_archive_clean(self, df):
        """
        Store archive_title_clean from `df` (columns 'archive_title_clean',
        'archive_url').  Stops at the first failing row.
        """
        update_query = """
        UPDATE archives
        SET archive_title_clean=%s
        WHERE archive_url = %s;
        """
        try:
            self._update_rows(update_query, df, ('archive_title_clean', 'archive_url'))
            print("Archive preprocessing is completed")
        except Exception as e:
            self.connection.rollback()
            print("Error updating archive_title_clean:", e)

    def update_article_clean(self, df):
        """
        Store the preprocessed article fields from `df` (columns
        'article_title_clean', 'author_clean', 'abstract_clean', 'lang',
        'abstract_clean_en', 'article_url').  Stops at the first failing
        row.
        """
        # COALESCE tests the *incoming* abstract: a NULL value is stored as
        # ''.  Testing the column itself would discard the new abstract on
        # the first update of every row, because the column starts as NULL.
        update_query = """
        UPDATE articles
        SET article_title_clean=%s, author_clean = %s,
        abstract_clean = COALESCE(%s, ''),
        lang =  %s, abstract_clean_en = %s
        WHERE article_url = %s;
        """
        columns = (
            'article_title_clean', 'author_clean', 'abstract_clean',
            'lang', 'abstract_clean_en', 'article_url',
        )
        try:
            self._update_rows(update_query, df, columns)
            print("Articles preprocessing is completed")
        except Exception as e:
            self.connection.rollback()
            print("Error updating article_title_clean, author_clean, or abstract_clean:", e)

    def update_content_clean(self, df):
        """
        Store the preprocessed content fields from `df` (columns
        'content_clean', 'content_clean_en', 'lang', 'content_url').
        Stops at the first failing row.
        """
        update_query = """
        UPDATE contents
        SET content_clean = %s, content_clean_en = %s, lang = %s
        WHERE content_url = %s;
        """
        columns = ('content_clean', 'content_clean_en', 'lang', 'content_url')
        try:
            self._update_rows(update_query, df, columns)
            print("Contents preprocessing is completed.")
        except Exception as e:
            self.connection.rollback()
            print("Error updating content_clean:", e)

    # -----------------------------------------------------------------------------------
    # Volumes Table Methods
    # -----------------------------------------------------------------------------------

    def create_table_volumes(self):
        """Create the 'volumes' table if it does not already exist."""
        self._create_table("volumes", """
        CREATE TABLE IF NOT EXISTS volumes (
            volume_number VARCHAR(200) PRIMARY KEY,
            volume_name VARCHAR(200),
            volume_year INT
        );
        """)

    def read_table_volumes(self):
        """Return the whole 'volumes' table as a DataFrame."""
        return self._fetch_dataframe("SELECT * FROM volumes;", "volumes")

    def insert_volumes(self, volumes_df):
        """
        Insert the rows of `volumes_df` (columns 'volume_number',
        'volume_name', 'volume_year') whose volume_number is not stored
        yet.
        """
        query = """
        INSERT INTO volumes (volume_number, volume_name, volume_year)
        VALUES (%s, %s, %s)
        ON DUPLICATE KEY UPDATE
            volume_number = VALUES(volume_number),
            volume_name = VALUES(volume_name),
            volume_year = VALUES(volume_year)
        """
        for _, row in volumes_df.iterrows():
            volume_number = row['volume_number']
            if self.record_exists("volumes", "volume_number", volume_number):
                print(f"Volume_number '{volume_number}' already exists. Skipping.")
                continue
            self._write_row(
                query, row, ('volume_number', 'volume_name', 'volume_year'),
                f"Inserted volume_number: {volume_number}",
                f"Error inserting volume_number '{volume_number}':",
            )

    # -----------------------------------------------------------------------------------
    # Authors Table Methods
    # -----------------------------------------------------------------------------------

    def create_table_authors(self):
        """Create the 'authors' table if it does not already exist."""
        self._create_table("authors", """
        CREATE TABLE IF NOT EXISTS authors (
            author_name VARCHAR(200) PRIMARY KEY
        );
        """)

    def read_table_authors(self):
        """Return the whole 'authors' table as a DataFrame."""
        return self._fetch_dataframe("SELECT * FROM authors;", "authors")

    def insert_authors(self, df):
        """
        Insert the author names of `df` (column 'author_name') that are
        not stored yet.
        """
        query = """
        INSERT INTO authors (author_name)
        VALUES (%s)
        ON DUPLICATE KEY UPDATE
            author_name = VALUES(author_name)
        """
        for _, row in df.iterrows():
            author_name = row['author_name']
            if self.record_exists("authors", "author_name", author_name):
                print(f"Author name '{author_name}' already exists. Skipping.")
                continue
            # The helper always sends a real tuple, here a 1-tuple.
            self._write_row(
                query, row, ('author_name',),
                f"Inserted author name: {author_name}",
                f"Error inserting author name '{author_name}':",
            )

    # -----------------------------------------------------------------------------------
    # Authors_Articles Table Methods
    # -----------------------------------------------------------------------------------

    def create_table_authors_articles(self):
        """Create the 'authors_articles' link table if it does not exist."""
        # NOTE(review): the table has no primary/unique key, so duplicates
        # are only prevented by the lookup in insert_authors_articles().
        self._create_table("authors_articles", """
        CREATE TABLE IF NOT EXISTS authors_articles (
            author_name VARCHAR(200) NOT NULL,
            article_url VARCHAR(500) NOT NULL
        );
        """)

    def read_table_authors_articles(self):
        """Return the whole 'authors_articles' table as a DataFrame."""
        return self._fetch_dataframe("SELECT * FROM authors_articles;", "authors_articles")

    def insert_authors_articles(self, df):
        """
        Insert the (author_name, article_url) pairs of `df` that are not
        stored in 'authors_articles' yet.
        """
        query = """
        INSERT INTO authors_articles (author_name, article_url)
        VALUES (%s, %s)
        ON DUPLICATE KEY UPDATE
            author_name = VALUES(author_name),
            article_url = VALUES(article_url)
        """
        for _, row in df.iterrows():
            author_name = row['author_name']
            article_url = row['article_url']

            # The link itself must be looked up: checking the 'authors'
            # table would skip every link of an already-known author.
            already_linked = self._row_exists(
                "authors_articles",
                {"author_name": author_name, "article_url": article_url},
            )
            if already_linked:
                print(f"Author/article '{author_name}, {article_url}' already exists. Skipping.")
                continue
            self._write_row(
                query, row, ('author_name', 'article_url'),
                f"Inserted author_article: {author_name}, {article_url}",
                f"Error inserting author_article '{author_name}, {article_url}':",
            )

    # -----------------------------------------------------------------------------------
    # Export to Excel Methods
    # -----------------------------------------------------------------------------------

    def volumes_to_excel(self):
        """Export the 'volumes' table to 'data/volumes.xlsx'."""
        self._export_query_to_excel(
            "SELECT * FROM volumes;",
            'data/volumes.xlsx',
            "Volumes",
            "Error exporting 'volumes' table",
        )

    def archives_to_excel(self):
        """Export the 'archives' table to 'data/archives.xlsx'."""
        self._export_query_to_excel(
            "SELECT * FROM archives order by archive_url;",
            'data/archives.xlsx',
            "Archives",
            "Error exporting 'archives' table",
        )

    def articles_to_excel(self):
        """Export the 'articles' table to 'data/articles.xlsx'."""
        self._export_query_to_excel(
            "SELECT * FROM articles order by archive_url;",
            'data/articles.xlsx',
            "Articles",
            "Error exporting 'articles' table",
        )

    def contents_to_excel(self):
        """Export the 'contents' table to 'data/contents.xlsx'."""
        self._export_query_to_excel(
            "SELECT * FROM contents;",
            'data/contents.xlsx',
            "Contents",
            "Error exporting 'contents' table to Excel",
        )

    def authors_to_excel(self):
        """Export the 'authors' table to 'data/authors.xlsx'."""
        self._export_query_to_excel(
            "SELECT * FROM authors;",
            'data/authors.xlsx',
            "Authors",
            "Error exporting 'authors' table to Excel",
        )

    def archives_articles_contents_to_excel(self):
        """
        Export articles joined with their archive and content to
        'data/archives_articles_contents.xlsx'.

        Only articles with a non-empty abstract_clean and a matching
        'contents' row are included.  The extra column
        abstract_content_clean_en concatenates the English abstract (or,
        when that is empty, the cleaned content) with the cleaned title,
        the keywords and the cleaned archive title.
        """
        query = """
            SELECT
            articles.article_url, articles.article_title, articles.article_title_clean, articles.article_publication_date,
            articles.author, articles.author_clean, articles.keyword,
            articles.abstract, articles.abstract_clean, articles.abstract_clean_en,
            articles.lang,
            archives.archive_url, archives.archive_title, archives.archive_title_clean, archives.archive_publication_date,
            contents.content_url, contents.content, contents.content_clean,
            CONCAT(
                CASE
                    WHEN IFNULL(articles.abstract_clean_en,'') != '' THEN  IFNULL(articles.abstract_clean_en,'')
                    ELSE  IFNULL(contents.content_clean,'')
                END, ' ',  IFNULL(articles.article_title_clean, ''), ' ', IFNULL(articles.keyword, ''), ' ', IFNULL(archives.archive_title_clean, '')
            ) AS abstract_content_clean_en
            FROM articles
            LEFT JOIN archives ON articles.archive_url = archives.archive_url
            INNER JOIN contents ON articles.content_url = contents.content_url
            WHERE articles.abstract_clean!='';
        """
        self._export_query_to_excel(
            query,
            'data/archives_articles_contents.xlsx',
            "Archives_articles_contents",
            "Error exporting 'archives_articles_contents' table to Excel",
        )

    def authors_articles_to_excel(self):
        """Export 'authors_articles' to 'data/authors_articles.xlsx'."""
        self._export_query_to_excel(
            "SELECT * FROM authors_articles;",
            'data/authors_articles.xlsx',
            "Authors_articles",
            "Error exporting 'authors_articles' table to Excel",
        )
