In [4]:
import re
import os
import duckdb
import pandas as pd
from datetime import datetime

# Matches review file names such as
# "EUFOCUS-D-16-00171R1 (Vickers) 2016-11-24.pdf".
# Groups: 1 journal code, 2 manuscript id, 3 revision tag (R<n>),
# 4 editor (text in parentheses), 5-7 year / month / day.
_REVIEW_FILE_PATTERN = re.compile(
    r'([A-Za-z]+)-D-(\d{2}-\d{5})(R\d+)\s\((.*?)\)\s(\d{4})-(\d{2})-(\d{2})\.pdf'
)


def _parse_review_filename(file_name):
    """Return the review record encoded in *file_name*, or None if it does not match."""
    match = _REVIEW_FILE_PATTERN.search(file_name)
    if match is None:
        return None
    journal, ms_id, version, editor, year = match.group(1, 2, 3, 4, 5)
    return {
        'Name': file_name,
        'MS_Number': f"{journal}-D-{ms_id}{version}",
        'Version': version,
        'Year': int(year),
        'Editor': editor,
        'Journal': journal,
    }


def import_data(folder_path='../../Dealt With/', database='reviews.duckdb'):
    """Load review metadata parsed from PDF file names into a DuckDB table.

    Every regular file directly inside *folder_path* whose name matches the
    review naming scheme is turned into one row of the ``reviews`` table
    (created on first use). Files whose ``Name`` is already stored are
    skipped, so running the import repeatedly does not duplicate rows.

    Args:
        folder_path: Directory to scan (not recursive).
        database: Path of the DuckDB database file to write to.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    # isdir (rather than exists) also rejects a path that points at a plain
    # file, which would otherwise make os.listdir raise.
    if not os.path.isdir(folder_path):
        print(f"Folder '{folder_path}' does not exist.")
        return

    file_data = []
    # Sorted so the insertion order is deterministic across platforms.
    for file_name in sorted(os.listdir(folder_path)):
        # Only regular files count; sub-directories are ignored.
        if not os.path.isfile(os.path.join(folder_path, file_name)):
            continue
        record = _parse_review_filename(file_name)
        if record is not None:
            file_data.append(record)

    if not file_data:
        print("No matching files found.")
        return

    # NOTE: `df` is referenced by name inside the INSERT statement below;
    # DuckDB resolves it to this local DataFrame, so do not rename it.
    df = pd.DataFrame(file_data)

    con = duckdb.connect(database=database)
    try:
        con.execute("""
        CREATE TABLE IF NOT EXISTS reviews (
            Name STRING,
            MS_Number STRING,
            Version STRING,
            Year INTEGER,
            Editor STRING,
            Journal STRING
        )""")

        # Columns are listed explicitly so the insert does not depend on the
        # DataFrame's column order; NOT EXISTS keeps re-runs idempotent.
        con.execute("""
        INSERT INTO reviews (Name, MS_Number, Version, Year, Editor, Journal)
        SELECT Name, MS_Number, Version, Year, Editor, Journal
        FROM df AS incoming
        WHERE NOT EXISTS (
            SELECT 1 FROM reviews AS stored WHERE stored.Name = incoming.Name
        )""")
    finally:
        # Always release the database file, even if a statement failed.
        con.close()

    print("Data imported successfully into DuckDB.")

def main():
    """Script entry point: run the file-name import once."""
    import_data()

if __name__ == "__main__":
    main()

# Load and print table contents.
con = duckdb.connect(database='reviews.duckdb')
try:
    df = con.execute("SELECT * FROM reviews").fetchdf()
finally:
    # Close the connection even if the query fails (e.g. the table does not
    # exist yet), so the database file is not left open.
    con.close()

print(df.head())  # Print first few rows

Data imported successfully into DuckDB.
                                            Name             MS_Number  \
0  EUFOCUS-D-16-00171R1 (Vickers) 2016-11-24.pdf  EUFOCUS-D-16-00171R1   
1  EUFOCUS-D-16-00171R2 (Vickers) 2017-01-26.pdf  EUFOCUS-D-16-00171R2   
2  EUFOCUS-D-16-00171R3 (Vickers) 2017-03-01.pdf  EUFOCUS-D-16-00171R3   
3  EURUROL-D-14-01343R3 (Vickers) 2016-01-17.pdf  EURUROL-D-14-01343R3   
4  EURUROL-D-15-00807R2 (Sjoberg) 2015-09-12.pdf  EURUROL-D-15-00807R2   

  Version  Year   Editor  Journal  
0      R1  2016  Vickers  EUFOCUS  
1      R2  2017  Vickers  EUFOCUS  
2      R3  2017  Vickers  EUFOCUS  
3      R3  2016  Vickers  EURUROL  
4      R2  2015  Sjoberg  EURUROL  
