In [1]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from dotenv import load_dotenv
import os
import dateutil.parser as parser
from webdav4.fsspec import WebdavFileSystem
from fsspec.implementations.local import LocalFileSystem
from ibots_db.schema import ConsultingReport
from ibots_db import update_all, load

In [3]:
# Parameters cell for papermill: the values below are the notebook's inputs.

# Local directory the reports are downloaded into and later listed from.
raw_dir = 'data/raw/'
# Names of the environment variables that hold the WebDAV user and password.
env_usr = 'REPORT_USR'
env_pwd = 'REPORT_PWD'

## Functions

In [4]:
from docx import Document
from pathlib import Path


def process(reports_path: list):
    """Parse every report document listed in ``reports_path``.

    Args:
        reports_path: Paths of the report documents to parse.

    Returns:
        A list with one element per input path, in input order; each element
        is whatever ``extract_report_data`` returns for that path.
    """
    return [extract_report_data(path) for path in reports_path]

def extract_report_data(report_path):
    """Split one report document into a list of per-report dictionaries.

    The document is read paragraph by paragraph. A paragraph starting with
    ``Type:`` opens a new report (closing the previous one if it already has a
    type); ``Scholar:``, ``Date:`` and ``Topic:`` fill the matching fields;
    ``Content:`` starts the content text, and every paragraph that carries no
    known label is appended to the content on a new line.

    Args:
        report_path: Path of the document, passed straight to ``Document``.

    Returns:
        A list of dictionaries shaped like ``reset_report_data()``; reports
        without a type are never emitted.
    """
    # Single-line header fields: label -> key in the report dictionary.
    header_fields = {'Scholar:': 'scholar', 'Date:': 'date', 'Topic:': 'topic'}

    collected = []
    current = reset_report_data()
    # Becomes True at the first 'Type:' or 'Content:' paragraph and stays True;
    # until then unlabeled paragraphs are ignored.
    appending = False

    for paragraph in Document(report_path).paragraphs:
        line = paragraph.text.strip()

        if line.startswith('Type:'):
            # A second 'Type:' marks the start of the next report.
            if current['type']:
                collected.append(current)
                current = reset_report_data()
            current['type'] = clean_text(line, 'Type:')
            appending = True
            continue

        label = next((p for p in header_fields if line.startswith(p)), None)
        if label is not None:
            current[header_fields[label]] = clean_text(line, label)
            continue

        if line.startswith('Content:'):
            # Overwrites anything gathered into the content so far.
            current['content'] = clean_text(line, 'Content:')
            appending = True
        elif appending:
            current['content'] += '\n' + line

    # Flush the report that was still open when the document ended.
    if current['type']:
        collected.append(current)

    return collected

def clean_text(text, prefix):
    """Return ``text`` without its leading ``prefix`` label.

    Only a label at the start of the text is removed, so a value that happens
    to contain the same label again (e.g. ``'Topic: Choosing a Topic: scope'``)
    is kept intact. Surrounding whitespace is stripped from the result.

    Args:
        text: The paragraph text, normally starting with ``prefix``.
        prefix: The field label to drop, e.g. ``'Type:'``.

    Returns:
        The remaining value as a stripped string; ``text`` itself (stripped)
        when it does not start with ``prefix``.
    """
    # lstrip first so a label preceded by whitespace is still recognised.
    return text.lstrip().removeprefix(prefix).strip()

def reset_report_data():
    """Create a new, empty report record.

    Returns:
        A fresh dictionary with the keys ``type``, ``scholar``, ``date``,
        ``topic`` and ``content`` (in that order), each set to ``''``.
    """
    field_names = ('type', 'scholar', 'date', 'topic', 'content')
    return dict.fromkeys(field_names, '')

## Download reports from sciebo

In [5]:
# Load variables from a .env file (if present) before reading the credentials.
load_dotenv()
# The variable names come from the parameters cell; os.getenv returns None
# when a variable is not set.
USR = os.getenv(env_usr)
PWD = os.getenv(env_pwd)
# Connect to the sciebo WebDAV endpoint and copy everything under its root
# into raw_dir.
fs = WebdavFileSystem("https://uni-bonn.sciebo.de/public.php/webdav", auth=(USR, PWD))
fs.download('/', raw_dir, recursive=True)

## Process reports

In [6]:

# List the downloaded files in the raw directory.
fs_raw = LocalFileSystem()
notebooks = fs_raw.ls(raw_dir, detail=False)
# NOTE(review): the first listing entry is dropped without explanation.
# Presumably it is a non-report entry, but nothing here sorts the listing,
# so which entry comes first may depend on the platform - confirm and replace
# with an explicit filter.
notebooks = notebooks[1:]

# One list of parsed reports per file; the file stem is used as consultant name.
cons = process(reports_path=notebooks)
consultant_names = [Path(notebook).stem for notebook in notebooks]

# Flatten to one record per report, tagging it with its consultant and turning
# the date text into a datetime.
to_dt = parser.parse
reports = []
for name, con in zip(consultant_names, cons):
    for entry in con:
        reports.append({**entry, 'consultant': name, 'date': to_dt(entry['date'])})

entries = [ConsultingReport(**report) for report in reports]

# Key format: <consultant>_<running index, 3 digits>_<YYYY-MM-DD>. The index
# runs over all reports, not per consultant.
write_entry = {}
for ind, entry in enumerate(entries):
    key = '_'.join((entry.consultant, str(ind).zfill(3), entry.date.strftime('%Y-%m-%d')))
    write_entry[key] = entry

update_all(key='consulting_reports', data=write_entry)


In [9]:
# Display the consulting_reports attribute of what load() returns - presumably
# the entries stored under the 'consulting_reports' key; confirm against ibots_db.
load().consulting_reports

{'mohammad_000_2023-08-30': ConsultingReport(consultant='mohammad', content='Jens Tillmann is hosting a workshop next week with 20+ participants. The workshop has been allocated 1 hour 15 minutes, which makes accessibility of the tool and data an important and challenging aspect of the design. He would like to discuss and get some feedback on best practices and tips on how to prepare the software (A-SOiD) demo and example data.\n\nOutcomes: We discussed several aspects and potential ideas for making the workshop more accessible. For instance, using Gitpod environment to minimize time spent on setup. Furthermore, we also discussed ideas to make the content delivery more efficient and fun through discussions.\n\nNext steps: NA\n\nTimeline: Workshop design should be finalized within 2 to 3 days, as the event is quickly approaching.\n\nUsers: Jens Tillmann and the participants\n\nRelevant links:\nGitHub repo for the workshop: https://github.com/JensBlack/A-SOID_workshop', date=datetime.dat