# Compile Statements into Markdown

*DTU - Explore the controversy about Energy Island*

**Goal**: Compile the spreadsheet data into one or more large Markdown files (up to 1,000 statements per file)

**Purpose**: Feed into NotebookLM or another AI assistant.

**How to use**:
- Edit settings then run all
- Wait for each cell to run

## Code

(You don't have to understand what's going on here, but feel free to take a look)

In [None]:
# Location of the statements dataset (online URL or local file path).
# To read a local copy instead, replace the URL below with a path such as
# '../../data/Actor statement dataset.csv'.
settings = {
    'statements_dataset': 'https://jacomyma.github.io/dtu-sts-material/data/Actor%20statement%20dataset.csv',
}

### Install stuff
Note: libraries that are already installed will not be reinstalled, so re-running this cell is harmless.

In [None]:
!pip install pandas

In [None]:
# Import necessary libraries
import pandas as pd
from datetime import datetime

### Load data from the corpus

In [None]:
# Read the CSV with every column as text, then turn missing cells into ''
df = pd.read_csv(settings['statements_dataset'], dtype=str).fillna('')

# 'Year' -> nullable integer (blank or unparseable values become <NA>)
df['Year'] = pd.to_numeric(
    df['Year'].replace('', pd.NA),
    errors='coerce',
).astype(pd.Int64Dtype())

# 'X', 'Y' and 'Size' -> numeric (unparseable values become NaN)
for numeric_column in ('X', 'Y', 'Size'):
    df[numeric_column] = pd.to_numeric(df[numeric_column], errors='coerce')

print('Data loaded.')

# Uncomment to inspect the dataframe
#df

### Generate Markdown

In [None]:
def fillMarkdownTemplate(id, actorName, actorStatement, publicationDate, source):
  """Return the Markdown section for a single statement.

  The section is a level-1 heading holding ``id``, an attribution line
  naming the actor, the source and the publication date, and then the
  statement itself. Blank lines separate the three parts, and the section
  ends with three newlines so consecutive sections stay visually apart.
  """
  heading = f'# {id}'
  attribution = f'{actorName}, stated on {source}, the {publicationDate}:'
  body = f'{actorStatement}'
  return '\n\n'.join((heading, attribution, body)) + '\n\n\n'

def makeMarkdown(df, filename):
  """Write one Markdown section per row of ``df`` into ``filename``.

  For each row, the actor label is built from 'Actor', optionally followed
  by ", <Representative of>" and " (<Actor context>)"; the source label is
  'Source name', optionally followed by " (<Source type>)". These labels,
  together with 'id', 'Statement' and 'Date of publication', are passed to
  ``fillMarkdownTemplate`` and the resulting sections are concatenated in
  row order.

  Args:
    df: DataFrame containing the columns named above. The optional parts
      are skipped when their cell is falsy, so missing values are expected
      to be empty strings rather than NaN.
    filename: Path of the Markdown file to create (overwritten if present).
  """
  sections = []
  # Read values from the yielded row rather than df.at[index, ...]: the
  # latter returns a Series (breaking the truthiness checks below) whenever
  # index labels are not unique.
  for _, row in df.iterrows():
    actor = row['Actor']
    if row['Representative of']:
      actor += ", " + row['Representative of']
    if row['Actor context']:
      actor += f" ({row['Actor context']})"

    source = row['Source name']
    if row['Source type']:
      source += " (" + row['Source type'] + ")"

    sections.append(
      fillMarkdownTemplate(row['id'], actor, row['Statement'], row['Date of publication'], source)
    )

  # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
  # can fail on, or mis-encode, statements with non-Latin-1 characters.
  with open(filename, 'w', encoding='utf-8') as f:
    f.write(''.join(sections))
  print(f'File created: {filename}.')

In [None]:
# Write the statements out in consecutive batches, one Markdown file per
# batch, numbered from 1 ('Actor statements 1.md', 'Actor statements 2.md', ...)
batch_size = 1000
batch_starts = range(0, len(df), batch_size)
for batch_number, start in enumerate(batch_starts, start=1):
    makeMarkdown(
        df.iloc[start:start + batch_size],
        f'Actor statements {batch_number}.md',
    )

print('Done.')