---

# import a paper from Zotero, extract data, save to database

___

## proof of concept - summarize each paper

In [1]:
import os
import csv
import requests
import time
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()

# Retrieve your OpenAI API key from environment variables.
# os.getenv returns None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Define the path to your _inbox folder.
# NOTE: absolute, machine-specific Windows path; main() prints a message and
# returns early if it does not exist.
INBOX_PATH = r"D:\OneDrive\_Carnegie Mellon (CMU)\60 Academic\63 Literature Review\63.002 Literature Review Exports from Zotero\_inbox"

# OpenAI API endpoint for Chat Completions
OPENAI_URL = "https://api.openai.com/v1/chat/completions"

# Define the output CSV file path (relative, so it is resolved against the
# current working directory)
CSV_FILE = r"literature_data\summaries.csv"

def generate_summary(paper_text, timeout=120):
    """
    Sends the content of paper.txt to the OpenAI Chat Completions endpoint
    and returns a summary of the paper.

    Args:
        paper_text: Full text of the paper, sent as the user message.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout, requests can block indefinitely on a stalled
            connection and hang the whole batch.

    Returns:
        The summary text, or an empty string if the request failed or the
        response did not contain a usable completion. Failures are printed
        rather than raised so a batch run can continue with the next paper.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }
    prompt_messages = [
        {
            "role": "system",
            "content": (
                "You are given the full text of a research paper. "
                "Summarize the paper according to the following instructions: "
                "1) Provide a concise abstract-like summary. "
                "2) Highlight the main contributions and conclusions. "
                "3) Use clear and accessible language. "
                "Make sure the summary captures the essence of the paper."
            )
        },
        {
            "role": "user",
            "content": paper_text
        }
    ]
    payload = {
        "model": "gpt-4o",
        "messages": prompt_messages,
        "temperature": 0.7
    }

    try:
        response = requests.post(
            OPENAI_URL, headers=headers, json=payload, timeout=timeout
        )
        # response.json() raises a ValueError subclass on a non-JSON body
        # (e.g. an HTML error page from a proxy).
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        print("Error: OpenAI request failed:")
        print(exc)
        return ""

    # Error payloads have no "choices"; also guard against an empty list so
    # the indexing below cannot raise.
    if not isinstance(data, dict) or not data.get("choices"):
        print("Error: Unexpected API response format:")
        print(data)
        return ""

    # Content may be null in the response; normalise to a string.
    return data["choices"][0]["message"]["content"] or ""


def append_summary_to_csv(folder_name, summary):
    """
    Appends the folder name and summary to the CSV file at CSV_FILE.

    The parent directory is created if needed, and a header row is written
    first when the file does not exist yet or exists but is empty.

    Args:
        folder_name: Name of the paper's folder (first column).
        summary: Summary text for that paper (second column).
    """
    # open(..., "a") creates the file but not missing parent directories.
    parent_dir = os.path.dirname(CSV_FILE)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # A zero-byte file (e.g. left by an interrupted run) still needs a header.
    needs_header = not os.path.exists(CSV_FILE) or os.path.getsize(CSV_FILE) == 0

    with open(CSV_FILE, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        if needs_header:
            writer.writerow(["Folder", "Summary"])
        writer.writerow([folder_name, summary])


def main():
    """
    Summarizes every paper in the inbox and appends the results to the CSV.

    Each sub-folder of INBOX_PATH is expected to contain a paper.txt file.
    Folders without one, unreadable files, and failed summaries are reported
    and skipped so one bad paper does not abort the batch or add an empty
    row to the CSV.
    """
    if not os.path.exists(INBOX_PATH):
        print(f"Inbox path does not exist: {INBOX_PATH}")
        return

    # List all folders in the _inbox; sorted because os.listdir order is
    # not guaranteed, and a stable order keeps the CSV reproducible.
    folder_names = sorted(
        folder for folder in os.listdir(INBOX_PATH)
        if os.path.isdir(os.path.join(INBOX_PATH, folder))
    )

    for folder in folder_names:
        paper_file = os.path.join(INBOX_PATH, folder, "paper.txt")

        if not os.path.exists(paper_file):
            print(f"Missing paper.txt in folder: {folder}")
            continue

        try:
            with open(paper_file, "r", encoding="utf-8") as pf:
                paper_text = pf.read()
        except (OSError, UnicodeDecodeError) as exc:
            print(f"Could not read paper.txt in folder '{folder}': {exc}")
            continue

        # Generate the summary using OpenAI
        summary = generate_summary(paper_text)

        # generate_summary returns "" on failure; do not record that as a row.
        if summary:
            append_summary_to_csv(folder, summary)
            print(f"Processed folder '{folder}' and appended summary to CSV.")
        else:
            print(f"Summary failed for folder: {folder}")

        # Wait for 6 seconds before processing the next folder
        time.sleep(6)

# Entry point: run the summarization pipeline when this code executes as the main module.
if __name__ == "__main__":
    main()


Processed folder 'Khaldi et al., 2016' and appended summary to CSV.
Processed folder 'Khodagholy et al., 2011' and appended summary to CSV.
Processed folder 'Middya et al., 2021' and appended summary to CSV.
Processed folder 'Middya et al., 2025' and appended summary to CSV.
Processed folder 'Sessolo et al., 2013' and appended summary to CSV.


## trial 1 - try to sort papers by specific metrics

In [2]:
import os
import time
import csv
import json
import requests
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()

# Retrieve your OpenAI API key from environment variables.
# os.getenv returns None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Define the path to your _inbox folder and the output CSV file path.
# INBOX_PATH is an absolute, machine-specific Windows path; CSV_FILE is
# relative and therefore resolved against the current working directory.
INBOX_PATH = r"D:\OneDrive\_Carnegie Mellon (CMU)\60 Academic\63 Literature Review\63.002 Literature Review Exports from Zotero\_inbox"
CSV_FILE = r"literature_data\literature_database_v1.csv"

# OpenAI API endpoint for Chat Completions
OPENAI_URL = "https://api.openai.com/v1/chat/completions"

# Dictionaries of materials and characterization methods.
# Only the KEYS are read by the functions in this cell: they are listed in the
# prompt and used as CSV column names. The integer values are never read here
# (presumably tallies from an earlier manual review -- TODO confirm).

# All electrode-related materials to flag per paper.
electrode_materials_tally = {
    "Polyethylene Terephthalate (PET)": 2,
    "Parylene (Parylene-C, Parylene-HT)": 7,
    "Polyimide (PI)": 3,
    "SU-8 Photoresist": 4,
    "Silicon (Si)": 4,
    "PDMS (Polydimethylsiloxane)": 2,
    "Gold": 5,
    "Platinum (Pt)": 5,
    "Graphene": 3,
    "Carbon Nanotubes (CNTs)": 3,
    "Indium Tin Oxide (ITO)": 2,
    "Titanium (Ti)": 2,
    "Silver": 1,
    "PEDOT:PSS": 4,
    "Nickel": 1,
    "Aluminum": 1,
    "Eutectic Gallium–Indium Alloy (EGaIn)": 1,
    "Chromium (Cr)": 3,
    "Polycarbonate (PC)": 1,
    "Styrene-Ethylene-Butylene-Styrene (SEBS)": 1,
    "Pluronic P123": 1,
    "Mesoporous Silica Nanoparticles": 1,
    "Gold Nanorods": 1,
    "Tungsten": 1,
    "ZnO Nanowires": 1,
    "Pyrolytic Carbon": 2,
}

# Every key here also appears in electrode_materials_tally above, so the CSV
# header built from both dictionaries contains repeated column names.
functional_electrode_materials = {
    "Gold": 5,
    "Platinum (Pt)": 5,
    "Graphene": 3,
    "Carbon Nanotubes (CNTs)": 3,
    "Indium Tin Oxide (ITO)": 2,
    "Titanium (Ti)": 2,
    "Silver": 1,
    "PEDOT:PSS": 4,
    "Nickel": 1,
    "Aluminum": 1,
    "Eutectic Gallium–Indium Alloy (EGaIn)": 1,
    "Chromium (Cr)": 3,
    "Pluronic P123": 1,
    "Mesoporous Silica Nanoparticles": 1,
    "Gold Nanorods": 1,
    "Tungsten": 1,
    "ZnO Nanowires": 1,
    "Pyrolytic Carbon": 2
}

# Characterization techniques to flag per paper.
benchtop_characterization = {
    "SEM Imaging": 5,
    "AFM (Atomic Force Microscopy)": 2,
    "XRD (X-ray Diffraction)": 1,
    "Raman Spectroscopy": 2,
    "Optical Transparency Measurements": 4,
    "Mechanical Testing (e.g., bending, strain)": 4,
    "Photoluminescence Measurements": 1,
    "Spatial Resolution Testing (Microscopy)": 3
}

def analyze_paper(paper_text, timeout=120):
    """
    Sends the paper text to the OpenAI API with a prompt instructing the model to produce a JSON output.
    The JSON includes a summary, dictionaries for electrode materials, functional electrode materials,
    benchtop characterization, and an impedance value if available.

    Args:
        paper_text: Full text of the paper, sent as the user message.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The parsed JSON object as a dict, or None if the request failed, the
        response had no completion, or the completion was not a JSON object.
        Failures are printed rather than raised so a batch run can continue.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }

    electrode_keys = list(electrode_materials_tally.keys())
    functional_keys = list(functional_electrode_materials.keys())
    characterization_keys = list(benchtop_characterization.keys())

    prompt_text = (
        "You are given the full text of a research paper. Analyze the paper and produce a JSON object with the following keys:\n"
        "- 'summary': A concise abstract-like summary of the paper.\n"
        "- 'electrode_materials': A dictionary where each key is one of the following electrode materials: "
        + ", ".join(electrode_keys)
        + ". For each material, output 1 if the paper mentions or uses it, otherwise 0.\n"
        "- 'functional_electrode_materials': A dictionary where each key is one of the following materials: "
        + ", ".join(functional_keys)
        + ". For each material, output 1 if the paper mentions or uses it, otherwise 0.\n"
        "- 'benchtop_characterization': A dictionary where each key is one of the following characterization techniques: "
        + ", ".join(characterization_keys)
        + ". For each technique, output 1 if the paper mentions it, otherwise 0.\n"
        "- 'impedance': If the paper reports an impedance value, extract and output that value as a string; otherwise, output an empty string.\n\n"
        "Ensure the output is valid JSON."
    )

    prompt_messages = [
        {"role": "system", "content": prompt_text},
        {"role": "user", "content": paper_text}
    ]

    payload = {
        "model": "gpt-4o",
        "messages": prompt_messages,
        "temperature": 0.7,
        # JSON mode: ask the API for a syntactically valid JSON object. The
        # prompt already contains the word "JSON", which this mode requires.
        "response_format": {"type": "json_object"}
    }

    try:
        response = requests.post(
            OPENAI_URL, headers=headers, json=payload, timeout=timeout
        )
        # response.json() raises a ValueError subclass on a non-JSON body.
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print("Error: OpenAI request failed:", e)
        return None

    # Error payloads have no "choices"; also guard against an empty list.
    if not isinstance(data, dict) or not data.get("choices"):
        print("Error: Unexpected API response format:")
        print(data)
        return None

    # Content may be null; strip so a leading newline does not hide a fence.
    content = (data["choices"][0]["message"].get("content") or "").strip()

    # Remove markdown code block markers if present (fallback in case the
    # model still wraps its answer in a ``` fence).
    if content.startswith("```"):
        lines = content.splitlines()[1:]  # drop the opening fence line
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        content = "\n".join(lines).strip()

    try:
        result = json.loads(content)
    except json.JSONDecodeError as e:
        print("Error parsing JSON:", e)
        print("Content received:", content)
        return None

    # Callers use .get() on the result, so anything but an object is a failure.
    if not isinstance(result, dict):
        print("Error parsing JSON: expected a JSON object")
        print("Content received:", content)
        return None

    return result

def write_header():
    """
    Writes the CSV header row, overwriting any existing file at CSV_FILE.

    Columns are: Folder, Summary, every key of electrode_materials_tally,
    every key of functional_electrode_materials, every key of
    benchtop_characterization, then impedance. The parent directory of
    CSV_FILE is created if it does not exist.

    Note: the functional-material keys also appear among the electrode
    materials, so the header contains repeated column names; columns must be
    told apart by position.
    """
    header = ["Folder", "Summary"]
    header += list(electrode_materials_tally.keys())
    header += list(functional_electrode_materials.keys())
    header += list(benchtop_characterization.keys())
    header.append("impedance")

    # open(..., "w") creates the file but not missing parent directories.
    parent_dir = os.path.dirname(CSV_FILE)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(CSV_FILE, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)

def append_analysis_to_csv(folder_name, analysis_data):
    """
    Appends the analysis data (including summary and additional columns) to the CSV.
    The CSV includes columns for the folder name, summary, each electrode material,
    each functional electrode material, each benchtop characterization method, and impedance.

    Args:
        folder_name: Name of the paper's folder (first column).
        analysis_data: Dict parsed from the model's JSON reply. Missing keys
            default to 0 (flags) or "" (summary, impedance). A section that is
            present but not a dict (e.g. null or a list) is treated as empty
            instead of raising AttributeError.
    """
    def _flags(section, keys):
        # One value per expected key, in the same order write_header() uses.
        values = analysis_data.get(section)
        if not isinstance(values, dict):
            values = {}
        return [values.get(key, 0) for key in keys]

    row = [folder_name, analysis_data.get("summary", "")]
    # Append electrode materials flags
    row += _flags("electrode_materials", electrode_materials_tally.keys())
    # Append functional electrode materials flags
    row += _flags("functional_electrode_materials", functional_electrode_materials.keys())
    # Append benchtop characterization flags
    row += _flags("benchtop_characterization", benchtop_characterization.keys())
    # Append impedance value
    row.append(analysis_data.get("impedance", ""))

    with open(CSV_FILE, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(row)

def main():
    """
    Analyzes every paper in the inbox and writes one CSV row per paper.

    The CSV at CSV_FILE is recreated with a fresh header on every run. Each
    sub-folder of INBOX_PATH is expected to contain a paper.txt file; folders
    without one, unreadable files, and failed analyses are reported and
    skipped so one bad paper does not abort the batch.
    """
    if not os.path.exists(INBOX_PATH):
        print(f"Inbox path does not exist: {INBOX_PATH}")
        return

    # Write the header row (this overwrites any existing file)
    write_header()

    # List all folders in the _inbox; sorted because os.listdir order is
    # not guaranteed, and a stable order keeps the CSV reproducible.
    folder_names = sorted(
        folder for folder in os.listdir(INBOX_PATH)
        if os.path.isdir(os.path.join(INBOX_PATH, folder))
    )

    for folder in folder_names:
        paper_file = os.path.join(INBOX_PATH, folder, "paper.txt")

        if not os.path.exists(paper_file):
            print(f"Missing paper.txt in folder: {folder}")
            continue

        try:
            with open(paper_file, "r", encoding="utf-8") as pf:
                paper_text = pf.read()
        except (OSError, UnicodeDecodeError) as exc:
            print(f"Could not read paper.txt in folder '{folder}': {exc}")
            continue

        analysis = analyze_paper(paper_text)
        if analysis is None:
            print(f"Analysis failed for folder: {folder}")
        else:
            append_analysis_to_csv(folder, analysis)
            print(f"Processed folder '{folder}' and appended analysis to CSV.")

        # Wait for 6 seconds before processing the next folder
        time.sleep(6)

# Entry point: run the analysis pipeline when this code executes as the main module.
if __name__ == "__main__":
    main()


Processed folder 'Khaldi et al., 2016' and appended analysis to CSV.
Processed folder 'Khodagholy et al., 2011' and appended analysis to CSV.
Processed folder 'Middya et al., 2021' and appended analysis to CSV.
Processed folder 'Middya et al., 2025' and appended analysis to CSV.
Processed folder 'Sessolo et al., 2013' and appended analysis to CSV.


## trial 2 - try importing the metrics that I want to see

In [3]:
import os
import time
import csv
import json
import requests
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()

# Retrieve your OpenAI API key from environment variables.
# os.getenv returns None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Define the path to your _inbox folder and the output CSV file path.
# INBOX_PATH is an absolute, machine-specific Windows path; CSV_FILE is
# relative and therefore resolved against the current working directory.
INBOX_PATH = r"D:\OneDrive\_Carnegie Mellon (CMU)\60 Academic\63 Literature Review\63.002 Literature Review Exports from Zotero\_inbox"
CSV_FILE = r"literature_data\literature_database_v2.csv"

# OpenAI API endpoint for Chat Completions
OPENAI_URL = "https://api.openai.com/v1/chat/completions"

# Default tally keys, returned by read_tally_keys_from_csv() when the CSV does
# not exist or its header has three or fewer columns. These become both the
# keys requested from the model and the CSV column names.
default_tally_keys = [
    "Polyethylene Terephthalate (PET)",
    "Parylene (Parylene-C, Parylene-HT)",
    "Polyimide (PI)",
    "SU-8 Photoresist",
    "Silicon (Si)",
    "PDMS (Polydimethylsiloxane)",
    "Gold",
    "Platinum (Pt)",
    "Graphene",
    "Carbon Nanotubes (CNTs)",
    "Indium Tin Oxide (ITO)",
    "Titanium (Ti)",
    "Silver",
    "PEDOT:PSS",
    "Nickel",
    "Aluminum",
    "Eutectic Gallium–Indium Alloy (EGaIn)",
    "Chromium (Cr)",
    "Polycarbonate (PC)",
    "Styrene-Ethylene-Butylene-Styrene (SEBS)",
    "Pluronic P123",
    "Mesoporous Silica Nanoparticles",
    "Gold Nanorods",
    "Tungsten",
    "ZnO Nanowires",
    "Pyrolytic Carbon",
    # Additional keys (if combined from functional or benchtop groups)
    "SEM Imaging",
    "AFM (Atomic Force Microscopy)",
    "XRD (X-ray Diffraction)",
    "Raman Spectroscopy",
    "Optical Transparency Measurements",
    "Mechanical Testing (e.g., bending, strain)",
    "Photoluminescence Measurements",
    "Spatial Resolution Testing (Microscopy)"
]

def read_tally_keys_from_csv():
    """
    Reads the CSV header and returns a list of tally keys.
    Assumes that the first two columns are 'Folder' and 'Summary'
    and the last column is the impedance value.

    Returns:
        The header columns between 'Summary' and the last column when the CSV
        exists and its header has more than three columns. Otherwise (file
        missing, empty, or header too short) a copy of default_tally_keys, so
        callers cannot accidentally mutate the module-level default.
    """
    if os.path.exists(CSV_FILE):
        with open(CSV_FILE, "r", newline="", encoding="utf-8") as csvfile:
            # Default of None: a zero-byte file has no header row, and a bare
            # next() would raise StopIteration.
            header = next(csv.reader(csvfile), None)
        if header and len(header) > 3:
            # Exclude 'Folder', 'Summary', and last column (impedance)
            return header[2:-1]
    return list(default_tally_keys)

def analyze_paper(paper_text, tally_keys, timeout=300):
    """
    Sends the paper text to the OpenAI API with a prompt instructing the model to produce a JSON object.
    The output JSON includes:
      - 'summary': A concise abstract-like summary of the paper.
      - 'tally': A dictionary where each key is one of the provided tally_keys.
                For each key, output 1 if the paper mentions or uses it, otherwise 0.
      - 'impedance': If the paper reports an impedance value, extract and output that value as a string;
                     otherwise, output an empty string.

    Args:
        paper_text: Full text of the paper, sent as the user message.
        tally_keys: Iterable of strings naming the items to flag.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The parsed JSON object as a dict, or None if the request failed, the
        response had no completion, or the completion was not a JSON object.
        Failures are printed rather than raised so a batch run can continue.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }

    prompt_text = (
        "You are given the full text of a research paper. Analyze the paper and produce a JSON object with the following keys:\n"
        "- 'summary': A concise abstract-like summary of the paper.\n"
        "- 'tally': A dictionary where each key is one of the following: "
        + ", ".join(tally_keys) +
        ". For each key, output 1 if the paper mentions or uses it, otherwise 0.\n"
        "- 'impedance': If the paper reports an impedance value (e.g. '56 ± 8 kΩ'), extract and output that value as a string; otherwise, output an empty string.\n\n"
        "Ensure the output is valid JSON."
    )

    prompt_messages = [
        {"role": "system", "content": prompt_text},
        {"role": "user", "content": paper_text}
    ]

    payload = {
        "model": "o3-mini",
        "messages": prompt_messages,
    }

    try:
        response = requests.post(
            OPENAI_URL, headers=headers, json=payload, timeout=timeout
        )
        # response.json() raises a ValueError subclass on a non-JSON body.
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print("Error: OpenAI request failed:", e)
        return None

    # Error payloads have no "choices"; also guard against an empty list.
    if not isinstance(data, dict) or not data.get("choices"):
        print("Error: Unexpected API response format:")
        print(data)
        return None

    # Content may be null; strip so a leading newline does not hide a fence.
    content = (data["choices"][0]["message"].get("content") or "").strip()

    # Remove markdown code block markers if present
    if content.startswith("```"):
        lines = content.splitlines()[1:]  # drop the opening fence line
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        content = "\n".join(lines).strip()

    try:
        result = json.loads(content)
    except json.JSONDecodeError as e:
        print("Error parsing JSON:", e)
        print("Content received:", content)
        return None

    # Callers use .get() on the result, so anything but an object is a failure.
    if not isinstance(result, dict):
        print("Error parsing JSON: expected a JSON object")
        print("Content received:", content)
        return None

    return result

def write_header(tally_keys):
    """
    Writes the CSV header row using the provided tally_keys, overwriting any
    existing file at CSV_FILE.
    Header columns: Folder, Summary, [tally_keys...], Impedance at 1kHz

    Args:
        tally_keys: Iterable of column names placed between 'Summary' and
            'Impedance at 1kHz'. Any iterable is accepted, not just a list.
    """
    header = ["Folder", "Summary", *tally_keys, "Impedance at 1kHz"]

    # open(..., "w") creates the file but not missing parent directories.
    parent_dir = os.path.dirname(CSV_FILE)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(CSV_FILE, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)

def append_analysis_to_csv(folder_name, analysis_data, tally_keys):
    """
    Appends the analysis data to the CSV.
    Data is written in the order of the header:
    Folder, Summary, then each key in tally_keys, then Impedance at 1kHz.

    Args:
        folder_name: Name of the paper's folder (first column).
        analysis_data: Dict parsed from the model's JSON reply. Missing keys
            default to 0 (tally flags) or "" (summary, impedance).
        tally_keys: Column keys, in header order, looked up in
            analysis_data['tally'].
    """
    tally = analysis_data.get("tally")
    # The model may return null or a list here; treat that as "no flags"
    # rather than raising AttributeError on .get().
    if not isinstance(tally, dict):
        tally = {}

    row = [folder_name, analysis_data.get("summary", "")]
    row.extend(tally.get(key, 0) for key in tally_keys)
    row.append(analysis_data.get("impedance", ""))

    with open(CSV_FILE, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(row)

def main():
    """
    Analyzes every paper in the inbox and writes one CSV row per paper.

    The tally columns are taken from the existing CSV header when there is
    one (otherwise the defaults), and the CSV is then recreated with that
    header. Each sub-folder of INBOX_PATH is expected to contain a paper.txt
    file; folders without one, unreadable files, and failed analyses are
    reported and skipped so one bad paper does not abort the batch.
    """
    if not os.path.exists(INBOX_PATH):
        print(f"Inbox path does not exist: {INBOX_PATH}")
        return

    # Determine tally keys based on existing CSV header (or defaults).
    # This must happen before write_header(), which truncates the file.
    tally_keys = read_tally_keys_from_csv()

    # Write header row to CSV (this overwrites any existing file)
    write_header(tally_keys)

    # List all folders in the _inbox; sorted because os.listdir order is
    # not guaranteed, and a stable order keeps the CSV reproducible.
    folder_names = sorted(
        folder for folder in os.listdir(INBOX_PATH)
        if os.path.isdir(os.path.join(INBOX_PATH, folder))
    )

    for folder in folder_names:
        paper_file = os.path.join(INBOX_PATH, folder, "paper.txt")

        if not os.path.exists(paper_file):
            print(f"Missing paper.txt in folder: {folder}")
            continue

        try:
            with open(paper_file, "r", encoding="utf-8") as pf:
                paper_text = pf.read()
        except (OSError, UnicodeDecodeError) as exc:
            print(f"Could not read paper.txt in folder '{folder}': {exc}")
            continue

        analysis = analyze_paper(paper_text, tally_keys)
        if analysis is None:
            print(f"Analysis failed for folder: {folder}")
        else:
            append_analysis_to_csv(folder, analysis, tally_keys)
            print(f"Processed folder '{folder}' and appended analysis to CSV.")

        # Wait for 6 seconds before processing the next folder
        time.sleep(6)

# Entry point: run the analysis pipeline when this code executes as the main module.
if __name__ == "__main__":
    main()


Processed folder 'Khaldi et al., 2016' and appended analysis to CSV.
Processed folder 'Khodagholy et al., 2011' and appended analysis to CSV.
Processed folder 'Middya et al., 2021' and appended analysis to CSV.
Processed folder 'Middya et al., 2025' and appended analysis to CSV.
Processed folder 'Sessolo et al., 2013' and appended analysis to CSV.
