# In [45]:
import os
import pandas as pd
import requests
import time
import backoff

# API key for the Anthropic Messages API. Prefer supplying it through the
# ANTHROPIC_API_KEY environment variable so the secret never lives in source
# control; the placeholder below is only used when the variable is unset or empty.
api_key = os.environ.get("ANTHROPIC_API_KEY") or "YOUR_API_KEY"

class _AnthropicAPIError(Exception):
    """Non-200 reply from the Messages API; carries the status for retry decisions."""

    def __init__(self, status_code, body, retry_after=None):
        # Message text matches what this function has always raised.
        super().__init__(f"Error: {status_code} - {body}")
        self.status_code = status_code
        self.retry_after = retry_after


def _is_permanent_failure(exc):
    """Return True for client errors (4xx except 408/429) that a retry cannot fix."""
    status = getattr(exc, "status_code", None)
    return status is not None and 400 <= status < 500 and status not in (408, 429)


def _retry_after_seconds(response, default=60):
    """Read the `retry-after` header as seconds, falling back to *default*."""
    raw = response.headers.get("retry-after")
    try:
        return max(0.0, float(raw))
    except (TypeError, ValueError):
        # Header missing, or not a plain number of seconds.
        return default


def _split_summaries(full_response):
    """Split the model's reply into (short description, long description)."""
    _, short_marker, remainder = full_response.partition("Short Description:")
    short_part, long_marker, long_part = remainder.partition("Long Description:")
    if not short_marker or not long_marker:
        raise ValueError(
            "Response does not contain the expected "
            "'Short Description:' / 'Long Description:' markers"
        )
    return short_part.strip(), long_part.strip()


@backoff.on_exception(
    backoff.expo, Exception, max_tries=5, giveup=_is_permanent_failure
)
def summarize_text(text):
    """Ask the Anthropic Messages API for a short and a long summary of *text*.

    The request is retried (up to 5 attempts, exponential backoff) on network
    errors, rate limiting, server errors and malformed replies. Client errors
    that cannot succeed on retry (e.g. an invalid API key) are raised
    immediately.

    Args:
        text: The document text to summarize.

    Returns:
        A ``(short_desc, long_desc)`` tuple of stripped strings.

    Raises:
        Exception: If the API keeps failing, returns a non-retryable error, or
            the reply does not follow the requested format.
    """
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01"
    }

    data = {
        "model": "claude-3-5-sonnet-20240620",
        "max_tokens": 1000,
        "system": "You are a helpful assistant that is specialized in reading and analyzing research papers.",
        "messages": [
            {"role": "user", "content": f"""Please provide two summaries of the following text:

1. A short description of 2-3 lines.
2. A long description of minimum 500 tokens.

Format your response as follows:
Short Description: [Your short summary here]

Long Description: [Your long summary here]

Here's the text to summarize:

{text}"""}
        ]
    }

    # (connect, read) timeouts in seconds: without them a stalled connection
    # would block forever and the retry logic would never get a chance to run.
    request_timeout = (10, 120)

    try:
        response = requests.post(
            "https://api.anthropic.com/v1/messages",
            json=data,
            headers=headers,
            timeout=request_timeout,
        )
        if response.status_code != 200:
            retry_after = None
            if response.status_code == 429:
                retry_after = _retry_after_seconds(response)
            raise _AnthropicAPIError(
                response.status_code, response.text, retry_after
            )
        full_response = response.json()['content'][0]['text']
        return _split_summaries(full_response)
    except _AnthropicAPIError as e:
        if e.status_code == 429:
            # Rate limiting is signalled by HTTP 429; wait as long as the
            # server asked before handing control back to the retry decorator.
            print(f"Rate limit exceeded. Waiting for {e.retry_after} seconds.")
            time.sleep(e.retry_after)
        else:
            print(f"An error occurred: {e}")
        raise
    except Exception as e:
        print(f"An error occurred: {e}")
        raise

def process_file(filename):
    """Summarize one text file and label the result with its thesis number.

    Args:
        filename: Path to a UTF-8 text file whose base name starts with the
            thesis number followed by an underscore (e.g. ``001_000_000.txt``).

    Returns:
        A ``(thesis_num, short_desc, long_desc)`` tuple, where the two
        descriptions are whatever ``summarize_text`` returns for the file's
        content.
    """
    # Use the base name so underscores in a directory component of the path
    # cannot be mistaken for the separator after the thesis number.
    thesis_num = os.path.basename(filename).split('_')[0]
    # Explicit encoding: the platform default differs between systems.
    with open(filename, 'r', encoding='utf-8') as file:
        content = file.read()
    short_desc, long_desc = summarize_text(content)
    return thesis_num, short_desc, long_desc

# Rows destined for the CSV: one dict per file that was summarized.
results = []

# Summarize the single input file, provided it is present on disk.
filename = '001_000_000.txt'
if not os.path.exists(filename):
    print(f"File {filename} not found.")
else:
    thesis_num, short_desc, long_desc = process_file(filename)
    # Only the long description is persisted; the short one is left unused.
    row = dict(thesis_num=thesis_num, description=long_desc)
    results.append(row)

# Tabulate the collected rows and save them without the index column.
df = pd.DataFrame(results)
csv_filename = 'summaries.csv'
df.to_csv(csv_filename, index=False)

print(f"Summaries have been written to {csv_filename}")

# Output: Summaries have been written to summaries.csv
