In [None]:
pip install langchain
pip install openai

In [1]:
import os
import pandas as pd
from dotenv import load_dotenv
load_dotenv()
openai_api_key=os.getenv('OPENAI_API_KEY', 'YourAPIKey')

from langchain.document_loaders import DirectoryLoader
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.chains import create_extraction_chain
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

In [2]:
llm3 = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-0613", request_timeout = 3000)

llm4 = ChatOpenAI(temperature=0, model_name="gpt-4-0613", request_timeout = 3000)

In [3]:
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n", " "], chunk_size=10000, chunk_overlap=2200)
loader = DirectoryLoader('C:\CS410-CourseProject\Technical Paper/', glob='**/*.txt')

documents = loader.load()
print (f"Directory folder has been reviewd to find the number of files and {len(documents)} transcripts loaded.")

texts = text_splitter.split_documents(documents)
print (f"Next, the {len(documents)} loaded transcripts have been split into {len(texts)} chunks.")

Directory folder has been reviewd to find the number of files and 1 transcripts loaded.
Next, the 1 loaded transcripts have been split into 3 chunks.


In [4]:
docs = loader.load()

In [5]:
template="""
Find topic from course transcript or technical paper
- Your goal is to extract the topic names from document.
- Provide a brief description of the topics after the topic name. Example: 'Topic: Brief Description'
- Only pull topics from the transcript or technical paper. Do not use the examples.

"""
system_message_prompt = SystemMessagePromptTemplate.from_template(template)

human_prompt = "Transcript: {text}" 
human_message_template = HumanMessagePromptTemplate.from_template(human_prompt)

chat_prompt = ChatPromptTemplate.from_messages(messages = [system_message_prompt, human_message_template])

In [6]:
template="""
You are a helpful assistant that helps retrieve topics that discusssed in course transcript or technical document.
- You will be given a series of bullet topics of topics found.
- Your goal is to exract the topic names and brief description of the topic.
- Only pull topics from the transcript or technical documents. Do not use the examples.

"""
system_message_prompt = SystemMessagePromptTemplate.from_template(template)

human_prompt = "Transcript: {text}" 
human_message_template = HumanMessagePromptTemplate.from_template(human_prompt)

chat_prompt_combine = ChatPromptTemplate.from_messages(messages=[system_message_prompt, human_message_template])

In [7]:
chain = load_summarize_chain(llm4, chain_type = "map_reduce",
                             map_prompt = chat_prompt, combine_prompt = chat_prompt_combine)

In [8]:
topics_found = chain.run({"input_documents": docs})

In [9]:
print (topics_found)

1. Topic: Influence of Embedded Material on Natural Frequencies of Double Segment Rotating Disk
   Description: The topic covers the effects of incorporating different materials at one edge of a rotating disk on its natural frequencies and critical speeds. The discussion is based on the linear in-plane free vibration of a compound disk with material discontinuity.

2. Topic: In-Plane Free Vibration of Rotating Disks
   Description: This topic delves into the in-plane free vibration of rotating disks, especially those with material discontinuity. It examines the impact of rotational speed and radius ratio on the natural frequency and elastic stability of fixed-free rotating disks.

3. Topic: Development of the Governing Equation
   Description: This topic involves the derivation of the governing equations of motion for a rotating compound disk. It covers the plane stress condition, the equations of motion for a point on the disk in polar coordinates, and the associated normal and shear 

In [10]:
schema = {
    "properties": {
        "topic_name": {
            "type": "string",
            "description" : "The title of the topic listed"
        },
        "description": {
            "type": "string",
            "description" : "The description of the topic listed"
        },
    },
    "required": ["topic", "description"],
}

In [11]:
chain = create_extraction_chain(schema, llm3)
topics_structured = chain.run(topics_found)

In [12]:
topics_structured

[{'topic_name': 'Influence of Embedded Material on Natural Frequencies of Double Segment Rotating Disk',
  'description': 'The topic covers the effects of incorporating different materials at one edge of a rotating disk on its natural frequencies and critical speeds. The discussion is based on the linear in-plane free vibration of a compound disk with material discontinuity.'},
 {'topic_name': 'In-Plane Free Vibration of Rotating Disks',
  'description': 'This topic delves into the in-plane free vibration of rotating disks, especially those with material discontinuity. It examines the impact of rotational speed and radius ratio on the natural frequency and elastic stability of fixed-free rotating disks.'},
 {'topic_name': 'Development of the Governing Equation',
  'description': 'This topic involves the derivation of the governing equations of motion for a rotating compound disk. It covers the plane stress condition, the equations of motion for a point on the disk in polar coordinates,

In [13]:
import pandas as pd
df = pd.DataFrame(topics_structured)
df

Unnamed: 0,topic_name,description
0,Influence of Embedded Material on Natural Freq...,The topic covers the effects of incorporating ...
1,In-Plane Free Vibration of Rotating Disks,This topic delves into the in-plane free vibra...
2,Development of the Governing Equation,This topic involves the derivation of the gove...
3,Modal Stresses and Displacements,This topic discusses the solution to the coupl...
4,Natural Frequency Equations,This topic explains the process of obtaining t...
5,Discussion and Results,"This topic presents the results of the study, ..."
6,Conclusions,"This topic summarizes the study's findings, em..."


In [14]:
df.to_csv(r'C:\CS410-CourseProject\topic_compound_rotating_disk_paper.csv', mode='a', index = None)