In [38]:

from typing import Iterator, List
from pdfminer.high_level import extract_pages
from pdfminer.layout import LTTextContainer, LTPage
from collections import Counter

def read_file(file_name):
    """Hand a PDF path to pdfminer and return its page-layout result.

    Args:
        file_name: Path of the PDF, forwarded unchanged to ``extract_pages``.

    Returns:
        Whatever ``extract_pages`` returns for that file.
        NOTE(review): pdfminer appears to produce pages lazily, in which case
        the result can only be iterated once -- confirm before reusing it.
    """
    return extract_pages(file_name)
    
    
def get_paragraphs(file: Iterator[LTPage]) -> List[dict]:
    """Collect the text containers of every page into a flat list of records.

    Args:
        file: Iterable of page layouts, each of which iterates over its
            layout elements.

    Returns:
        One dict per ``LTTextContainer`` element, in document order, with keys:
        ``'id'`` (0-based running index across all pages), ``'page'``
        (1-based page number) and ``'paragraph'`` (the element's
        ``get_text()`` result, unmodified). Non-text elements are skipped.
    """
    collected: List[dict] = []

    for page_number, layout in enumerate(file, start=1):
        text_boxes = (item for item in layout if isinstance(item, LTTextContainer))
        for box in text_boxes:
            # The next id always equals the number of records gathered so far.
            collected.append({
                'id': len(collected),
                'page': page_number,
                'paragraph': box.get_text(),
            })

    return collected

def get_terms(file: Iterator[LTPage]) -> List[dict]:
    """Placeholder for term extraction; not implemented yet.

    The function has no logic: ``file`` is never read and the call yields
    ``None`` rather than the annotated ``List[dict]``.
    """
    return None


In [39]:
# Parse the chapter PDF and split it into text-container records.
pdf_path = '../data/raw/Chapter-1---The-Rise-of-Platform-Ecosystems_2014_Platform-Ecosystems.pdf'
in_file = read_file(pdf_path)
paragraphs = get_paragraphs(in_file)

# Records whose text is longer than 30 characters are counted as "actual"
# paragraphs; shorter ones are excluded from that count.
actual_paragraph_count = sum(1 for p in paragraphs if len(p['paragraph']) > 30)

print(f"Total paragraphs: {len(paragraphs)}")
print(f"Actual paragraphs: {actual_paragraph_count}")

Total paragraphs: 386
Actual paragraphs: 109


In [56]:
from typing import List
from dotenv import find_dotenv, load_dotenv
from openai import OpenAI
from src.api_integrations.interfaces.llm_generic_interface import LLMGenericInterface


class OpenAIInterface(LLMGenericInterface):
    """Wrapper around the OpenAI chat-completions client.

    A client is created on construction (see ``setup``). The public methods
    send a system prompt from ``prompts`` plus the caller's text and return
    the text content of the first completion choice.
    """

    # Class-level default; ``setup`` assigns a real client on each instance.
    client = None

    # System prompts keyed by task name.
    prompts = {
        "expert_tutor": "You are an expert tutor. Give an extensive and descriptive answer using relevant technical terms and examples.",
        "profile_picture": "Create an avatar of a happy robot tutor in the course {course_name}. Make the robot be the highlight, with a background displaying a simple pattern",
        "summerize_paragraph": "Summerize the user's paragraph in one descriptive sentence. Focus on keeping relevant technical terms and keep it short but concise.",
    }

    def __init__(self):
        self.setup()

    def setup(self, environment: bool = False):
        """Create the OpenAI client.

        Args:
            environment: When ``False`` (default), a ``.env`` file located via
                ``find_dotenv`` is loaded before the client is built. When
                ``True``, that step is skipped.
        """
        if not environment:
            dotenv_path = find_dotenv()
            load_dotenv(dotenv_path)

        self.client = OpenAI()

    def sys_message(self, message: str):
        """Wrap ``message`` as a chat message with the ``system`` role."""
        return {"role": "system", "content": message}

    def user_message(self, message: str):
        """Wrap ``message`` as a chat message with the ``user`` role."""
        return {"role": "user", "content": message}

    def _chat(self, model: str, system_prompt: str, user_text: str, **options) -> str:
        """Send one system + user exchange and return the first choice's text.

        ``options`` are forwarded to ``chat.completions.create`` unchanged
        (e.g. ``temperature``, ``top_p``).
        """
        completion = self.client.chat.completions.create(
            model=model,
            messages=[
                self.sys_message(system_prompt),
                self.user_message(user_text),
            ],
            **options,
        )
        # Chat completions carry the reply under ``message.content``.
        return completion.choices[0].message.content

    def generic_request(self, query: str, params: dict = None, history_key: str = None) -> str | List[str]:
        """Answer ``query`` using the ``expert_tutor`` system prompt.

        Args:
            query: The user's question, sent as the user message.
            params: Accepted for interface compatibility; currently unused.
            history_key: Accepted for interface compatibility; currently unused.

        Returns:
            The text content of the first completion choice.
        """
        # The original looked up "expert tutor" (with a space), which is not a
        # key of ``prompts`` and raised KeyError, and it read ``.text``, which
        # chat-completion choices do not provide.
        return self._chat(
            model="gpt-3.5-turbo",
            system_prompt=self.prompts["expert_tutor"],
            user_text=query,
        )

    def summerize_paragraph(self, text: str) -> str:
        """Summarize ``text`` using the ``summerize_paragraph`` system prompt.

        Args:
            text: The paragraph to summarize, sent as the user message.

        Returns:
            The text content of the first completion choice.
        """
        return self._chat(
            model="gpt-4-1106-preview",
            system_prompt=self.prompts["summerize_paragraph"],
            user_text=text,
            temperature=0.3,
            top_p=0.1,
        )
    


In [42]:
# Sample paragraph passed to the summarizer in a later cell.
# The trailing backslashes are line continuations inside the string literal:
# Python joins the lines with no newline or space in between, so words that
# were hyphenated at a line break keep their hyphen in the resulting string.
example_paragraph = "Blackberry had everything going right. It had fanatically loyal customers and its products were inno-\
vative, well engineered, durable, and got raving reviews from critics. After years of commanding a\
lion’s share (about 50%) of the smartphone market that it largely created, it had trouble breaking past\
a 1% market share with its newest products by 2012, leading to its subsequent downfall Blackberry\
assumed that the problem was Apple and then Google—both industry outsiders—who had since\
entered the fray. So, it did what made sense: Price more competitively, invest more in developing\
new products, upgrade its operating system, and step up marketing. Nothing worked. Its error was fail-\
ning to realize that the basis for competition had changed: It was no longer Blackberry against Apple\
smartphones. Instead, it was the Blackberry ecosystem against the iOS ecosystem. It was not one prod-\
uct against another but Blackberry’s army of 8000 external innovators against Apple’s 200,000. Black-\
berry’s mistake was failing to realize the ecosystem on which its continued success depended. All three\
companies made good products, but the lack of enough innovative apps muted Blackberry’s market\
potential. It was already too late to catch up by the time Blackberry realized that the competitive blue-\
print had shifted. The Red Queen effect — the need to run faster just to stay in the same place—had\
taken over."


In [57]:
# Summarize the sample paragraph and compare character counts.
model = OpenAIInterface()
response = model.summerize_paragraph(example_paragraph)

original_length, summary_length = len(example_paragraph), len(response)
print(f"original length: {original_length}, summary length: {summary_length}")
print(f"summary: {response}")

original length: 1401, summary length: 353
summary: Blackberry's downfall was due to its failure to recognize the shift in competition from individual smartphones to the broader ecosystem, resulting in an inability to keep up with Apple's expansive network of innovators and the critical mass of innovative apps, despite making competitive pricing, product development, OS upgrades, and marketing efforts.
