# Chapter 1 - Conclusions

## General setup

In [1]:
import os

import docx
from docx import Document

import openai

# Read the OpenAI key from the environment so the secret is never stored in the notebook.
# The key that used to be hard-coded on this line is exposed and should be revoked.
# A missing OPENAI_API_KEY raises KeyError here instead of failing later at the first API call.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [2]:
import torch
from transformers import AutoTokenizer

In [3]:
# Raw string so the backslashes of the Windows path are never parsed as escape sequences.
expert_interview_file_path = r'H:\Documents\Software Development\QUB Software Development\Data_analysis_module\ExpertInterview.docx'

In [4]:
# Raw string so the backslashes of the Windows path are never parsed as escape sequences.
AI_interview_file_path = r'H:\Documents\Software Development\QUB Software Development\Data_analysis_module\Chapter1ProjectReport.docx'

In [5]:
# Industry name that is interpolated into every prompt built in this notebook.
industry = 'vintage banjo refurbishment and retail'

## Functions

In [6]:
# Read in the file and chunk paragraphs if they exceed 2000 words

def read_word_file_2000(file_path, max_words=2000):
    """Read a Word document and group its body paragraphs into word-limited chunks.

    Paragraphs are appended to the current chunk in document order. As soon as
    the running word count of the chunk reaches ``max_words`` the chunk is
    closed and the next paragraph starts a new one, so a chunk can overshoot
    the limit by at most one paragraph. Paragraphs whose style name starts with
    'Heading' or 'Title', and paragraphs that are empty or whitespace-only, are
    skipped.

    Args:
        file_path: Path of the .docx file; passed unchanged to ``Document``.
        max_words: Running word count at which the current chunk is closed.

    Returns:
        dict mapping 'Chunk 1', 'Chunk 2', ... to the text of each chunk.
        Every paragraph inside a chunk is preceded by a newline character.
    """
    document = Document(file_path)

    chunk_dict = {}
    chunk_count = 1
    words_count = 0

    for paragraph in document.paragraphs:
        text = paragraph.text
        style_name = paragraph.style.name

        # Headings, titles and blank paragraphs carry no body text to summarise.
        if style_name.startswith(('Heading', 'Title')) or not text.strip():
            continue

        # Always append to the current chunk. Assigning here instead would drop
        # every paragraph collected so far when the limit is reached.
        key = f'Chunk {chunk_count}'
        chunk_dict[key] = chunk_dict.get(key, '') + '\n' + text
        words_count += len(text.split())

        # Close the chunk once it is full; the next paragraph opens a new one.
        if words_count >= max_words:
            words_count = 0
            chunk_count += 1

    return chunk_dict

In [7]:
# Using ChatGPT to summarise paragraphs

def summarize_paragraphs(paragraph_dict, prompt, model="gpt-3.5-turbo"):
    """Summarise every chunk with the chat completion API and join the replies.

    One request is sent per entry of ``paragraph_dict``: ``prompt`` is the
    system message and the chunk text is the user message. Progress is printed
    as 'Summarizing <key>' before each request.

    Args:
        paragraph_dict: Mapping of chunk label to chunk text, iterated in
            insertion order.
        prompt: System message sent with every chunk.
        model: Chat model name passed to ``openai.ChatCompletion.create``.

    Returns:
        str: All replies concatenated, each one preceded by a newline. An empty
        mapping yields an empty string.
    """
    # NOTE(review): this relies on openai.api_key having been configured before
    # the call. The original re-assigned the key to itself here, which had no effect.
    summaries = []

    for key, paragraph in paragraph_dict.items():
        print(f"Summarizing {key}")
        response = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": paragraph},
            ],
        )
        # The first choice holds the model's reply, i.e. the summary of this chunk.
        summaries.append(response["choices"][0]["message"]["content"])

    # Each summary keeps its leading newline so the result matches the old format.
    return "".join("\n" + summary for summary in summaries)

In [8]:
# Function that takes 2 docs to compare and contrast

def query_completion(doc1, doc2, prompt):
    """Ask the chat model a question about two documents and return its reply.

    The request contains a fixed system message followed by three user
    messages: ``doc1`` labelled 'Document 1', ``doc2`` labelled 'Document 2',
    and finally ``prompt``.

    Args:
        doc1: Text of the first document.
        doc2: Text of the second document.
        prompt: The question or instruction, sent as the last user message.

    Returns:
        str: Content of the first choice in the model's response.
    """
    # NOTE(review): this relies on openai.api_key having been configured before
    # the call. The original re-assigned the key to itself here, which had no effect.
    messages = [
        {"role": "system", "content": "You are an expert data analyst who always looks for the most promising areas of a business to investigate through data driven analysis."},
        {"role": "user", "content": f"Document 1: {doc1}"},
        {"role": "user", "content": f"Document 2: {doc2}"},
        {"role": "user", "content": prompt},
    ]

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )

    # Subscript access, consistent with summarize_paragraphs.
    return response["choices"][0]["message"]["content"]


In [None]:
# If it's necessary to check the tokens of summaries before passing back to chatgpt api:

def count_tokens(summary):
    """Count, print and return the number of tokens the "gpt2" tokenizer produces for a text.

    Args:
        summary: Text to tokenize.

    Returns:
        int: Number of token ids returned by ``tokenizer.encode(summary)``.
    """
    # NOTE(review): the GPT-2 vocabulary may differ from the tokenizer used by
    # gpt-3.5-turbo, so treat the count as an estimate. Confirm before relying
    # on it for hard context-window limits.
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    # The length of the encoded id list is the token count; no tensor is needed.
    num_tokens = len(tokenizer.encode(summary))

    print(f"Token count of the summarized text: {num_tokens}")
    return num_tokens

## Read in both the expert interview and AI interview, then chunk them into smaller sections

In [9]:
# Chunk the expert interview with read_word_file_2000
expert_interview_chunked = read_word_file_2000(
    file_path=expert_interview_file_path,
)

In [10]:
# Chunk the AI generated report with read_word_file_2000
AI_interview_chunked = read_word_file_2000(
    file_path=AI_interview_file_path,
)

In [11]:
# Print each chunk of the expert interview under its label
for chunk_label, chunk_text in expert_interview_chunked.items():
    print(f"{chunk_label}: {chunk_text}\n")

Chunk 1: 
The purchase, refurbishment, and sale of a vintage 1960’s Framus tenor banjo. 
The business owner’s wife is a banjo teacher whose students have always had trouble finding suitable instruments. Most students start off playing a new, budget instrument bought online in the sub £350 price bracket which they soon outgrow. There are no dealers in a 60 mile radius selling higher end, vintage banjos. Brand new instruments don’t have the same tonal qualities as older ones - the old wood is the secret. 
This project began with a search for any vintage banjo that the expert thinks customers are likely to want. Projects are typically not prompted by a specific request from a customer. 
There are a number of things the expert looks for when purchasing a banjo in need of restoration. The most important thing is the brand name - popular names played by famous players are what attract people to an instrument. Build quality is also important, and having experience with a variety of models ove

In [12]:
# Print each chunk of the AI generated report under its label
for chunk_label, chunk_text in AI_interview_chunked.items():
    print(f"{chunk_label}: {chunk_text}\n")

Chunk 1: 
 Certainly, while it is unfortunate, there was a specific business in the vintage banjo refurbishment and retail industry that experienced a decline and eventually went out of business over the past 20 years. Let's consider the case of Banjo Revive Company (fictional name) and delve into the reasons for their failure.

Banjo Revive Company was once a prominent player in the industry but struggled to adapt to changing market dynamics. One key factor that contributed to their downfall was their resistance to incorporating digital innovation into their business model. Unlike their competitors who embraced e-commerce and online platforms, Banjo Revive Company relied solely on traditional brick-and-mortar retail channels.

In addition to their lack of digital presence, Banjo Revive Company failed to recognize the shifting preferences of customers towards more affordable options and DIY refurbishment. The rise of online banjo communities and forums paved the way for enthusiasts to 

## Summarise both interviews

In [None]:
# System prompt used when summarising each interview chunk.
# The line breaks and indentation inside the triple-quoted string are part of
# the text that is sent to the model.
# NOTE: the name `prompt` is reassigned further down for the comparison step,
# so re-run this cell before re-running the summary cells.
prompt = f"""You are an expert in data analysis and you are helping a business 
            in the {industry} industry become more successful and increase revenue.
            You have to read the following interview excerpt with the owner of the business.
            The interview includes details about a recent project,
            and discussion with an expert about the wider industry.
            Suggest the key areas you think could lead to 
            the most impactful recommendations for the business 
            through a data analysis investigation, 
            and summarise your conclusions. """

*In the interest of keeping the report short I just used the prompt above to summarise the interviews.*

*If you wanted more information, the prompt below could be looped over, changing the `top_5_priority` parameter each time.*

In [None]:
# Parameterised prompting: `top_5_priority` selects which "top 5" list the
# model is asked to outline. Exactly one assignment should be active; the
# commented-out lines are the alternative topics.
top_5_priority = "values of the business"
#top_5_priority = "challenges for the business"
#top_5_priority = "potentially unexplored revenue streams"
#top_5_priority = "weaknesses of the management team"
#top_5_priority = "impactful recommendations to help the business succeed"

# Alternative system prompt built from `industry` and `top_5_priority`.
# It is not passed to any function in the cells shown in this notebook.
parameter_prompt = f"""You are an expert in data analysis and you are helping a business 
            in the {industry} industry become more successful and increase revenue.
            You have to read the following interview excerpt with the owner of the business.
            The interview includes details about a recent project,
            and discussion with an expert about the wider industry.
            Upon reviewing the interview,
            outline the top 5 {top_5_priority}."""

In [13]:
# Summarise every chunk of the expert interview with the summary prompt
expert_summary = summarize_paragraphs(
    paragraph_dict=expert_interview_chunked,
    prompt=prompt,
)

Summarizing Chunk 1


In [14]:
# Summarise every chunk of the AI generated report with the summary prompt
AI_interview_summary = summarize_paragraphs(
    paragraph_dict=AI_interview_chunked,
    prompt=prompt,
)

Summarizing Chunk 1
Summarizing Chunk 2
Summarizing Chunk 3
Summarizing Chunk 4


In [15]:
# Show the expert interview summary beneath a heading line
print("\nSummary of the document:\n", expert_summary, sep="\n")


Summary of the document:


Based on the interview excerpt, the following key areas could lead to impactful recommendations for the business through a data analysis investigation:

1. Market Research: Conducting market research to understand the demand for vintage banjos in the local area and identifying potential customers who are interested in purchasing higher-end instruments.

2. Customer Preferences: Analyzing customer preferences and trends in the banjo industry over the past 15 to 20 years. This would involve understanding changes in preferred skin type, tone preferences, and string choices. This data can help the business tailor their refurbishment process and product offerings to meet customer preferences.

3. Pricing Strategy: Analyzing the pricing strategy for refurbished vintage banjos. This would involve analyzing the initial purchase price of banjos, cost of restoration including parts and labor, and the desired profit margin. Additionally, understanding the price range a

In [16]:
# Show the AI report summary beneath a heading line
print("\nSummary of the document:\n", AI_interview_summary, sep="\n")


Summary of the document:


Based on the interview, here are the key areas that could lead to the most impactful recommendations for the business:

1. Embrace digital innovation: The business needs to incorporate e-commerce and online platforms to reach a wider customer base. This would allow the business to tap into the growing trend of online banjo communities and forums and provide customers with more affordable options and support for DIY refurbishment.

2. Offer affordable options and support for DIY refurbishment: Recognize the shifting preferences of customers towards fixer-upper banjos and invest in providing affordable options and resources for customers to refurbish banjos themselves. This could involve offering lower-priced banjo models or refurbishment kits.

3. Provide customization options: Adapt to evolving customer expectations by offering a wider range of banjo models and customization options. This will appeal to customers who are looking for banjos tailored to their 

## Compare and contrast the interviews, and draw conclusions

In [17]:
# Instruction passed to query_completion as the final user message.
# NOTE: this overwrites the `prompt` used earlier for the summaries, so the
# summary cells must not be re-run after this cell without redefining it.
prompt = f"""You are an expert data analyst. You help businesses to succeed by analysing data
            and finding novel, counter-intuitive suggestions to make improvements and increase revenue.
            I have summarised the main points 
            from interviews with 2 business owners in the {industry} industry.
            Can you compare and contrast them?
            As an expert in data analysis, what data would you focus on gathering 
            after reading a summary of both interviews?"""

In [18]:
# Compare the two summaries: expert interview as Document 1, AI report as Document 2
conclusion = query_completion(
    doc1=expert_summary,
    doc2=AI_interview_summary,
    prompt=prompt,
)

In [19]:
# Display the comparison text returned by query_completion
print(conclusion)

Certainly! Let's compare and contrast the main points from the interviews with the business owners in the vintage banjo refurbishment and retail industry:

Business Owner 1 (Document 1):
- Key areas for analysis highlighted include market research, customer preferences, pricing strategy, marketing channels, sourcing challenges, competition analysis, and time allocation.
- The emphasis is on market research, understanding customer preferences and trends, optimizing pricing strategies, analyzing marketing channels, and addressing challenges in sourcing and competition.

Business Owner 2 (Document 2):
- Key areas for analysis highlighted include digital innovation, affordability, customization, and community engagement.
- The focus is on embracing e-commerce and online platforms, providing affordable options for self-refurbishment, offering customization, and engaging with the banjo community.

Although both interview excerpts provide valuable insights, they differ in their recommendation

## Finally, construct the word document

In [28]:
# Now add real expert interview and conclusion section to the project report

In [35]:
# Open the existing project report. Reusing AI_interview_file_path avoids
# repeating the literal path, which was written with unescaped backslashes.
document = Document(AI_interview_file_path)

In [36]:
# Level-2 heading for the expert interview section
document.add_heading(
    "1.5. Summary of an interview with a real industry expert",
    level=2,
)

<docx.text.paragraph.Paragraph at 0x24457ca8fd0>

In [37]:
# Body of section 1.5: the summary of the expert interview
document.add_paragraph(text=expert_summary)

<docx.text.paragraph.Paragraph at 0x24457ca8a60>

In [38]:
# Level-2 heading for the conclusions section
document.add_heading(
    "1.6. Conclusions",
    level=2,
)

<docx.text.paragraph.Paragraph at 0x24457ca9b70>

In [39]:
# Lead-in sentence placed before the generated comparison
document.add_paragraph(
    text="AI generated comparison between expert interview and AI generated report, and final conclusions:\n",
)

<docx.text.paragraph.Paragraph at 0x24457caa650>

In [40]:
# Body of section 1.6: the comparison text produced by the model
document.add_paragraph(text=conclusion)

<docx.text.paragraph.Paragraph at 0x24457ca8550>

In [41]:
# Write the report back to the file it was loaded from. Reusing
# AI_interview_file_path avoids repeating the literal path, which was written
# with unescaped backslashes.
# NOTE: this overwrites the input report, so running the construction cells
# again appends sections 1.5 and 1.6 a second time.
document.save(AI_interview_file_path)