In [1]:
import openai
import os
from bs4 import BeautifulSoup
import requests



### Using OpenAI API

In [2]:

from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv() into the process
# environment; the return value is assigned to `_` because it is not needed.
_ = load_dotenv(find_dotenv())

# Read the key from the environment so it is never hard-coded in the notebook.
# os.getenv returns None when OPENAI_API_KEY is not set.
openai.api_key  = os.getenv('OPENAI_API_KEY')

In [3]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send a single-turn user prompt to the chat model and return its reply text.

    Args:
        prompt: Text sent as the content of one "user" message.
        model: Name of the chat model passed to the API.
        temperature: Degree of randomness of the model's output. The default
            of 0 matches the value that used to be hard-coded here.

    Returns:
        The "content" entry of the first choice's message in the API response.
    """
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return response.choices[0].message["content"]

In [4]:
def extract_customer_reviews(url, timeout=10):
    """Fetch a page and return the HTML elements that hold customer reviews.

    Args:
        url: Address of the review page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The result of ``soup.find_all('div', class_='text_content')`` when the
        page is fetched with status 200; an empty list when the request fails
        or returns any other status code.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Network failures (DNS, refused connection, timeout) are reported the
        # same way as a bad status code instead of aborting with a traceback.
        print(f"Error: Unable to fetch reviews from URL {url} ({error})")
        return []

    # Anything other than 200 is treated as a failed fetch.
    if response.status_code != 200:
        print(f"Error: Unable to fetch reviews from URL {url}")
        return []

    # Parse the HTML and collect every <div class="text_content"> element.
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.find_all('div', class_='text_content')

In [None]:
# Download the British Airways review page from airlinequality.com and keep
# the extracted review elements; `reviews` is reused by the cells below.
url = "https://www.airlinequality.com/airline-reviews/british-airways"
reviews = extract_customer_reviews(url)
print(reviews)

In [None]:
# Spot-check one review (index 3) as plain text.
reviews[3].get_text()
# or 

In [None]:
# The scraped text carries a trip-verification marker (tick/cross symbols)
# before a '|' separator; only the part after it is kept for sentiment analysis.
def _strip_verification_prefix(review_text):
    """Return the review body that follows the first '|' separator.

    Text without a '|' is returned whole (stripped) instead of raising
    IndexError, and any later '|' characters stay part of the body.
    """
    _prefix, separator, body = review_text.partition('|')
    return (body if separator else review_text).strip()


processed_reviews = [
    _strip_verification_prefix(review.get_text()) for review in reviews
]

# Print each cleaned review once; printing the whole accumulated list on
# every iteration repeated all earlier reviews in the output.
for i, review_part in enumerate(processed_reviews):
    print(f"Review {i + 1} Part:")
    print(review_part)
    print()

### We begin our analysis without prompt engineering

In [None]:

# Baseline run: send two sample reviews to the model as-is, with no instructions.
example_indices = [3, 5]

for i, index in enumerate(example_indices):
    review = processed_reviews[index]
    response = get_completion(review)
    # One print call emitting the label line, then the reply plus a blank line.
    print(f'Review {i}', f'{response}\n', sep='\n')

### From the results above we can see that, without explicit instructions, the model cannot know our exact intentions; hence prompt engineering is necessary here

### Here we define phase 1, in which we ask the model to classify the sentiment of the passenger and to identify the top two major problems faced by the passenger

In [None]:
# phase 1
# For each sampled review, ask the model for the sentiment plus short reasons,
# and collect the raw replies in `stored_info` for the later cells.
example_indices = [3,5,6,8]
stored_info = []

for i,index in enumerate(example_indices):
    
    review = processed_reviews[index]
    
    # The review is wrapped in <> so it is delimited from the instructions.
    # A trailing backslash inside the f-string joins that line with the next.
    prompt = f"""
    You will be provided with a review delimeted by <>
    First you need to identified the sentiment of the review. \
    In case of negative sentiment then identify the \
    top two problems no more than five words.\
    In case of positive sentiment then identify the reason\
    of satisfaction travelling with this airline with no more\
    five words.
    The output should be alinged as per the sentiment of the review.
    Output should looks like:\
    Sentiment:````
    Reason of dissatisfaction/satisfaction:
    1.```
    2.```

    
    <{review}>
    
    """
    response = get_completion(prompt)
    # Keep the model's reply so it can be aggregated afterwards.
    stored_info.append(response)
    # `i` is the position within example_indices, not the review's own index.
    print(f'Review {i}')
    print(f'{response}\n')



### Here we are trying to find the number of satisfied and dissatisfied customers

In [None]:
# phase 2
# Send all phase-1 replies in a single prompt and ask the model to count how
# often the different satisfaction/dissatisfaction entries occur.
# `stored_info` is interpolated using the Python list's string form.

 
prompt = f"""
You have given the list delimeted by <> \
This list contains the reason of satisfaction and
dissatisfaction.
Your task is to count the frequency of different satisfaction and dissatisfaction.
<{stored_info}>
    
    """
overall_response = get_completion(prompt)
#print(f'Review {i}')
print(f'{overall_response}\n')

### In phase 3 we try to obtain the frequency of the different reasons for satisfaction and dissatisfaction

In [None]:
# Effort 1
# phase 2 is in progress
# phase 1 is achieved
# Ask the model to merge phrases from `stored_info` that point to the same
# conclusion under one short label and to report a count per label.
   
prompt = f"""
    
    You have to count the reasons of satisfaction and dissatisfaction \
    present in {stored_info} lies in same category.\
    For example:
    two phrases 'boarded on time' and 'on time' pointing to same conclusion\
    that is flight is on time, hence for any phrases which is pointing to\
    the same conclusion you need to rename with same words (not more than five words) for any such phrases.\
    Then count these similar phrases and place in the dictionary which has the key\
    containing phrase and value is the number of times that similar concluding words\
    appeared.
    The output should be structered not by the review but by reason only as:
    Reason ````: 

    
    """
# Overwrites the `overall_response` produced by the phase 2 cell.
overall_response = get_completion(prompt)
print(f'{overall_response}\n')



## Future prospects
### The next task is to build a UI so that stakeholders can easily interact with the analysis
### And to build a pipeline that can handle live information