In [1]:
import pandas as pd

# The CSV was saved together with its row index, which otherwise comes back as a
# spurious "Unnamed: 0" data column; read that first column as the index instead.
airline_df = pd.read_csv("airline_top20_df_filtered.csv", index_col=0)

In [2]:
# Preview the first five rows of the loaded airline reviews.
airline_df.head(n=5)

Unnamed: 0,airline_name,content,cabin_flown,overall_rating,seat_comfort_rating,cabin_staff_rating,food_beverages_rating,value_money_rating,recommended
0,Air Canada,London to Toronto. Flight delayed 5 hrs and no...,Economy,3.0,1.0,1.0,1.0,1.0,no
1,Air Canada,We had a confirmed booking to fly from Toronto...,Economy,1.0,1.0,1.0,1.0,1.0,no
2,Air Canada,I travelled 27th July from Toronto to LHR. Thi...,Economy,1.0,1.0,4.0,2.0,1.0,no
3,Air Canada,Slow check-in in Beijing. Used the Air China B...,Business Class,5.0,2.0,2.0,2.0,3.0,yes
4,Air Canada,Indifferent cabin crew acted like passengers w...,Economy,3.0,3.0,2.0,1.0,2.0,no


In [3]:
from transformers import pipeline
import pandas as pd

# Multilingual BERT model: recognises sentiment, takes sentence context into
# account and returns a rating from 1 to 5.
SENTIMENT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model=SENTIMENT_MODEL_NAME,
    truncation=True,
)

def aspect_sentiment(text, aspects, max_chars=500):
    """Score one review separately for each aspect.

    For every aspect the module-level ``sentiment_analyzer`` is called with the
    prompt ``"<aspect>: <review text>"``. The leading digit of the returned
    label (e.g. ``'4 stars'``) is rescaled linearly from 1-5 to 1-10 and then
    multiplied by the confidence the model reports for that label.

    NOTE(review): because of that multiplication the stored value can fall
    below 1, so it is no longer strictly on the 1-10 scale -- presumably
    ``res['score']`` is a probability in (0, 1]; confirm this weighting is
    intended.

    Args:
        text: Review text. Any non-string value (e.g. the NaN pandas produces
            for a missing review) yields NaN for every aspect instead of
            raising.
        aspects: Iterable of aspect names to score.
        max_chars: Number of leading characters of the review kept in each
            prompt. Defaults to 500.

    Returns:
        dict mapping each aspect name to its confidence-weighted score.
    """
    # A missing review arrives as float NaN (or None), which cannot be sliced.
    # NaN results are ignored by the later groupby(...).mean().
    if not isinstance(text, str):
        return {aspect: float("nan") for aspect in aspects}

    # The truncated review is the same for every aspect, so slice it once.
    snippet = text[:max_chars]

    results = {}
    for aspect in aspects:
        # The pipeline returns a list; the first element holds label and score.
        res = sentiment_analyzer(f"{aspect}: {snippet}")[0]
        # Labels look like '4 stars'; only the leading digit is needed.
        score_5 = int(res['label'][0])
        # The other overall ratings use a 1-10 range, so map 1-5 onto 1-10.
        score_10 = 1 + (score_5 - 1) * 9 / 4
        results[aspect] = score_10 * res['score']
    return results


Device set to use cpu


In [4]:

# Aspect names that steer the model's attention when detecting sentiment.
aspects_airline = ["seat_comfort", "cabin_staff", "food_beverages", "value_money"]

# One record per review: its per-aspect scores plus the airline it belongs to.
all_results_airline = [
    {**aspect_sentiment(review_text, aspects_airline), 'airline_name': carrier}
    for carrier, review_text in zip(airline_df['airline_name'], airline_df['content'])
]

# Collect the per-review records into a DataFrame.
absa_airline = pd.DataFrame(all_results_airline)

# Average every aspect over all reviews of the same airline, rounded to two decimals.
airline_aspect_means = absa_airline.groupby('airline_name')[aspects_airline].mean()
absa_summary_airline = airline_aspect_means.round(2).reset_index()

print("Prosečne ocene ključnih aspekata aviokompanija:")
print(absa_summary_airline)


Prosečne ocene ključnih aspekata aviokompanija:
                airline_name  seat_comfort  cabin_staff  food_beverages  \
0                 Air Canada          2.11         1.96            2.06   
1                 Air France          2.44         2.26            2.38   
2          American Airlines          1.79         1.63            1.74   
3            British Airways          2.58         2.42            2.52   
4     Cathay Pacific Airways          3.07         2.88            2.98   
5            Delta Air Lines          2.35         2.15            2.29   
6                   Emirates          2.88         2.70            2.82   
7             Etihad Airways          2.20         2.02            2.14   
8                Jet Airways          2.52         2.34            2.45   
9   Klm Royal Dutch Airlines          3.08         2.91            3.03   
10                 Lufthansa          3.11         2.92            3.03   
11         Malaysia Airlines          2.90         2

In [5]:
import pandas as pd

# Read the lounge review CSV into a DataFrame.
lounge_csv_path = "lounge_top20_df_filtered.csv"
lounge_df = pd.read_csv(lounge_csv_path)

In [None]:
# --- Lounge dataset ---
aspects_lounge = [
    "comfort",
    "cleanliness",
    "bar_beverages",
    "catering",
    "washrooms",
    "wifi_connectivity",
    "staff_service",
]

# One record per review: its per-aspect scores plus the airline it belongs to.
all_results_lounge = [
    {**aspect_sentiment(review_text, aspects_lounge), 'airline_name': carrier}
    for carrier, review_text in zip(lounge_df['airline_name'], lounge_df['content'])
]

absa_lounge = pd.DataFrame(all_results_lounge)

# Average every aspect per airline, rounded to two decimals.
lounge_aspect_means = absa_lounge.groupby('airline_name')[aspects_lounge].mean()
absa_summary_lounge = lounge_aspect_means.round(2).reset_index()

print("\nProsečne ocene lounge-a po aviokompanijama:")
print(absa_summary_lounge)


In [None]:
import pandas as pd

# Read the seat review CSV into a DataFrame.
seat_csv_path = "seat_top20_df_filtered.csv"
seat_df = pd.read_csv(seat_csv_path)

In [None]:

# --- Seat dataset ---
aspects_seat = [
    "seat_legroom",
    "seat_recline",
    "seat_width",
    "aisle_space",
    "viewing_tv",
]

# One record per review: its per-aspect scores plus the airline it belongs to.
all_results_seat = [
    {**aspect_sentiment(review_text, aspects_seat), 'airline_name': carrier}
    for carrier, review_text in zip(seat_df['airline_name'], seat_df['content'])
]

absa_seat = pd.DataFrame(all_results_seat)

# Average every aspect per airline, rounded to two decimals.
seat_aspect_means = absa_seat.groupby('airline_name')[aspects_seat].mean()
absa_summary_seat = seat_aspect_means.round(2).reset_index()

print("\nProsečne ocene sedišta po aviokompanijama:")
print(absa_summary_seat)


### Za veće skupove podataka treba promeniti način upisa u bazu

In [None]:
import os

import psycopg2
from psycopg2.extras import execute_values

# Replace the contents of seat_sentiment_summary with the rows of absa_summary_seat.

conn = None
cursor = None

try:
    # Connection settings can be overridden through the usual PG* environment
    # variables so real credentials never have to be written into the notebook;
    # the previous literal values remain as local-development fallbacks.
    conn = psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        database=os.environ.get("PGDATABASE", "airline_recommendations_db"),
        user=os.environ.get("PGUSER", "postgres"),
        password=os.environ.get("PGPASSWORD", "postgres"),
        port=int(os.environ.get("PGPORT", "5432")),
    )
    cursor = conn.cursor()

    cursor.execute("TRUNCATE TABLE seat_sentiment_summary;")

    # Numeric columns, in the same order as the INSERT column list below.
    seat_columns = ["seat_legroom", "seat_recline", "seat_width", "aisle_space", "viewing_tv"]
    seat_records = [
        (row['airline_name'],) + tuple(float(row[col]) for col in seat_columns)
        for _, row in absa_summary_seat.iterrows()
    ]

    # Send the rows in batched multi-row INSERTs instead of one statement per row.
    execute_values(
        cursor,
        """
        INSERT INTO seat_sentiment_summary (
            airline_name,
            seat_legroom,
            seat_recline,
            seat_width,
            aisle_space,
            viewing_tv
        )
        VALUES %s
        """,
        seat_records,
    )

    conn.commit()
    print("Seat podaci uspešno upisani!")

except Exception as e:
    # commit() is skipped on any error, so nothing from this attempt is committed.
    print("Greška prilikom upisa seat_sentiment_summary:", e)

finally:
    if cursor:
        cursor.close()
    if conn:
        conn.close()


In [None]:
import os

import psycopg2
from psycopg2.extras import execute_values

# Replace the contents of lounge_sentiment_summary with the rows of absa_summary_lounge.

conn = None
cursor = None

try:
    # Connection settings can be overridden through the usual PG* environment
    # variables so real credentials never have to be written into the notebook;
    # the previous literal values remain as local-development fallbacks.
    conn = psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        database=os.environ.get("PGDATABASE", "airline_recommendations_db"),
        user=os.environ.get("PGUSER", "postgres"),
        password=os.environ.get("PGPASSWORD", "postgres"),
        port=int(os.environ.get("PGPORT", "5432")),
    )
    cursor = conn.cursor()

    cursor.execute("TRUNCATE TABLE lounge_sentiment_summary;")

    # Numeric columns, in the same order as the INSERT column list below.
    lounge_columns = [
        "comfort",
        "cleanliness",
        "bar_beverages",
        "catering",
        "washrooms",
        "wifi_connectivity",
        "staff_service",
    ]
    lounge_records = [
        (row['airline_name'],) + tuple(float(row[col]) for col in lounge_columns)
        for _, row in absa_summary_lounge.iterrows()
    ]

    # Send the rows in batched multi-row INSERTs instead of one statement per row.
    execute_values(
        cursor,
        """
        INSERT INTO lounge_sentiment_summary (
            airline_name,
            comfort,
            cleanliness,
            bar_beverages,
            catering,
            washrooms,
            wifi_connectivity,
            staff_service
        )
        VALUES %s
        """,
        lounge_records,
    )

    conn.commit()
    print("Lounge podaci uspešno upisani!")

except Exception as e:
    # commit() is skipped on any error, so nothing from this attempt is committed.
    print("Greška prilikom upisa lounge_sentiment_summary:", e)

finally:
    if cursor:
        cursor.close()
    if conn:
        conn.close()


In [None]:
import os

import psycopg2
from psycopg2.extras import execute_values

# Replace the contents of airline_sentiment_summary with the rows of absa_summary_airline.

conn = None
cursor = None

try:
    # Connection settings can be overridden through the usual PG* environment
    # variables so real credentials never have to be written into the notebook;
    # the previous literal values remain as local-development fallbacks.
    conn = psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        database=os.environ.get("PGDATABASE", "airline_recommendations_db"),
        user=os.environ.get("PGUSER", "postgres"),
        password=os.environ.get("PGPASSWORD", "postgres"),
        port=int(os.environ.get("PGPORT", "5432")),
    )
    cursor = conn.cursor()

    cursor.execute("TRUNCATE TABLE airline_sentiment_summary;")

    # Numeric columns, in the same order as the INSERT column list below.
    airline_columns = ["seat_comfort", "cabin_staff", "food_beverages", "value_money"]
    airline_records = [
        (row['airline_name'],) + tuple(float(row[col]) for col in airline_columns)
        for _, row in absa_summary_airline.iterrows()
    ]

    # Send the rows in batched multi-row INSERTs instead of one statement per row.
    execute_values(
        cursor,
        """
        INSERT INTO airline_sentiment_summary (
            airline_name,
            seat_comfort,
            cabin_staff,
            food_beverages,
            value_money
        )
        VALUES %s
        """,
        airline_records,
    )

    conn.commit()
    print("Airline podaci uspešno upisani!")

except Exception as e:
    # commit() is skipped on any error, so nothing from this attempt is committed.
    print("Greška prilikom upisa airline_sentiment_summary:", e)

finally:
    if cursor:
        cursor.close()
    if conn:
        conn.close()
