In [9]:
import pandas as pd

# Load the three review datasets (airline, lounge, seat) from CSV files
# located in the current working directory.
airline_df, lounge_df, seat_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "airline_top20_df_filtered.csv",
        "lounge_top20_df_filtered.csv",
        "seat_top20_df_filtered.csv",
    )
)

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk

# Make sure the VADER lexicon is installed before the analyzer is built.
# The download is only attempted when the resource cannot be found, so a
# fresh environment works without editing the cell and repeated runs stay offline.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

# Initialise the VADER sentiment analyzer.
sia = SentimentIntensityAnalyzer()


# Work on a copy so the frame loaded from CSV is left untouched.
airline_df_copy = airline_df.copy()

# Input columns (aspect ratings) used as features when training the model.
airline_columns = ['value_money_rating', 'seat_comfort_rating', 'cabin_staff_rating', 'food_beverages_rating']

X = airline_df_copy[airline_columns]
y = airline_df_copy['overall_rating']

# Split into training and test sets (20% held out, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Gradient Boosting model. Author's rationale for this choice:
# - it can model non-linear relations between the aspect ratings and the overall rating
# - it can capture that aspects do not contribute equally and that their
#   influence depends on the combination of values
# - it copes well with real, noisy data such as user reviews
# - it contributes to the interpretability of the system
model = GradientBoostingRegressor(random_state=42)
model.fit(X_train, y_train)

# Report the fit on the held-out reviews so the weights below can be judged;
# the test split was previously created but never used.
print(f"R^2 na test skupu: {model.score(X_test, y_test):.3f}")

# Extract the per-feature importances, which are used as aspect weights.
weights_airline = model.feature_importances_
print("ML Težine:")
for col, w in zip(airline_columns, weights_airline):
    print(f"{col}: {w:.3f}")
# Computes the overall score of one review from the learned weights and the
# review's actual aspect values.
def compute_ml_score(row):
    """Return the weighted sum of the airline aspect ratings found in ``row``.

    Each column listed in ``airline_columns`` is read from ``row`` and
    multiplied by the entry of ``weights_airline`` at the same position;
    the products are added up in column order.
    """
    weighted_total = 0
    for aspect, importance in zip(airline_columns, weights_airline):
        weighted_total += row[aspect] * importance
    return weighted_total

# Score every review with the weighted-sum helper (evaluated row by row).
airline_df_copy['ml_score'] = airline_df_copy.apply(compute_ml_score, axis=1)

# Min-max rescaling onto the 1-10 scale: the lowest raw score maps to 1, the highest to 10.
# NOTE(review): if every raw score is identical the range is 0 and the result is NaN.
ml_min = airline_df_copy['ml_score'].min()
ml_max = airline_df_copy['ml_score'].max()
airline_df_copy['ml_score_norm'] = (
    airline_df_copy['ml_score'].sub(ml_min).mul(9).div(ml_max - ml_min).add(1)
)

# Runs sentiment analysis on a piece of text and maps the result to the rating scale.
def sentiment_to_1_10(text):
    """Map the VADER compound sentiment of ``text`` linearly onto 1-10.

    The compound value is taken to lie between -1 and 1 (as noted by the
    original author), so -1 becomes 1, 0 becomes 5.5 and +1 becomes 10.
    Relies on the module-level analyzer ``sia``.
    """
    compound = sia.polarity_scores(text)['compound']
    unit_fraction = (compound + 1) / 2  # shift and scale to the 0..1 range
    return 1 + 9 * unit_fraction

# Sentiment score (1-10) of each review text.
airline_df_copy['sentiment_score'] = airline_df_copy['content'].map(sentiment_to_1_10)

# The final score blends the numeric-rating score (70%) with the sentiment score (30%).
airline_df_copy['final_score'] = (
    airline_df_copy['ml_score_norm'].mul(0.7)
    .add(airline_df_copy['sentiment_score'].mul(0.3))
)

# The final score is nudged up or down depending on whether the `recommended`
# attribute is 'yes' or 'no', while keeping the result on the same 1-10 scale.
def adjust_by_recommended(row):
    """Scale ``row['final_score']`` by the reviewer's recommendation and clamp to 1-10.

    'yes' multiplies the score by 1.05, 'no' by 0.95; any other value leaves
    it unscaled. Results below 1 become 1 and results above 10 become 10.

    NOTE(review): only the exact strings 'yes' / 'no' trigger an adjustment —
    confirm that this matches how the column is encoded in the CSV.
    """
    adjusted = row['final_score']
    verdict = row['recommended']
    if verdict == 'yes':
        adjusted = adjusted * 1.05
    elif verdict == 'no':
        adjusted = adjusted * 0.95

    # Clamp with guard clauses instead of nested min/max.
    if adjusted < 1:
        return 1
    if adjusted > 10:
        return 10
    return adjusted

# Apply the recommendation adjustment to every review.
airline_df_copy['final_score_adjusted'] = airline_df_copy.apply(adjust_by_recommended, axis=1)

# Show the 20 best airlines by mean adjusted score.
# Ranking and selection are done on the unrounded means; rounding happens
# last and is for display only. Rounding first would create artificial ties
# whose relative order (and top-20 membership) is then arbitrary.
top_20_airlines = (
    airline_df_copy.groupby('airline_name')['final_score_adjusted']
      .mean()
      .sort_values(ascending=False)
      .head(20)
      .round(2)
)

print("\nTOP 20 AVIOKOMPANIJA (ML + Sentiment + Recommended):")
print(top_20_airlines)




ML Težine:
value_money_rating: 0.716
seat_comfort_rating: 0.060
cabin_staff_rating: 0.156
food_beverages_rating: 0.068

TOP 20 AVIOKOMPANIJA (ML + Sentiment + Recommended):
airline_name
Qatar Airways               8.47
Singapore Airlines          8.39
Lufthansa                   8.01
Qantas Airways              7.90
Thai Airways                7.90
Cathay Pacific Airways      7.85
Klm Royal Dutch Airlines    7.75
Malaysia Airlines           7.74
Emirates                    7.46
Turkish Airlines            7.32
British Airways             7.05
Virgin Atlantic Airways     6.82
Jet Airways                 6.67
Air France                  6.61
Delta Air Lines             6.60
Etihad Airways              6.38
Air Canada                  5.94
American Airlines           5.30
United Airlines             5.24
Us Airways                  4.94
Name: final_score_adjusted, dtype: float64


In [11]:
# VADER sentiment analyzer used by the sentiment helper in this cell.
sia = SentimentIntensityAnalyzer()

# Work on a copy so the frame loaded from CSV is left untouched.
lounge_df_copy = lounge_df.copy()

# Aspect-rating columns used as model features for the lounge dataset.
lounge_columns = ['comfort_rating','cleanliness_rating','bar_beverages_rating','catering_rating','washrooms_rating','wifi_connectivity_rating','staff_service_rating']

X = lounge_df_copy[lounge_columns]
y = lounge_df_copy['overall_rating']

# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = GradientBoostingRegressor(random_state=42)
model.fit(X_train, y_train)

# Report the fit on the held-out reviews so the weights below can be judged;
# the test split was previously created but never used.
print(f"R^2 na test skupu: {model.score(X_test, y_test):.3f}")

# Per-feature importances, used as aspect weights.
weights_lounge = model.feature_importances_
print("ML Težine:")
for col, w in zip(X.columns, weights_lounge):
    print(f"{col}: {w:.3f}")


def compute_ml_score(row):
    """Return the weighted sum of the lounge aspect ratings found in ``row``.

    Pairs each name in ``lounge_columns`` with the entry of ``weights_lounge``
    at the same position and adds up ``row[name] * weight``.
    Note: this rebinds the name ``compute_ml_score`` used by the airline cell.
    """
    running_sum = 0
    for position, aspect in enumerate(lounge_columns[:len(weights_lounge)]):
        running_sum += row[aspect] * weights_lounge[position]
    return running_sum

# Weighted-sum score for every lounge review (evaluated row by row).
lounge_df_copy['ml_score'] = lounge_df_copy.apply(compute_ml_score, axis=1)


# Min-max rescaling of the raw score onto the 1-10 scale.
# NOTE(review): a zero range (all scores equal) would produce NaN here.
ml_min, ml_max = lounge_df_copy['ml_score'].agg(['min', 'max'])
lounge_df_copy['ml_score_norm'] = (
    lounge_df_copy['ml_score'].sub(ml_min).mul(9).div(ml_max - ml_min).add(1)
)

def sentiment_to_1_10(text):
    """Convert the VADER compound score of ``text`` into a value on the 1-10 scale.

    Uses the module-level analyzer ``sia``. Assumes the compound score lies
    in [-1, 1], in which case the result lies in [1, 10].
    """
    scores = sia.polarity_scores(text)
    shifted = scores['compound'] + 1  # 0..2 under the assumption above
    return 1 + 9 * (shifted / 2)

# Sentiment score (1-10) of each lounge review text.
lounge_df_copy['sentiment_score'] = lounge_df_copy['content'].map(sentiment_to_1_10)


# Blend: 70% numeric-rating score, 30% sentiment score.
lounge_df_copy['final_score'] = (
    lounge_df_copy['ml_score_norm'].mul(0.7)
    .add(lounge_df_copy['sentiment_score'].mul(0.3))
)

def adjust_by_recommended(row):
    """Adjust ``row['final_score']`` by the recommendation flag, bounded to 1-10.

    A 'yes' recommendation raises the score by 5%, a 'no' lowers it by 5%;
    every other value leaves it as is. The result is then limited to the
    1-10 scale.
    """
    base = row['final_score']
    flag = row['recommended']
    if flag == 'yes':
        factor = 1.05  # +5%
    elif flag == 'no':
        factor = 0.95  # -5%
    else:
        # No recognised recommendation: only the clamping applies.
        return min(max(base, 1), 10)
    return min(max(base * factor, 1), 10)

# Apply the recommendation adjustment to every lounge review.
lounge_df_copy['final_score_adjusted'] = lounge_df_copy.apply(adjust_by_recommended, axis=1)


# Top 20 airlines by mean adjusted lounge score.
# Sort and select on the unrounded means, then round for display only.
# Rounding first would create artificial ties whose relative order (and
# top-20 membership) is then arbitrary.
top_20_airlines_lounge = (
    lounge_df_copy.groupby('airline_name')['final_score_adjusted']
      .mean()
      .sort_values(ascending=False)
      .head(20)
      .round(2)
)

print("\nTOP 20 AVIOKOMPANIJA (ML + Sentiment + Recommended)")
print(top_20_airlines_lounge)


ML Težine:
comfort_rating: 0.190
cleanliness_rating: 0.143
bar_beverages_rating: 0.184
catering_rating: 0.163
washrooms_rating: 0.067
wifi_connectivity_rating: 0.063
staff_service_rating: 0.190

TOP 20 AVIOKOMPANIJA (ML + Sentiment + Recommended)
airline_name
Qatar Airways               8.20
Turkish Airlines            8.01
Qantas Airways              7.88
Emirates                    7.46
Cathay Pacific Airways      7.40
Virgin Atlantic Airways     7.00
Etihad Airways              7.00
Lufthansa                   6.90
Malaysia Airlines           6.85
Thai Airways                6.82
Air Canada                  6.82
British Airways             6.77
Jet Airways                 6.63
Singapore Airlines          6.51
Us Airways                  6.50
Klm Royal Dutch Airlines    6.50
Delta Air Lines             5.93
American Airlines           5.91
Air France                  5.81
United Airlines             5.15
Name: final_score_adjusted, dtype: float64


In [12]:
# VADER sentiment analyzer used by the sentiment helper in this cell.
sia = SentimentIntensityAnalyzer()

# Work on a copy so the frame loaded from CSV is left untouched.
seat_df_copy = seat_df.copy()

# Aspect-rating columns used as model features for the seat dataset.
seat_columns = ['seat_legroom_rating','seat_recline_rating','seat_width_rating','aisle_space_rating','viewing_tv_rating']


X = seat_df_copy[seat_columns]
y = seat_df_copy['overall_rating']

# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = GradientBoostingRegressor(random_state=42)
model.fit(X_train, y_train)

# Report the fit on the held-out reviews so the weights below can be judged;
# the test split was previously created but never used.
print(f"R^2 na test skupu: {model.score(X_test, y_test):.3f}")

# Per-feature importances, used as aspect weights.
weights_seat = model.feature_importances_
print("ML Težine:")
for col, w in zip(X.columns, weights_seat):
    print(f"{col}: {w:.3f}")

def compute_ml_score(row):
    """Return the weighted sum of the seat aspect ratings found in ``row``.

    Every column in ``seat_columns`` is multiplied by the entry of
    ``weights_seat`` at the same position and the products are summed.
    Note: this rebinds the name ``compute_ml_score`` defined in earlier cells.
    """
    contributions = [row[aspect] * weight for aspect, weight in zip(seat_columns, weights_seat)]
    return sum(contributions)

# Weighted-sum score for every seat review (evaluated row by row).
seat_df_copy['ml_score'] = seat_df_copy.apply(compute_ml_score, axis=1)

# Min-max rescaling of the raw score onto the 1-10 scale.
# NOTE(review): a zero range (all scores equal) would produce NaN here.
ml_min = seat_df_copy['ml_score'].min()
ml_max = seat_df_copy['ml_score'].max()
seat_df_copy['ml_score_norm'] = (
    (seat_df_copy['ml_score'] - ml_min) * 9 / (ml_max - ml_min) + 1
)


def sentiment_to_1_10(text):
    """Return the sentiment of ``text`` as a number on the 1-10 scale.

    The VADER compound score from the module-level ``sia`` is rescaled
    linearly; assuming it lies in [-1, 1], the result lies in [1, 10].
    """
    polarity = sia.polarity_scores(text)
    comp = polarity['compound']
    zero_to_one = (comp + 1) / 2
    return 1 + 9 * zero_to_one

# Sentiment score (1-10) of each seat review text.
seat_df_copy['sentiment_score'] = seat_df_copy['content'].map(sentiment_to_1_10)


# Blend: 70% numeric-rating score, 30% sentiment score.
seat_df_copy['final_score'] = (
    seat_df_copy['ml_score_norm'].mul(0.7)
    .add(seat_df_copy['sentiment_score'].mul(0.3))
)


def adjust_by_recommended(row):
    """Apply the +/-5% recommendation adjustment to ``row['final_score']``.

    'yes' scales the score by 1.05 and 'no' by 0.95; other values leave it
    unchanged. The outcome is limited to the 1-10 scale before it is returned.
    """
    original = row['final_score']
    answer = row['recommended']
    if answer == 'yes':
        scaled = original * 1.05
    else:
        scaled = original * 0.95 if answer == 'no' else original
    lower_bounded = max(scaled, 1)
    return min(lower_bounded, 10)

# Apply the recommendation adjustment to every seat review.
seat_df_copy['final_score_adjusted'] = seat_df_copy.apply(adjust_by_recommended, axis=1)


# Top 20 airlines by mean adjusted seat score.
# Sort and select on the unrounded means, then round for display only.
# Rounding first would create artificial ties whose relative order (and
# top-20 membership) is then arbitrary.
top_20_airlines_seat = (
    seat_df_copy.groupby('airline_name')['final_score_adjusted']
      .mean()
      .sort_values(ascending=False)
      .head(20)
      .round(2)
)

print("\nTOP 20 AVIOKOMPANIJA (ML + Sentiment + Recommended):")
print(top_20_airlines_seat)


ML Težine:
seat_legroom_rating: 0.520
seat_recline_rating: 0.099
seat_width_rating: 0.276
aisle_space_rating: 0.071
viewing_tv_rating: 0.035

TOP 20 AVIOKOMPANIJA (ML + Sentiment + Recommended):
airline_name
Qatar Airways               7.89
Thai Airways                7.42
Malaysia Airlines           7.14
Qantas Airways              6.69
Turkish Airlines            6.53
Singapore Airlines          6.32
Jet Airways                 6.06
Air Canada                  5.87
Cathay Pacific Airways      5.47
British Airways             5.15
Emirates                    4.77
Lufthansa                   4.51
Air France                  4.42
Virgin Atlantic Airways     4.09
Delta Air Lines             3.92
Etihad Airways              3.91
United Airlines             3.64
Klm Royal Dutch Airlines    3.51
American Airlines           2.92
Us Airways                  2.83
Name: final_score_adjusted, dtype: float64


In [8]:
# Generates a one-sentence description based on aspect importance.
# Receives the column names, the aspect weights and an optional dict that maps
# the English column names to Serbian display names.
def generate_description_universal(columns, weights, col_map=None):
    """
    Describe in one sentence which aspects carry the most weight.

    columns : list[str] - column/aspect names
    weights : list[float] - factor weights (feature_importances), aligned with ``columns``
    col_map : dict (optional) - maps column names to readable names; names
              missing from the map are shown unchanged

    Returns the sentence as a str. With three or more aspects the two
    heaviest are named as dominant and the remaining ones as having less
    influence; with one or two aspects only the dominant part is produced.

    Raises ValueError when there is no aspect to describe.
    """
    if col_map is None:
        # Without a mapping the original column names are used as display names.
        col_map = {col: col for col in columns}

    # Order the aspects from the largest weight to the smallest.
    aspects_weights = sorted(zip(columns, weights), key=lambda x: x[1], reverse=True)

    # Translate to display names.
    parts = [col_map.get(col, col) for col, _ in aspects_weights]

    if not parts:
        raise ValueError("columns and weights must contain at least one aspect")

    # Fewer than three aspects: there is no "minor influence" tail to list,
    # and indexing parts[1] would fail for a single aspect.
    if len(parts) == 1:
        return f"Ukupne ocene najviše zavise od {parts[0]}."
    if len(parts) == 2:
        return f"Ukupne ocene najviše zavise od {parts[0]} i {parts[1]}."

    sentence = f"Ukupne ocene najviše zavise od {parts[0]} i {parts[1]}, dok {', '.join(parts[2:])} imaju manji uticaj."

    return sentence

# Call the description generator for each of the three datasets.
airline_col_map = dict(
    value_money_rating='odnos cene i kvaliteta',
    seat_comfort_rating='komfor sedišta',
    cabin_staff_rating='osoblje u kabini',
    food_beverages_rating='hrana i piće',
)
description_airline = generate_description_universal(
    columns=airline_columns, weights=weights_airline, col_map=airline_col_map
)
print("\nAirline opis važnosti aspekata:", description_airline, sep="\n")


lounge_col_map = dict(
    comfort_rating='komfor',
    cleanliness_rating='čistoća',
    bar_beverages_rating='piće u baru',
    catering_rating='hranjenje',
    washrooms_rating='toaleti',
    wifi_connectivity_rating='wifi konekcija',
    staff_service_rating='osoblje',
)
description_lounge = generate_description_universal(
    columns=lounge_columns, weights=weights_lounge, col_map=lounge_col_map
)
print("\nLounge opis važnosti aspekata:", description_lounge, sep="\n")


seat_col_map = dict(
    seat_legroom_rating='prostor za noge',
    seat_recline_rating='naslon sedišta',
    seat_width_rating='širina sedišta',
    aisle_space_rating='prostor uz prolaz',
    viewing_tv_rating='TV ekran',
)
description_seat = generate_description_universal(
    columns=seat_columns, weights=weights_seat, col_map=seat_col_map
)
print("\nSeat opis važnosti aspekata:", description_seat, sep="\n")

# Collect the three sentences in a DataFrame for the later database insert
# (one row per dataset, columns: dataset, description).
description_df = pd.DataFrame({
    "dataset": ["airline", "lounge", "seat"],
    "description": [description_airline, description_lounge, description_seat],
})


Airline opis važnosti aspekata:
Ukupne ocene najviše zavise od odnos cene i kvaliteta i osoblje u kabini, dok hrana i piće, komfor sedišta imaju manji uticaj.

Lounge opis važnosti aspekata:
Ukupne ocene najviše zavise od osoblje i komfor, dok piće u baru, hranjenje, čistoća, toaleti, wifi konekcija imaju manji uticaj.

Seat opis važnosti aspekata:
Ukupne ocene najviše zavise od prostor za noge i širina sedišta, dok naslon sedišta, prostor uz prolaz, TV ekran imaju manji uticaj.


In [6]:
import psycopg2

import os

# Database connection parameters. Each value can be overridden through the
# corresponding libpq environment variable so credentials do not have to be
# stored in the notebook; the fallbacks are the values that were hard-coded here.
DB_HOST = os.environ.get("PGHOST", "localhost")
DB_NAME = os.environ.get("PGDATABASE", "airline_recommendations_db")
DB_USER = os.environ.get("PGUSER", "postgres")
DB_PASSWORD = os.environ.get("PGPASSWORD", "postgres")
DB_PORT = int(os.environ.get("PGPORT", 5432))

# Bind both handles up front: if connect() raises, the cleanup in `finally`
# would otherwise hit an undefined name and mask the real error.
conn = None
cursor = None

try:
    # Connect to the database.
    conn = psycopg2.connect(
        host=DB_HOST,
        database=DB_NAME,
        user=DB_USER,
        password=DB_PASSWORD,
        port=DB_PORT
    )
    cursor = conn.cursor()

    # Remove the previously stored rows.
    cursor.execute("TRUNCATE TABLE airline_ratings_simple;")
    cursor.execute("TRUNCATE TABLE lounge_ratings_simple;")
    cursor.execute("TRUNCATE TABLE seat_ratings_simple;")

    # Insert the new rows (values are passed as query parameters;
    # float() converts the pandas/numpy scalar to a plain Python float).
    for airline, score in top_20_airlines.items():
        cursor.execute(
            "INSERT INTO airline_ratings_simple (airline_name, overall_rating) VALUES (%s, %s);",
            (airline, float(score))
        )

    for airline, score in top_20_airlines_lounge.items():
        cursor.execute(
            "INSERT INTO lounge_ratings_simple (airline_name, overall_rating) VALUES (%s, %s);",
            (airline, float(score))
        )

    for airline, score in top_20_airlines_seat.items():
        cursor.execute(
            "INSERT INTO seat_ratings_simple (airline_name, overall_rating) VALUES (%s, %s);",
            (airline, float(score))
        )

    # Nothing is persisted until this commit succeeds.
    conn.commit()
    print("\nPodaci uspešno upisani u PostgreSQL!")

except Exception as e:
    # Best-effort cell: report the failure instead of aborting the notebook.
    print("Greška prilikom upisa u bazu:", e)

finally:
    # Release whatever was actually opened.
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()



Podaci uspešno upisani u PostgreSQL!


In [9]:
import psycopg2

import os

# Connection parameters. Each value can be overridden through the
# corresponding libpq environment variable so credentials do not have to be
# stored in the notebook; the fallbacks are the values that were hard-coded here.
DB_HOST = os.environ.get("PGHOST", "localhost")
DB_NAME = os.environ.get("PGDATABASE", "airline_recommendations_db")
DB_USER = os.environ.get("PGUSER", "postgres")
DB_PASSWORD = os.environ.get("PGPASSWORD", "postgres")
DB_PORT = int(os.environ.get("PGPORT", 5432))

# Bind both handles up front: if connect() raises, the cleanup in `finally`
# would otherwise hit an undefined name and mask the real error.
conn = None
cursor = None

try:
    conn = psycopg2.connect(
        host=DB_HOST,
        database=DB_NAME,
        user=DB_USER,
        password=DB_PASSWORD,
        port=DB_PORT
    )
    cursor = conn.cursor()

    # Remove the previously stored rows.
    cursor.execute("TRUNCATE TABLE aspect_importance_airline;")

    # Insert one row per dataset (values are passed as query parameters).
    for _, row in description_df.iterrows():
        cursor.execute(
            "INSERT INTO aspect_importance_airline (dataset, description) VALUES (%s, %s);",
            (row['dataset'], row['description'])
        )

    # Nothing is persisted until this commit succeeds.
    conn.commit()
    print("Opis važnosti aspekata uspešno upisan u bazu!")

except Exception as e:
    # Best-effort cell: report the failure instead of aborting the notebook.
    print("Greška prilikom upisa u bazu:", e)

finally:
    # Release whatever was actually opened.
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()


Opis važnosti aspekata uspešno upisan u bazu!
