# import libraries

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots

from datasist.structdata import detect_outliers


# open dataframe

In [None]:
df=pd.read_csv('hotel_review_final.csv')

In [None]:
df.head()

# EDA & outliers

In [None]:
df.describe()

In [None]:
df.describe(include='object')

#### standard deviation in average_hotel_score

In [None]:
df['average_hotel_score'].mean() , df['average_hotel_score'].median()

In [None]:
df['average_hotel_score'].mode()

In [None]:
sns.boxplot(x='average_hotel_score', data=df, palette='viridis')

In [None]:
len(detect_outliers(df , 0 , ['average_hotel_score']))

In [None]:
detect_outliers(df , 0 , ['average_hotel_score'])

In [None]:
detect_outliers(df , 1 , ['average_hotel_score'])

#### These are outliers only in the statistical sense, but I cannot delete them: it is normal for some hotels to receive a lot of scoring because they have many reviewers and high occupancy, so removing them would change the data and distort the relationship between hotel occupancy and average score.

In [None]:
df['additional_number_of_scoring'].mean() , df['additional_number_of_scoring'].median()

In [None]:
df['additional_number_of_scoring'].mode()

In [None]:
px.box(df,x='additional_number_of_scoring')

In [None]:
detect_outliers(df , 0 , ['additional_number_of_scoring'])

In [None]:
len(detect_outliers(df , 0 , ['additional_number_of_scoring']))

In [None]:
len(detect_outliers(df , 1 , ['additional_number_of_scoring']))

#### The total number of reviews has a left-sloping distribution

In [None]:
df['total_number_of_reviews'].mean() , df['total_number_of_reviews'].median()

In [None]:
df['total_number_of_reviews'].mode()

#### There are mathematical outliers, but they cannot be considered true outliers: it is normal for a hotel to have a large number of reviews, and no negative review counts appear. This is examined further in the charts.

In [None]:
# Tukey fences (1.5 * IQR) for total_number_of_reviews.
reviews = df['total_number_of_reviews']

Q1, Q3 = reviews.quantile(.25), reviews.quantile(.75)
IQR = Q3 - Q1

LOWER_BOUND = Q1 - 1.5 * IQR
UPPER_BOUND = Q3 + 1.5 * IQR

# Values strictly outside the fences on either side.
is_below = reviews < LOWER_BOUND
is_above = reviews > UPPER_BOUND
outliers = reviews[is_below | is_above]

for label, value in (
    ("Q1:", Q1),
    ("Q3:", Q3),
    ("lower_bound:", LOWER_BOUND),
    ("upper_bound:", UPPER_BOUND),
    ("Outliers:", outliers),
):
    print(label, value)



In [None]:
len(detect_outliers(df , 0 , ['total_number_of_reviews']))

In [None]:
len(detect_outliers(df , 1 , ['total_number_of_reviews']))

In [None]:
sns.kdeplot(df['total_number_of_reviews'])

#### Reviewer scores are within the normal range and cannot be considered outliers: there are no negative values, and it is normal for hotels to receive both very low and very high scores from reviewers.

In [None]:
df['reviewer_score'].mean() , df['reviewer_score'].median()

In [None]:
df['reviewer_score'].mode()

In [None]:
sns.kdeplot(df['reviewer_score'])

In [None]:
len(detect_outliers(df , 0 , ['reviewer_score']))

In [None]:
len(detect_outliers(df , 1 , ['reviewer_score']))

# univariate analysis

In [None]:
df.columns

# 1) nationalities that put their reviews on the site and number of them 

In [None]:
df['reviewer_nationality'].value_counts(ascending=False).head(10).T.to_frame().reset_index()

### A) total of them

In [None]:
# Review count per nationality (most frequent first) as a two-column frame.
nationality_counts = df['reviewer_nationality'].value_counts(ascending=False)
df1 = nationality_counts.to_frame().reset_index()
df1.columns = ['reviewer_nationality', 'count']


In [None]:
# Treemap of every reviewer nationality, sized and coloured by review count.
fig = px.treemap(
    df1,
    path=['reviewer_nationality'],
    values='count',
    color='count',
    color_continuous_scale='Viridis',
    title='Reviewer Nationalities Treemap',
)

# Chart styling: label each tile with its name and value, in large white text.
tile_text = dict(
    texttemplate="%{label}<br>%{value}",
    textfont_size=20,
    textfont_color="white",
)
fig.update_traces(**tile_text)

# Dark theme: black backgrounds with white text.
dark_theme = dict(plot_bgcolor='black', paper_bgcolor='black', font_color='white')
fig.update_layout(**dark_theme)

# Render the chart.
fig.show()

### B) top ten 

In [None]:
# Pie and bar chart, side by side, of the ten most frequent reviewer nationalities.
fig1 = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type': 'domain'}, {'type': 'xy'}]],  # pie on the left, bar on the right
    subplot_titles=("Top 10 Reviewer Nationalities (Pie)", "count of top ten reviewers"),
)

# Ten most common nationalities as a two-column frame.
top_counts = df['reviewer_nationality'].value_counts(ascending=False).head(10)
df2 = top_counts.to_frame().reset_index()
df2.columns = ['reviewer_nationality', 'count']

pie_trace = go.Pie(
    labels=df2['reviewer_nationality'],
    values=df2['count'],
    name="Pie Chart",
)
bar_trace = go.Bar(
    x=df2['reviewer_nationality'],
    y=df2['count'],
    text=df2['count'],
    textposition='auto',
    marker_color='lightskyblue',
)

fig1.add_trace(pie_trace, row=1, col=1)
fig1.add_trace(bar_trace, row=1, col=2)

# Dark theme and fixed canvas size; the returned figure is the cell output.
fig1.update_layout(
    width=1200,
    height=600,
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
)

### c) the least 10 

In [None]:
df['reviewer_nationality'].value_counts(ascending=True).head(10).T.to_frame().reset_index()

In [None]:
# Pie and bar chart, side by side, of the ten LEAST frequent reviewer nationalities.
# The subplot titles previously said "Top 10" (copied from the top-ten chart),
# which mislabelled this figure.
fig2 = make_subplots(
    rows=1, cols=2,
    specs=[[{'type': 'domain'}, {'type': 'xy'}]],  # pie , bar
    subplot_titles=("Least 10 Reviewer Nationalities (Pie)", "count of least ten reviewers")
)

# Ten rarest nationalities (ascending counts) as a two-column frame.
df3 = df['reviewer_nationality'].value_counts(ascending=True).head(10).to_frame().reset_index()
df3.columns = ['reviewer_nationality', 'count']

# Pie
fig2.add_trace(
    go.Pie(
        labels=df3['reviewer_nationality'],
        values=df3['count'],
        name="Pie Chart"
    ),
    row=1, col=1)

# Bar
fig2.add_trace(
    go.Bar(
        x=df3['reviewer_nationality'],
        y=df3['count'],
        text=df3['count'],
        textposition='auto',
        marker_color='lightskyblue'
    ),
    row=1, col=2
)

# Dark theme and fixed canvas size; the returned figure is the cell output.
fig2.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    width=1200,
    height=600
)

# 2) total_number_of_reviews

In [None]:
df.columns

In [None]:
# Largest review count; max() does not need the column sorted first.
df['total_number_of_reviews'].max()

In [None]:
# Smallest review count; min() does not need the column sorted first.
df['total_number_of_reviews'].min()

# 3) review_month & year  (total reviewers distribution)

In [None]:
df.columns

In [None]:
df4= df['review_month'].value_counts(ascending=False).head().T.to_frame().reset_index()

df4.columns = ['review_month' , 'count']
df4

In [None]:
fig3 = px.bar(df4 , x= 'review_month' , y='count' , color='review_month' , text_auto=True)

fig3.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    width=600,
    height=600
)






In [None]:
df5= df['review_month'].value_counts(ascending=False).T.to_frame().reset_index()

df5.columns = ['review_month' , 'count']
df5

In [None]:
fig4 = px.pie(df5 , names='review_month' , values='count',width=600,height=600)

fig4.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
)


fig4.show()

In [None]:
df21= df['review_year'].value_counts(ascending=False).head().T.to_frame().reset_index()

df21.columns = ['review_year' , 'count']
df21

In [None]:
fig22 = px.bar(df21 , x= 'review_year' , y='count' , color='review_year' , text_auto=True)

fig22.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    width=600,
    height=600
)


# 4) review_seasons

In [None]:
df.columns

In [None]:
df['review_season'].value_counts().reset_index()

In [None]:
order = df['review_season'].value_counts().index

px.histogram(df , x='review_season' ,
              text_auto=True ,
                color= 'review_season' ,
                  width=600 , height=600 ,
                  category_orders={'review_season': order} )



# 5) reviewer_trip_type

In [None]:
df.columns

In [None]:
df['reviewer_trip_type'].value_counts().to_frame()

In [None]:
px.histogram(df , x='reviewer_trip_type',
             color='reviewer_trip_type',
             text_auto=True)

# 6) travelers_trip_type

In [None]:
df6 = df['travelers_type'].value_counts().to_frame().reset_index()

df6.columns = ['travelers_type' , 'count']

df6

In [None]:
# Pie and bar chart, side by side, of review counts per travelers_type (from df6).
# The subplot titles previously named reviewers_trip_type, which is a different
# column from the one plotted here.
fig5 = make_subplots(
    rows=1, cols=2,
    specs=[[{'type': 'domain'}, {'type': 'xy'}]],  # pie , bar
    subplot_titles=("travelers_type percentage", "travelers_type count")
)

# Share of each travelers_type.
fig5.add_trace(
    go.Pie(
        labels=df6['travelers_type'],
        values=df6['count'],
        name="pie chart",
    ),
    row=1, col=1
)

# Absolute count of each travelers_type.
fig5.add_trace(
    go.Bar(
        x=df6['travelers_type'],
        y=df6['count'],
        text=df6['count'],
        textposition='auto',
        marker_color='lightskyblue'
    ),
    row=1, col=2
)

# Dark theme and fixed canvas size; the returned figure is the cell output.
fig5.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    width=1200,
    height=600
)

# 7) pets_allowed

In [None]:
df.columns

In [None]:
df['pets_allowed'].value_counts().to_frame()

In [None]:
fig6 = px.histogram(df ,
                     x='pets_allowed',
                       text_auto=True )

fig6.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    width=500,
    height=600)

# bi-variate analysis

# 8)how many additional number of scoring done for each hotel 

In [None]:
df.columns

In [None]:
df7 = df[['additional_number_of_scoring', 'hotel_name']].drop_duplicates()

df7.sort_values(by='additional_number_of_scoring' , ascending=False).reset_index().drop('index' , axis=1)


In [None]:
df7.sort_values(by='additional_number_of_scoring' , ascending=False).reset_index().drop('index' , axis=1).head(10)


### the most ten hotels scoring without written review

In [None]:
fig7= px.bar(df7.sort_values(by='additional_number_of_scoring' , ascending=False).reset_index().drop('index' , axis=1).head(10) ,
        x='hotel_name' ,
          y='additional_number_of_scoring' , 
          text_auto=True ,
          color='hotel_name',
          title='top ten additional scoring number')


fig7.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    height=900
    )

# 9) the average score of each hotel & top 10 

In [None]:
df.columns

In [None]:
df8 = df[['average_hotel_score' , 'hotel_name']].drop_duplicates()

df8.sort_values(by='average_hotel_score', ascending=False).reset_index().drop('index' , axis=1 )



In [None]:
df8.sort_values(by='average_hotel_score', ascending=False).reset_index().drop('index' , axis=1 ).head(10)


In [None]:
# Bar chart of the ten hotels with the highest average score.
fig8 = px.bar(
    df8.sort_values(by='average_hotel_score', ascending=False).reset_index(drop=True).head(10),
    y='average_hotel_score',
    x='hotel_name',
    text='average_hotel_score',
    color='average_hotel_score',
    color_continuous_scale='Viridis',
    title='Average Hotel Score'
)

fig8.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    # Order the hotels by score. The hotel names are on the x axis, so the
    # category order must be set there; on the numeric y axis it had no effect.
    xaxis={'categoryorder': 'total descending'}
)



### least average score hotels

In [None]:
df8.sort_values(by='average_hotel_score', ascending=True).reset_index().drop('index' , axis=1 ).head(10)


In [None]:
px.bar(
    df8.sort_values(by='average_hotel_score', ascending=True).reset_index().drop('index' , axis=1 ).head(10),
    y='average_hotel_score',
    x='hotel_name',  
    text='average_hotel_score', 
    color='average_hotel_score',   
    color_continuous_scale='Viridis',
    title='Average Hotel Score'
)

# 10) How many written reviews each hotel received, and which hotels received the most and the fewest

In [None]:
df.columns

In [None]:
# One row per hotel with its total review count and score-only count.
df9 = (
    df[['hotel_name', 'total_number_of_reviews', 'additional_number_of_scoring']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Written reviews = total reviews minus scores given without a written review.
# This must be computed from df9's own columns: df9 has been de-duplicated and
# re-indexed, so subtracting columns of `df` would align on the index and attach
# the values of unrelated review rows to each hotel.
df9['written_reviews'] = df9['total_number_of_reviews'] - df9['additional_number_of_scoring']

df9 = df9.drop(columns=['additional_number_of_scoring', 'total_number_of_reviews'])

# Ten hotels with the most written reviews.
df9.sort_values(by='written_reviews', ascending=False).head(10)

In [None]:
df9.sort_values(by='written_reviews' , ascending=True).head(10)

# 11) correlation between number of words in negative and positive reviews and reviewer score 

In [None]:
df.columns

In [None]:
df[['review_total_negative_word_counts',
    'review_total_positive_word_counts',
    'reviewer_score']].corr()


In [None]:
corr_df = df[['review_total_negative_word_counts',
              'review_total_positive_word_counts',
              'reviewer_score']].corr()
px.imshow(
    corr_df,
    text_auto=True,
    title="Correlation Heatmap",
    width=600,
    height=600
)



# 12) what is the average reviewers score for each nationality and top 10 

In [None]:
df.columns

In [None]:
df11 = df.groupby('reviewer_nationality')['reviewer_score'].mean().to_frame().reset_index().sort_values('reviewer_score', ascending=False)
df11

In [None]:
px.treemap(
    df11,
    path=['reviewer_nationality'],
    values='reviewer_score',
    color='reviewer_score',
    color_continuous_scale='Viridis',
    title='Treemap of Average Reviewer Score by Nationality'
)

In [None]:
df12 = df.groupby('reviewer_nationality')['reviewer_score'].mean().to_frame().reset_index().sort_values('reviewer_score', ascending=False).head(10)

df12

In [None]:
fig11 = px.bar(
    df12,
    x='reviewer_nationality',
    y='reviewer_score',
    text='reviewer_score',
    title='Top 10 Nationalities by Average Reviewer Score',
    height=500,
    color='reviewer_nationality'
)


fig11.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white'
)

# 13) Which five hotels are the most visited and reviewed in each season?

In [None]:
df.columns

#### summer

In [None]:
df['review_season'].unique()

In [None]:
df13 = df[df['review_season'] =='Summer']

In [None]:
df13 = df13['hotel_name'].value_counts(ascending=False).head().reset_index() 

df13.columns = ['hotel_name' , 'count']

df13 = df13.merge(df[['hotel_name', 'average_hotel_score']].drop_duplicates(), on='hotel_name')

df13

#### winter

In [None]:
df13_1 = df[df['review_season'] =='Winter']

In [None]:
df13_1 = df13_1['hotel_name'].value_counts(ascending=False).head().reset_index() 

df13_1.columns = ['hotel_name' , 'count']


df13_1 = df13_1.merge(df[['hotel_name', 'average_hotel_score']].drop_duplicates(), on='hotel_name')


df13_1

#### autumn

In [None]:
df13_2 = df[df['review_season'] =='Autumn']

In [None]:
df13_2= df13_2['hotel_name'].value_counts(ascending=False).head().reset_index() 

df13_2.columns = ['hotel_name' , 'count']

df13_2 = df13_2.merge(df[['hotel_name', 'average_hotel_score']].drop_duplicates(), on='hotel_name')


df13_2

#### spring

In [None]:
df13_3 = df[df['review_season'] =='Spring']

In [None]:
df13_3 = df13_3['hotel_name'].value_counts(ascending=False).head().reset_index() 

df13_3.columns = ['hotel_name' , 'count']

df13_3 = df13_3.merge(df[['hotel_name', 'average_hotel_score']].drop_duplicates(), on='hotel_name')


df13_3

# 14) the average score each of them get by visitors

In [None]:
# 2x2 grid of bar charts: the five most-reviewed hotels of each season,
# each bar annotated with its review count and average hotel score.
fig12 = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=("Summer", "Winter", "Autumn", "Spring"),
)

# (season frame, bar colour, grid row, grid column), in the order the traces are added.
season_panels = (
    (df13, 'lightskyblue', 1, 1),    # summer
    (df13_1, 'blue', 1, 2),          # winter
    (df13_2, 'lightskyblue', 2, 1),  # autumn
    (df13_3, 'lightskyblue', 2, 2),  # spring
)

for season_df, bar_color, grid_row, grid_col in season_panels:
    bar_labels = [
        f"Count: {count}<br>Score: {score}"
        for count, score in zip(season_df['count'], season_df['average_hotel_score'])
    ]
    season_bar = go.Bar(
        x=season_df['hotel_name'],
        y=season_df['count'],
        marker_color=bar_color,
        text=bar_labels,
        textposition='auto',
    )
    fig12.add_trace(season_bar, row=grid_row, col=grid_col)

# Dark theme, fixed canvas size and overall title; the returned figure is the cell output.
fig12.update_layout(
    title_text="Most Reviewed Hotels by Season (with Average Score)",
    width=1200,
    height=800,
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
)

# 15) positive vs negative word counts per each hotel 

In [None]:
df.columns

In [None]:
px.scatter(df, x='review_total_positive_word_counts', y='review_total_negative_word_counts',
           size='total_number_of_reviews', color='total_number_of_reviews',
           title='Positive vs Negative Word Counts per Hotel',
           hover_data=['hotel_name'])

# 16) where is the top 20 hotels in average score exist on map 

In [None]:
df.columns

In [None]:
df15 = df[['hotel_name' , 'average_hotel_score' , 'hotel_address' , 'lat', 'lng']].drop_duplicates()

top_20 = df15.sort_values(by='average_hotel_score' , ascending=False).reset_index(drop=True).head(20)
top_20

In [None]:
# Nudge the coordinates slightly so hotels sharing a location do not overlap.
# assign() builds a new frame rather than writing columns into the head() slice
# that top_20 came from, which avoids pandas' SettingWithCopyWarning.
top_20 = top_20.assign(
    lat_jitter=top_20['lat'] + np.random.uniform(-0.0005, 0.0005, size=len(top_20)),
    lng_jitter=top_20['lng'] + np.random.uniform(-0.0005, 0.0005, size=len(top_20)),
)

# Centre the map automatically on the mean of the un-jittered coordinates.
center_lat = top_20['lat'].mean()
center_lon = top_20['lng'].mean()

# One marker per hotel, coloured and sized by its average score.
fig = px.scatter_mapbox(
    top_20,
    lat='lat_jitter',
    lon='lng_jitter',
    hover_name='hotel_name',
    hover_data=['hotel_address', 'average_hotel_score'],
    color='average_hotel_score',
    size='average_hotel_score',
    size_max=15,
    zoom=3,
    height=600,
    mapbox_style='carto-positron'
)
fig.update_layout(mapbox_center={"lat": center_lat, "lon": center_lon})

fig.show()


# 17) the best hotels around the Mediterranean coast (Barcelona area):

In [None]:
df.columns

In [None]:
# Example: filter hotels near the Mediterranean coast
# Approximate lat/lon box (Barcelona area)
sea_hotels = df15[
    (df15['lat'] > 41.3) & (df15['lat'] < 41.5) &
    (df15['lng'] > 2.0) & (df15['lng'] < 2.2)
]

# Top 10 by average score
sea_top10 = sea_hotels.sort_values(by='average_hotel_score', ascending=False).head(10)

sea_top10

In [None]:

# Add jitter to avoid overlapping points.
# assign() builds a new frame rather than writing columns into the head() slice
# that sea_top10 came from, which avoids pandas' SettingWithCopyWarning.
sea_top10 = sea_top10.assign(
    lat_jitter=sea_top10['lat'] + np.random.uniform(-0.0005, 0.0005, size=len(sea_top10)),
    lng_jitter=sea_top10['lng'] + np.random.uniform(-0.0005, 0.0005, size=len(sea_top10)),
)

# Calculate map center from the un-jittered coordinates
center_lat = sea_top10['lat'].mean()
center_lon = sea_top10['lng'].mean()

# Scatter Map: one marker per hotel, coloured and sized by its average score
fig = px.scatter_mapbox(
    sea_top10,
    lat='lat_jitter',
    lon='lng_jitter',
    hover_name='hotel_name',
    hover_data=['hotel_address', 'average_hotel_score'],
    color='average_hotel_score',
    size='average_hotel_score',
    size_max=15,
    zoom=12,  # zoom in more for coastal area
    height=600,
    mapbox_style='carto-positron'
)

fig.update_layout(mapbox_center={"lat": center_lat, "lon": center_lon})
fig.show()

# 18) what is the top five hotels scores regarding each travelers type 

In [None]:
df.columns

In [None]:
df15= df[['hotel_name', 'travelers_type', 'average_hotel_score']].drop_duplicates()

df15.groupby(['travelers_type','hotel_name'])['average_hotel_score'].max().sort_values(ascending=False).reset_index()


In [None]:
top5_per_traveler = (
    df15.groupby('travelers_type')
         .apply(lambda x: x.nlargest(5, 'average_hotel_score'))
         .reset_index(drop=True)
)

top5_per_traveler = top5_per_traveler[['travelers_type', 'hotel_name', 'average_hotel_score']]

top5_per_traveler

In [None]:
fig15 = px.bar(
    top5_per_traveler,
    x='average_hotel_score',
    y='hotel_name',
    color='travelers_type',
    orientation='h',
    text='average_hotel_score',
    title='Top 5 Hotels per Traveler Type by Average Score',
    height=600
)

fig15.show()

# 19) what is the average of the reviewers_score for each hotel regarding each year and the top 5 in each year

In [None]:
df.columns

In [None]:
avg_score_year = ( df.groupby(['hotel_name','review_year'])['reviewer_score'] .mean() .reset_index() .sort_values(['hotel_name','review_year']) )

avg_score_year


In [None]:
df16= avg_score_year[avg_score_year['review_year'] == 2015].sort_values(by='reviewer_score', ascending=False).head()
df16.columns = ['hotel_name' , 'review_year','review_score' ]
df16

In [None]:
df16_1=avg_score_year[avg_score_year['review_year'] == 2016].sort_values(by='reviewer_score', ascending=False).head()
df16_1.columns = ['hotel_name' , 'review_year','review_score' ]

df16_1

In [None]:
df16_2=avg_score_year[avg_score_year['review_year'] == 2017].sort_values(by='reviewer_score', ascending=False).head()
df16_2.columns = ['hotel_name' , 'review_year','review_score' ]

df16_2

In [None]:
# Three bar charts side by side: the five best-scored hotels of 2015, 2016 and 2017.
fig16 = make_subplots(
    rows=1, cols=3,
    subplot_titles=("2015", "2016", "2017")
)

# One trace per year. Each trace takes its bar colours from its OWN scores;
# previously the 2016 and 2017 bars were coloured with the 2015 frame (df16),
# so the colours did not match the plotted values.
for subplot_col, yearly_top in enumerate((df16, df16_1, df16_2), start=1):
    fig16.add_trace(
        go.Bar(
            x=yearly_top['hotel_name'],
            y=yearly_top['review_score'],
            marker=dict(
                color=yearly_top['review_score'],
                colorscale='Rainbow'
            ),
            text=yearly_top['review_score'],
            textposition='auto'
        ),
        row=1, col=subplot_col
    )

fig16.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font=dict(color='white')    # make the text white
)

# Tilt the tick labels so long hotel names stay readable
fig16.update_xaxes(tickangle=45)


# 20) The nationalities that give the most written reviews

In [None]:
df.columns

In [None]:
# Per-review frame: hotel, reviewer nationality and the hotel's written-review
# count (total reviews minus scores given without a written review).
# assign() builds a new frame, so no column is written into (or dropped in place
# from) a slice of `df`, which avoids pandas' SettingWithCopyWarning.
df17 = df[['hotel_name', 'reviewer_nationality']].assign(
    written_reviews=df['total_number_of_reviews'] - df['additional_number_of_scoring']
)
df17

In [None]:


nat_count = (
    df17.groupby('reviewer_nationality')['written_reviews']
      .count()
      .sort_values(ascending=False)
      .head(20)
)

In [None]:
px.bar(
    nat_count,
    x=nat_count.index,
    y=nat_count.values,
    title="Top 20 Nationalities by Number of Reviewers",
    color=nat_count.values,
    color_continuous_scale="Turbo",
    height=600,
    text_auto= True
)

In [None]:
nat_avg_reviews = (
    df.groupby('reviewer_nationality')['total_number_of_reviews_reviewer_has_given']
      .mean()
      .sort_values(ascending=False)
      .head(20)
)

# 21) Average Number of Reviews Given per Reviewer (Top 20 Nationalities)

In [None]:
px.bar(
    nat_avg_reviews,
    x=nat_avg_reviews.values,
    y=nat_avg_reviews.index,
    orientation='h',
    title="Average Number of Reviews Given per Reviewer (Top 20 Nationalities)",
    color=nat_avg_reviews.values,
    color_continuous_scale="Plasma",
    height=700
)



# 22) Distribution of reviews given per reviewer by nationality

In [None]:
df.columns

In [None]:
px.box(
    df,
    x='reviewer_nationality',
    y='total_number_of_reviews_reviewer_has_given',
    title="Distribution of Reviews Given per Reviewer by Nationality",
    points='suspectedoutliers',
    height=900
)

In [None]:
px.scatter(
    df,
    x='reviewer_nationality',
    y='total_number_of_reviews_reviewer_has_given',
    color='total_number_of_reviews_reviewer_has_given',
    size='total_number_of_reviews_reviewer_has_given',
    title="Review Activity by Nationalities",
    height=800
)

# 23) Which 20 nationalities travel with pets the most, and how many reviewers did so?

In [None]:
df['pets_allowed'].unique()

In [None]:
df.columns

In [None]:
# Keep only the reviews tagged as travelling with a pet. The tag is compared
# after stripping surrounding whitespace, so both the padded form ' With a pet '
# and a cleaned 'With a pet' match.
df_pets = df[df['pets_allowed'].str.strip() == 'With a pet']

# Nationalities that wrote the most reviews among these stays
top_nationalities_pets = (
    df_pets['reviewer_nationality']
    .value_counts()
    .head(20)   # top 20 nationalities
)

top_nationalities_pets.to_frame().reset_index()

In [None]:
# Bar chart of the 20 nationalities with the most "with a pet" reviews.
pet_review_counts = top_nationalities_pets.values
pet_nationalities = top_nationalities_pets.index

fig17 = px.bar(
    top_nationalities_pets.to_frame().reset_index(),
    x=pet_nationalities,
    y=pet_review_counts,
    text=pet_review_counts,
    color=pet_review_counts,
    color_continuous_scale="Viridis",
    title="Top 20 Nationalities Reviewing Hotels With Pets Allowed",
)

pets_layout = dict(
    plot_bgcolor='gray',                      # background of the inner plot area
    paper_bgcolor='black',                    # background of the outer page
    font=dict(color='white'),                 # white text
    title_font=dict(size=22, color='white'),
    xaxis=dict(tickangle=45),                 # rotate nationality names in case they are long
)
fig17.update_layout(**pets_layout)

# 24) total reviews distributions among each year 

In [None]:
# Sum total_number_of_reviews_reviewer_has_given over all review rows of each
# review_year and plot one bar per year.
# NOTE(review): this adds up a per-reviewer counter across rows rather than
# counting the review rows of each year - presumably the "Total Reviews by Year"
# title intends the latter; confirm which quantity is wanted.
reviews_per_year = df.groupby('review_year')['total_number_of_reviews_reviewer_has_given'].sum().reset_index()
fig30 = px.bar(
    reviews_per_year,
    x='review_year',
    y='total_number_of_reviews_reviewer_has_given',
    title="Total Reviews by Year",
    color='total_number_of_reviews_reviewer_has_given',
    text='total_number_of_reviews_reviewer_has_given',
    color_continuous_scale='Viridis',
    height=500
)

# Style the chart for the dark dashboard
fig30.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    xaxis_title="Year",
    yaxis_title="Total Reviews"
)