# Importing Libraries

In [4]:
import pandas as pd
import regex
import re
import emoji
from collections import Counter
import plotly.graph_objs as go
import plotly.express as px
import plotly.subplots as sp

# Reading Data

In [11]:
# Load the first 500 rows of the cleaned feedback workbook.
data = pd.read_excel('clean_data_full.xlsx', nrows=500)
print(type(data))
# Keep only the feedback column. `.copy()` makes `data` an independent frame,
# so the columns added to it in later cells are written to a real DataFrame
# rather than to a slice of the original (which is what makes pandas emit
# SettingWithCopyWarning).
data = data[['Final Feedback']].copy()

<class 'pandas.core.frame.DataFrame'>


#### Optional: remove the heart emoji

In [12]:
from collections import Counter

# Red heart (U+2764), with or without the emoji variation selector (U+FE0F).
_HEART_PATTERN = re.compile("\u2764\uFE0F?")


def remove_heart(text):
    """Return ``text`` with every red-heart emoji removed.

    Both the emoji-presentation form (U+2764 U+FE0F) and the bare U+2764
    character are stripped.

    Parameters
    ----------
    text : str or any
        A feedback cell. Non-string values (for example ``None`` or the NaN
        that pandas produces for an empty cell) are treated as having no text.

    Returns
    -------
    str
        The text without red hearts; ``''`` for non-string input.
    """
    if not isinstance(text, str):
        # re.sub would raise TypeError on NaN/None; such cells hold no text.
        return ''
    return _HEART_PATTERN.sub('', text)

# Store a heart-free version of every feedback entry in its own column.
data['text_without_heart'] = data['Final Feedback'].map(remove_heart)

# Extract total emojis

In [14]:
import regex
def extract_emojis(text):
    """Return the emojis found in ``text``, in order of appearance.

    The text is split into extended grapheme clusters (``\\X`` in the
    ``regex`` module), so a multi-code-point emoji such as a hand with a
    skin-tone modifier is reported as one item. A cluster is kept when at
    least one of its characters is a key of the ``emoji`` package's table.

    Parameters
    ----------
    text : str or any
        Text to scan. Non-string values (e.g. NaN from an empty cell) yield
        an empty list.

    Returns
    -------
    list of str
        One entry per emoji occurrence; duplicates are preserved.
    """
    if not isinstance(text, str):
        return []

    # emoji >= 2.0 removed UNICODE_EMOJI in favour of EMOJI_DATA; fall back to
    # the legacy table only when running against an older release.
    emoji_table = getattr(emoji, 'EMOJI_DATA', None)
    if emoji_table is None:
        emoji_table = emoji.UNICODE_EMOJI['en']

    return [
        cluster
        for cluster in regex.findall(r'\X', text)
        if any(char in emoji_table for char in cluster)
    ]

# Record, for each row, the list of emojis found in its heart-free text.
data['emoji_list'] = data['text_without_heart'].map(extract_emojis)

# Extracting unique emojis

In [15]:
# Flatten the per-row emoji lists into a single list of occurrences.
# The loop variables are deliberately not named `emoji`: a top-level
# `for emoji, ...` loop rebinds the imported `emoji` module to a string,
# which breaks extract_emojis() the next time its cell is run.
emoji_list = [symbol for row_emojis in data['emoji_list'] for symbol in row_emojis]

# Occurrences per distinct emoji.
emoji_counts = Counter(emoji_list)
print("Total number of emojis: ", sum(emoji_counts.values()))

for symbol, count in emoji_counts.items():
    print(f"{symbol}: {count}")

Total number of emojis:  51
💪: 7
😊: 19
😉: 8
💪🏻: 2
🔥: 3
💙: 1
💫: 2
💪🏼: 3
🎉: 3
🙌: 1
🌟: 1
👏: 1


# Bar plot of different emojis

In [16]:
# Tabulate the counts (one row per distinct emoji) and draw them as
# horizontal bars: count on the x-axis, emoji on the y-axis.
count_records = list(emoji_counts.items())
emoji_df = pd.DataFrame.from_records(count_records, columns=['Emoji', 'Count'])
fig1 = px.bar(
    emoji_df,
    x='Count',
    y='Emoji',
    title='Total Number of Emojis for 500 data',
)
fig1.show()

# Emoji counts for each block of 100 rows

In [17]:
# Plot the emoji counts separately for each consecutive block of CHUNK_SIZE
# rows, one subplot per block.
CHUNK_SIZE = 100
total_rows = len(data)

# (start, stop) positions of every block. The last block is shorter when the
# row count is not a multiple of CHUNK_SIZE.
chunk_bounds = [
    (start, min(start + CHUNK_SIZE, total_rows))
    for start in range(0, total_rows, CHUNK_SIZE)
]
chunk_labels = [f'Rows {start + 1}-{stop}' for start, stop in chunk_bounds]

# The grid needs one row per block that is actually plotted. Using
# `total_rows // CHUNK_SIZE` would drop the trailing partial block (and give
# zero rows for fewer than CHUNK_SIZE rows), making add_trace fail.
fig = sp.make_subplots(
    rows=max(1, len(chunk_bounds)),
    cols=1,
    subplot_titles=chunk_labels or None,
)

for row_number, ((start, stop), label) in enumerate(zip(chunk_bounds, chunk_labels), start=1):
    chunk_df = data.iloc[start:stop]
    chunk_counts = Counter(
        symbol for row_emojis in chunk_df['emoji_list'] for symbol in row_emojis
    )
    chunk_emoji_df = pd.DataFrame.from_records(
        list(chunk_counts.items()), columns=['Emoji', 'Count']
    )
    fig.add_trace(
        go.Bar(x=chunk_emoji_df['Emoji'], y=chunk_emoji_df['Count'], name=label),
        row=row_number,
        col=1,
    )

fig.update_layout(height=800, width=1200, title_text='Number of Each Unique Emoji 100 Rows')
fig.show()

# Save a static copy of the figure next to the notebook.
fig.write_image("output_plot.png", width=1200, height=800)

# Most frequent emojis across the whole dataset, with counts

In [19]:
# Count every emoji occurrence in the first 500 rows.
sampled_df = data.head(500)
sampled_emoji_list = [symbol for row_emojis in sampled_df['emoji_list'] for symbol in row_emojis]
sampled_counts = Counter(sampled_emoji_list)

# Keep only the top N emojis (adjust N as needed). Counter.most_common returns
# them already sorted by descending count, with ties kept in first-seen order,
# so the bar order is deterministic.
top_n = 10
sampled_emoji_df = pd.DataFrame(sampled_counts.most_common(top_n), columns=['Emoji', 'Count'])

# Bar plot of the most frequent emojis (heart emojis were removed earlier).
fig = go.Figure()
fig.add_trace(go.Bar(x=sampled_emoji_df['Emoji'], y=sampled_emoji_df['Count']))
fig.update_layout(
    title='Emojis in the First 500 Rows (excluding heart emojis)',
    xaxis_title='Emoji',
    yaxis_title='Count',
    # One tick per plotted bar: size the tick positions from the frame rather
    # than from top_n, so tickvals and ticktext stay the same length when
    # fewer than top_n distinct emojis exist.
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(len(sampled_emoji_df))),
        ticktext=sampled_emoji_df['Emoji'].tolist(),
    ),
    showlegend=False
)
fig.show()