### Rule Base

Imports and Initialization

In [None]:
!pip install langdetect googletrans==4.0.0-rc1
!pip install pandas vaderSentiment



In [None]:
import pandas as pd
from googletrans import Translator
from langdetect import DetectorFactory, detect
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [None]:
# langdetect is non-deterministic on short or ambiguous text; fixing the seed
# before the first detection makes language labels reproducible across runs.
DetectorFactory.seed = 0

# Initialize VADER sentiment analyzer and Google Translator
analyzer = SentimentIntensityAnalyzer()
translator = Translator()

 Data Loading and Preprocessing

In [None]:
# Read the dataset from disk
file_path = 'AI in edu dataset - Sheet1.csv'  # Replace with the correct path to your file
data = pd.read_csv(file_path)

# Everything downstream reads the "Content" column, so fail fast without it
if 'Content' not in data:
    raise ValueError('CSV file does not have a "Content" column.')

# Keep only rows that actually have content, and coerce that content to str
data = (
    data
    .dropna(subset=['Content'])
    .assign(Content=lambda frame: frame['Content'].astype(str))
)

# Quick look at the cleaned frame
print(data.head())


                                             Content Platform Sentiment
0  So cheating vs resourcesful is just someone pt...      NaN   Neutral
1  What many people fail to realize din is being ...      NaN  Negative
2  Dami paliwanag. Cheating naman talaga rin ang ...      NaN  Negative
3  aside sa education, there are many problems we...      NaN   Neutral
4  malamang, ofc kasalanan ng magulang kasi wlang...      NaN  Negative


Language Detection and Translation

In [None]:
def detect_language(text):
    """Label ``text`` as English, Taglish, or unknown.

    Parameters
    ----------
    text : str
        The text to classify.

    Returns
    -------
    str
        ``'en'`` when ``detect`` reports English; ``'tl'`` for any other
        detected language (non-English is assumed to be Taglish);
        ``'unknown'`` when the input is blank / not a string or when
        detection raises.
    """
    # Blank or non-string input has nothing to detect.
    if not isinstance(text, str) or not text.strip():
        return 'unknown'

    try:
        lang = detect(text)
    except Exception:
        # Best-effort: a detection failure yields 'unknown'. Catching Exception
        # (not a bare except) lets KeyboardInterrupt / SystemExit propagate.
        return 'unknown'

    return 'en' if lang == 'en' else 'tl'  # Assume non-English is Taglish

# Tag every row with its detected-language label
data['Detected_Language'] = data['Content'].apply(detect_language)

# Partition the rows by label ('unknown' rows land in neither subset)
english_content = data.loc[data['Detected_Language'].eq('en')]
taglish_content = data.loc[data['Detected_Language'].eq('tl')]

# Preview a few rows from each subset
print("WORLDWIDE Content:")
print(english_content.head()[['Content', 'Detected_Language']])
print("\nPELIPENS Content:")
print(taglish_content.head()[['Content', 'Detected_Language']])

# How many rows received each label
language_counts = data['Detected_Language'].value_counts()
print("\nLanguage Counts:", language_counts, sep="\n")

WORLDWIDE Content:
                                             Content Detected_Language
1  What many people fail to realize din is being ...                en
3  aside sa education, there are many problems we...                en
4  malamang, ofc kasalanan ng magulang kasi wlang...                en
5  The education system that AI will destroy will...                en
6  AI tool sare very helpful but you need to use ...                en

PELIPENS Content:
                                               Content Detected_Language
0    So cheating vs resourcesful is just someone pt...                tl
2    Dami paliwanag. Cheating naman talaga rin ang ...                tl
228  AI tutors are doubling student engagement in e...                tl
242  Ok sya, nagbibigay ng idea at pang research, m...                tl
243  Kung aasa kayo sa AI to do school work, kayo d...                tl

Language Counts:
Detected_Language
en    290
tl    141
Name: count, dtype: int64


In [131]:
def detect_and_translate(text):
    """Return ``text`` in English, translating it when it is detected as non-English.

    Parameters
    ----------
    text : str
        The text to (possibly) translate.

    Returns
    -------
    str
        ``text`` unchanged when it is blank, detected as English, or when
        detection / translation fails; otherwise the translated text.

    Notes
    -----
    Failures are reported with ``print`` and never raised, so a single bad row
    does not abort a whole pass over the dataset.
    """
    # Blank or non-string input: nothing to detect or translate.
    if not isinstance(text, str) or not text.strip():
        return text

    try:
        # Detect language of the text
        lang = detect(text)
    except Exception as e:
        # Reported separately so a detection failure is not mislabeled as a
        # translation failure.
        print(f"Error during language detection: {e}")
        return text

    if lang == 'en':
        return text

    try:
        translated = translator.translate(text, src=lang, dest='en').text
    except Exception as e:
        print(f"Error during translation: {e}")
        return text  # Return original text if translation fails

    # A non-string result (e.g. None) would break downstream sentiment
    # scoring; fall back to the original text instead.
    if not isinstance(translated, str):
        return text
    return translated

# Preview: show the original and translated text for the first few rows.
# The separator is loop-invariant, so build it once.
separator = '=' * 1000

# Only the "Content" column is needed, so iterate over it directly.
for index, original_text in data['Content'].head(10).items():  # Adjust the number as needed
    translated_text = detect_and_translate(original_text)

    # Two separate statements (the original joined the two print calls into a
    # throwaway tuple expression on one line).
    print(f"Row {index + 1}")
    print(separator)
    print(f"Original Text: {original_text}")
    print(f"Translated Text: {translated_text}")
    print(separator)



Row 1
Original Text: So cheating vs resourcesful is just someone pt of view? Tapos you follow rules and u expect innovations? Alam ko lang bukambibig ng talonan ang pandaraya. Kung lahat ng tao nag papafollow ng rules di nasa stone age pa tayo.
Translated Text: So cheating vs resourcesful is just someone PT of view?Then you follow rules and u expect innovations?I just know the scandal will make the fraud.If everyone goes to the rules we are not in the stone age.
Row 2
Original Text: What many people fail to realize din is being cunning is not the same as being resourceful; and not following the rules doesn't equate to innovation or progress. \n\nCrime is also not following the rules, is it then progress? Hindi diba. Kasi you prioritize yourself and being cunning to get an edge rather than allowing yourself to be evaluated properly and maadress whatever you are lacking. Kaya nga we need experts din to look into the objectives sa class. \n\nIf ang objective is being evaluated for what yo

Sentiment Analysis

In [133]:
def analyze_sentiment(text):
    """Classify ``text`` with VADER and return ``(label, scores)``.

    ``scores`` is the dict produced by ``analyzer.polarity_scores(text)``.
    ``label`` is derived from its ``'compound'`` entry:
    ``'Positive'`` at or above 0.05, ``'Negative'`` at or below -0.05,
    and ``'Neutral'`` otherwise.
    """
    scores = analyzer.polarity_scores(text)
    compound = scores['compound']

    # Guard-clause form of the rule-based thresholds on the compound score
    if compound >= 0.05:
        return 'Positive', scores
    if compound <= -0.05:
        return 'Negative', scores
    return 'Neutral', scores

# Label each row's language
data['Detected_Language'] = data['Content'].apply(detect_language)

# Translate (when needed) and score every row, collecting one record per row
results = []
for text, lang in zip(data['Content'], data['Detected_Language']):
    translated_text = detect_and_translate(text)  # English text passes through unchanged
    sentiment, scores = analyze_sentiment(translated_text)

    record = {
        'Original Content': text,
        'Translated Content': translated_text,
        'Sentiment': sentiment,
        'Compound Score': scores['compound'],
        'Positive Score': scores['pos'],
        'Neutral Score': scores['neu'],
        'Negative Score': scores['neg'],
        'Detected_Language': lang,
    }
    results.append(record)

# Build the results frame in one shot from the collected records
sentiment_df = pd.DataFrame(results)

# Persist the results as CSV (optional)
output_file = 'sentiment_analysis_with_translation.csv'
sentiment_df.to_csv(output_file, index=False)

# Show the first few result rows
print(sentiment_df.head())


                                    Original Content  \
0  So cheating vs resourcesful is just someone pt...   
1  What many people fail to realize din is being ...   
2  Dami paliwanag. Cheating naman talaga rin ang ...   
3  aside sa education, there are many problems we...   
4  malamang, ofc kasalanan ng magulang kasi wlang...   

                                  Translated Content Sentiment  \
0  So cheating vs resourcesful is just someone PT...  Negative   
1  What many people fail to realize din is being ...  Negative   
2  Volume explanation.Cheating is also a failing....  Negative   
3  aside sa education, there are many problems we...  Negative   
4  malamang, ofc kasalanan ng magulang kasi wlang...  Negative   

   Compound Score  Positive Score  Neutral Score  Negative Score  \
0         -0.7994           0.000          0.830           0.170   
1         -0.4374           0.051          0.872           0.077   
2         -0.5661           0.000          0.908           0.0

Sentiment Count


In [134]:
# Tally predicted labels across the whole result set
overall_sentiment_counts = sentiment_df['Sentiment'].value_counts()

# Tally predicted labels within each detected-language group
english_sentiment_counts = sentiment_df.loc[
    sentiment_df['Detected_Language'].eq('en'), 'Sentiment'
].value_counts()
taglish_sentiment_counts = sentiment_df.loc[
    sentiment_df['Detected_Language'].eq('tl'), 'Sentiment'
].value_counts()

# Report each tally under its heading
print("Overall Sentiment Counts:", overall_sentiment_counts, sep="\n")
print("\nWorldwide Sentiment Counts:", english_sentiment_counts, sep="\n")
print("\nPelepens Sentiment Counts:", taglish_sentiment_counts, sep="\n")


Overall Sentiment Counts:
Sentiment
Positive    238
Negative     97
Neutral      96
Name: count, dtype: int64

Worldwide Sentiment Counts:
Sentiment
Positive    187
Negative     59
Neutral      44
Name: count, dtype: int64

Pelepens Sentiment Counts:
Sentiment
Neutral     52
Positive    51
Negative    38
Name: count, dtype: int64
