# Import Libraries

In [49]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [50]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize, word_tokenize
import re
import string

In [51]:
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vrushank\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\vrushank\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# Read Data

In [52]:
df = pd.read_csv('final_data.csv')
df.head()

Unnamed: 0,URL_ID,URL,article
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,Rising IT cities and its impact on the economy...
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,Rising IT Cities and Their Impact on the Econo...
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,"Internet Demand’s Evolution, Communication Imp..."
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,Rise of Cybercrime and its Effect in upcoming ...
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,OTT platform and its impact on the entertainme...


# Data Pre-processing

### Removing '\n' from all articles.

In [53]:
df['article'] = df['article'].str.replace('\n','')

In [54]:
df['article'][0]

'Rising IT cities and its impact on the economy, environment, infrastructure, and city life by the year 2040.We have seen a huge development and dependence of people on technology in recent years. We have also seen the development of AI and ChatGPT in recent years. So it is a normal thing that we will become fully dependent on technology by 2040. Information technology will be a major power for all the developing nations. As a member of a developing nation, India is rapidly growing its IT base. It has also grown some IT cities which will be the major control centres for Information technology by 2040.Rising IT citiesNoida:- Noida in Uttar Pradesh near New Delhi is an emerging IT sector now. Many large companies like Google, Microsoft, IBM, Infosys and others have set up their companies here. Noida has a market base of billions of dollars and is doing a great job of boosting the national economy. The establishment of so many software companies has made Noida an information technology hu

# Sentiment Analysis

### Remove Stopwords

In [55]:
import os
os.listdir('StopWords')

['StopWords_Auditor.txt',
 'StopWords_Currencies.txt',
 'StopWords_DatesandNumbers.txt',
 'StopWords_Generic.txt',
 'StopWords_GenericLong.txt',
 'StopWords_Geographic.txt',
 'StopWords_Names.txt']

In [56]:
# Directory containing stopwords lists
stopwords_dir = 'StopWords'

# Function to read stopwords from a file
def read_stopwords(file_path):
    with open(file_path, 'r') as file:
        return [line.strip() for line in file]

# Initialize an empty set for all stopwords
all_stopwords = set()

# Iterate over all files in the stopwords directory and merge stopwords
for filename in os.listdir(stopwords_dir):
    file_path = os.path.join(stopwords_dir, filename)
    if os.path.isfile(file_path):
        stopwords = read_stopwords(file_path)
        all_stopwords.update(stopwords)  # Use a set to avoid duplicate stopwords

In [57]:
# Function to remove stopwords from a text
def remove_stopwords(text):
    words = text.split()  # Split text into words
    filtered_words = [word for word in words if word.lower() not in all_stopwords]
    return ' '.join(filtered_words)  # Join filtered words back into a single string

df['article_cleaned'] = df['article'].apply(remove_stopwords)

df.head()

Unnamed: 0,URL_ID,URL,article,article_cleaned
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,Rising IT cities and its impact on the economy...,"Rising cities impact economy, environment, inf..."
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,Rising IT Cities and Their Impact on the Econo...,"Rising Cities Impact Economy, Environment, Inf..."
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,"Internet Demand’s Evolution, Communication Imp...","Internet Demand’s Evolution, Communication Imp..."
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,Rise of Cybercrime and its Effect in upcoming ...,Rise Cybercrime Effect upcoming FutureThe live...
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,OTT platform and its impact on the entertainme...,OTT platform impact entertainment industry Fut...


### Create Positive words and Negative words lists.

In [58]:
# Directory containing the positive and negative words files
master_dictionary_dir = 'MasterDictionary'
stopwords_dir = 'Stopwords'

# File names for positive and negative words
positive_words_file = 'positive-words.txt'
negative_words_file = 'negative-words.txt'

# Function to read words from a file
def read_words(file_path):
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        # Skip lines that are comments (starting with ';')
        words = [line.strip() for line in file if not line.startswith(';')]
    return words

# Read positive words
positive_words_path = os.path.join(master_dictionary_dir, positive_words_file)
positive_words = [word for word in read_words(positive_words_path) if word.lower() not in all_stopwords]

# Read negative words
negative_words_path = os.path.join(master_dictionary_dir, negative_words_file)
negative_words = [word for word in read_words(negative_words_path) if word.lower() not in all_stopwords]

# Print some of the positive and negative words to verify
print(f"First 10 positive words: {positive_words[:10]}")
print(f"First 10 negative words: {negative_words[:10]}")


First 10 positive words: ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']
First 10 negative words: ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


In [59]:
len(positive_words)

1988

In [60]:
len(negative_words)

4779

### Converting the text into a list of tokens using the nltk tokenize module and then performing 'Sentiment Analysis'.

In [61]:
def calculate_scores(text):
    tokens = word_tokenize(text.lower())  

    positive_score = sum(1 for token in tokens if token in positive_words)
    negative_score = sum(1 for token in tokens if token in negative_words)

    # Ensure negative_score is positive
    negative_score_abs = abs(negative_score)
    
    polarity_score = (positive_score - negative_score_abs) / ((positive_score + negative_score_abs) + 0.000001)
    
    total_words = len(tokens)
    subjectivity_score = (positive_score + negative_score_abs) / (total_words + 0.000001)
    
    return positive_score, negative_score_abs, polarity_score, subjectivity_score

df[['POSITIVE SCORE', 'NEGATIVE SCORE', 'POLARITY SCORE', 'SUBJECTIVITY SCORE']] = df['article_cleaned'].apply(
    lambda text: pd.Series(calculate_scores(text))
)

df.head()

Unnamed: 0,URL_ID,URL,article,article_cleaned,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,Rising IT cities and its impact on the economy...,"Rising cities impact economy, environment, inf...",33.0,5.0,0.736842,0.054755
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,Rising IT Cities and Their Impact on the Econo...,"Rising Cities Impact Economy, Environment, Inf...",60.0,29.0,0.348315,0.087255
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,"Internet Demand’s Evolution, Communication Imp...","Internet Demand’s Evolution, Communication Imp...",38.0,24.0,0.225806,0.079794
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,Rise of Cybercrime and its Effect in upcoming ...,Rise Cybercrime Effect upcoming FutureThe live...,37.0,72.0,-0.321101,0.142857
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,OTT platform and its impact on the entertainme...,OTT platform impact entertainment industry Fut...,21.0,8.0,0.448276,0.065611


### Analysis of Readability

In [62]:
def is_complex(word):
    syllable_count = 0
    vowels = 'aeiouy'
    word = word.lower()
    if len(word) <= 3:
        return False
    if word[0] in vowels:
        syllable_count += 1
    for index in range(1, len(word)):
        if word[index] in vowels and word[index - 1] not in vowels:
            syllable_count += 1
    if word.endswith('e'):
        syllable_count -= 1
    if syllable_count > 2:
        return True
    return False

# Function to calculate readability metrics
def calculate_readability_metrics(text):

    sentences = sent_tokenize(text)
    words = word_tokenize(text)

    num_sentences = len(sentences)
    num_words = len(words)

    # Avoid division by zero
    if num_sentences == 0:
        num_sentences = 1
    if num_words == 0:
        num_words = 1

    # Calculate average sentence length
    avg_sentence_length = num_words / num_sentences

    # Calculate the number of complex words
    num_complex_words = sum(1 for word in words if is_complex(word))

    # Calculate percentage of complex words
    percent_complex_words = num_complex_words / num_words

    # Calculate Fog Index
    fog_index = 0.4 * (avg_sentence_length + percent_complex_words)

    return avg_sentence_length, percent_complex_words, fog_index


df[['AVG SENTENCE LENGTH', 'PERCENTAGE OF COMPLEX WORDS', 'FOG INDEX']] = df['article_cleaned'].apply(
    lambda text: pd.Series(calculate_readability_metrics(text))
)

df.head()


Unnamed: 0,URL_ID,URL,article,article_cleaned,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,Rising IT cities and its impact on the economy...,"Rising cities impact economy, environment, inf...",33.0,5.0,0.736842,0.054755,12.192982,0.238849,4.972733
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,Rising IT Cities and Their Impact on the Econo...,"Rising Cities Impact Economy, Environment, Inf...",60.0,29.0,0.348315,0.087255,15.9375,0.333333,6.508333
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,"Internet Demand’s Evolution, Communication Imp...","Internet Demand’s Evolution, Communication Imp...",38.0,24.0,0.225806,0.079794,16.891304,0.436293,6.931039
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,Rise of Cybercrime and its Effect in upcoming ...,Rise Cybercrime Effect upcoming FutureThe live...,37.0,72.0,-0.321101,0.142857,20.72973,0.408083,8.455125
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,OTT platform and its impact on the entertainme...,OTT platform impact entertainment industry Fut...,21.0,8.0,0.448276,0.065611,16.37037,0.343891,6.685705


### Average Number of Words Per Sentence

In [63]:
def average_words_per_sentence(text):
    sentences = sent_tokenize(text)
    words = word_tokenize(text)
    
    num_sentences = len(sentences)
    num_words = len(words)
    
    # Avoid division by zero
    if num_sentences == 0:
        num_sentences = 1

    avg_words_per_sentence = num_words / num_sentences
    return avg_words_per_sentence

df['AVG NUMBER OF WORDS PER SENTENCE'] = df['article_cleaned'].apply(average_words_per_sentence)

df.head()

Unnamed: 0,URL_ID,URL,article,article_cleaned,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,Rising IT cities and its impact on the economy...,"Rising cities impact economy, environment, inf...",33.0,5.0,0.736842,0.054755,12.192982,0.238849,4.972733,12.192982
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,Rising IT Cities and Their Impact on the Econo...,"Rising Cities Impact Economy, Environment, Inf...",60.0,29.0,0.348315,0.087255,15.9375,0.333333,6.508333,15.9375
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,"Internet Demand’s Evolution, Communication Imp...","Internet Demand’s Evolution, Communication Imp...",38.0,24.0,0.225806,0.079794,16.891304,0.436293,6.931039,16.891304
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,Rise of Cybercrime and its Effect in upcoming ...,Rise Cybercrime Effect upcoming FutureThe live...,37.0,72.0,-0.321101,0.142857,20.72973,0.408083,8.455125,20.72973
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,OTT platform and its impact on the entertainme...,OTT platform impact entertainment industry Fut...,21.0,8.0,0.448276,0.065611,16.37037,0.343891,6.685705,16.37037


### Complex Word Count

In [64]:
# Function to calculate complex word count
def complex_word_count(text):
    words = word_tokenize(text)
    complex_words = sum(1 for word in words if is_complex(word))
    return complex_words

# Apply the function to the 'article_cleaned' column
df['COMPLEX WORD COUNT'] = df['article_cleaned'].apply(complex_word_count)

df.head()

Unnamed: 0,URL_ID,URL,article,article_cleaned,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,Rising IT cities and its impact on the economy...,"Rising cities impact economy, environment, inf...",33.0,5.0,0.736842,0.054755,12.192982,0.238849,4.972733,12.192982,166
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,Rising IT Cities and Their Impact on the Econo...,"Rising Cities Impact Economy, Environment, Inf...",60.0,29.0,0.348315,0.087255,15.9375,0.333333,6.508333,15.9375,340
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,"Internet Demand’s Evolution, Communication Imp...","Internet Demand’s Evolution, Communication Imp...",38.0,24.0,0.225806,0.079794,16.891304,0.436293,6.931039,16.891304,339
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,Rise of Cybercrime and its Effect in upcoming ...,Rise Cybercrime Effect upcoming FutureThe live...,37.0,72.0,-0.321101,0.142857,20.72973,0.408083,8.455125,20.72973,313
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,OTT platform and its impact on the entertainme...,OTT platform impact entertainment industry Fut...,21.0,8.0,0.448276,0.065611,16.37037,0.343891,6.685705,16.37037,152


### Word Count

In [65]:
from nltk.corpus import stopwords

In [66]:
def word_count(text):
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(text)
    cleaned_words = [word.lower() for word in words if word.lower() not in stop_words and word.isalnum()]
    return len(cleaned_words)

df['WORD COUNT'] = df['article'].apply(word_count)

df.head()


Unnamed: 0,URL_ID,URL,article,article_cleaned,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT,WORD COUNT
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,Rising IT cities and its impact on the economy...,"Rising cities impact economy, environment, inf...",33.0,5.0,0.736842,0.054755,12.192982,0.238849,4.972733,12.192982,166,610
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,Rising IT Cities and Their Impact on the Econo...,"Rising Cities Impact Economy, Environment, Inf...",60.0,29.0,0.348315,0.087255,15.9375,0.333333,6.508333,15.9375,340,850
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,"Internet Demand’s Evolution, Communication Imp...","Internet Demand’s Evolution, Communication Imp...",38.0,24.0,0.225806,0.079794,16.891304,0.436293,6.931039,16.891304,339,643
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,Rise of Cybercrime and its Effect in upcoming ...,Rise Cybercrime Effect upcoming FutureThe live...,37.0,72.0,-0.321101,0.142857,20.72973,0.408083,8.455125,20.72973,313,647
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,OTT platform and its impact on the entertainme...,OTT platform impact entertainment industry Fut...,21.0,8.0,0.448276,0.065611,16.37037,0.343891,6.685705,16.37037,152,388


### Syllable Count Per Word

In [67]:
import re

# Function to count syllables in a word
def syllable_count(word):
    word = word.lower()
    if len(word) <= 3:
        return 1  # Single syllable for short words
    
    # Remove trailing 'es', 'ed', 'e' (except for words like 'the' and 'be')
    word = re.sub(r'[esed]$', '', word)
    
    vowels = 'aeiouy'
    syllable_count = 0
    prev_char_was_vowel = False
    
    for char in word:
        if char in vowels:
            if not prev_char_was_vowel:
                syllable_count += 1
            prev_char_was_vowel = True
        else:
            prev_char_was_vowel = False
    
    # Handle cases where 'e' at the end of the word is silent
    if word.endswith('e') and not word.endswith('le'):
        syllable_count -= 1
    
    # Minimum syllable count is 1
    if syllable_count <= 0:
        syllable_count = 1
    
    return syllable_count

# Function to count syllables in each word of the text
def syllable_count_per_word(text):
    words = word_tokenize(text)
    syllable_counts = [syllable_count(word) for word in words]
    return syllable_counts

# Apply the function to the 'article_cleaned' column
df['SYLLABLE PER WORD'] = df['article_cleaned'].apply(syllable_count_per_word)

df.head()


Unnamed: 0,URL_ID,URL,article,article_cleaned,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT,WORD COUNT,SYLLABLE PER WORD
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,Rising IT cities and its impact on the economy...,"Rising cities impact economy, environment, inf...",33.0,5.0,0.736842,0.054755,12.192982,0.238849,4.972733,12.192982,166,610,"[2, 1, 2, 4, 1, 4, 1, 4, 1, 2, 1, 1, 1, 1, 4, ..."
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,Rising IT Cities and Their Impact on the Econo...,"Rising Cities Impact Economy, Environment, Inf...",60.0,29.0,0.348315,0.087255,15.9375,0.333333,6.508333,15.9375,340,850,"[2, 1, 2, 4, 1, 4, 1, 4, 1, 2, 1, 5, 3, 1, 3, ..."
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,"Internet Demand’s Evolution, Communication Imp...","Internet Demand’s Evolution, Communication Imp...",38.0,24.0,0.225806,0.079794,16.891304,0.436293,6.931039,16.891304,339,643,"[3, 2, 1, 1, 4, 1, 5, 2, 1, 1, 1, 1, 4, 7, 1, ..."
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,Rise of Cybercrime and its Effect in upcoming ...,Rise Cybercrime Effect upcoming FutureThe live...,37.0,72.0,-0.321101,0.142857,20.72973,0.408083,8.455125,20.72973,313,647,"[1, 3, 2, 3, 3, 1, 1, 1, 1, 4, 5, 1, 2, 2, 4, ..."
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,OTT platform and its impact on the entertainme...,OTT platform impact entertainment industry Fut...,21.0,8.0,0.448276,0.065611,16.37037,0.343891,6.685705,16.37037,152,388,"[1, 2, 2, 4, 3, 3, 1, 1, 1, 2, 2, 4, 1, 4, 1, ..."


### Calculate Personal Pronouns

In [68]:
import re

def count_personal_pronouns(text):
    # Regex pattern to find personal pronouns
    pattern = r'\b(?:I|we|my|ours|us)\b'
    # Find all matches in the text
    matches = re.findall(pattern, text, flags=re.IGNORECASE)
    # Return count of matches
    return len(matches)

df['PERSONAL PRONOUNS'] = df['article'].apply(count_personal_pronouns)

df.head()

Unnamed: 0,URL_ID,URL,article,article_cleaned,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT,WORD COUNT,SYLLABLE PER WORD,PERSONAL PRONOUNS
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,Rising IT cities and its impact on the economy...,"Rising cities impact economy, environment, inf...",33.0,5.0,0.736842,0.054755,12.192982,0.238849,4.972733,12.192982,166,610,"[2, 1, 2, 4, 1, 4, 1, 4, 1, 2, 1, 1, 1, 1, 4, ...",12
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,Rising IT Cities and Their Impact on the Econo...,"Rising Cities Impact Economy, Environment, Inf...",60.0,29.0,0.348315,0.087255,15.9375,0.333333,6.508333,15.9375,340,850,"[2, 1, 2, 4, 1, 4, 1, 4, 1, 2, 1, 5, 3, 1, 3, ...",6
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,"Internet Demand’s Evolution, Communication Imp...","Internet Demand’s Evolution, Communication Imp...",38.0,24.0,0.225806,0.079794,16.891304,0.436293,6.931039,16.891304,339,643,"[3, 2, 1, 1, 4, 1, 5, 2, 1, 1, 1, 1, 4, 7, 1, ...",13
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,Rise of Cybercrime and its Effect in upcoming ...,Rise Cybercrime Effect upcoming FutureThe live...,37.0,72.0,-0.321101,0.142857,20.72973,0.408083,8.455125,20.72973,313,647,"[1, 3, 2, 3, 3, 1, 1, 1, 1, 4, 5, 1, 2, 2, 4, ...",5
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,OTT platform and its impact on the entertainme...,OTT platform impact entertainment industry Fut...,21.0,8.0,0.448276,0.065611,16.37037,0.343891,6.685705,16.37037,152,388,"[1, 2, 2, 4, 3, 3, 1, 1, 1, 2, 2, 4, 1, 4, 1, ...",6


In [69]:
def average_word_length(text):
    words = word_tokenize(text)
    total_characters = sum(len(word) for word in words)
    total_words = len(words)
    
    # Avoid division by zero
    if total_words == 0:
        total_words = 1
    
    avg_word_length = total_characters / total_words
    return avg_word_length

df['AVG WORD LENGTH'] = df['article_cleaned'].apply(average_word_length)

df.head()

Unnamed: 0,URL_ID,URL,article,article_cleaned,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT,WORD COUNT,SYLLABLE PER WORD,PERSONAL PRONOUNS,AVG WORD LENGTH
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,Rising IT cities and its impact on the economy...,"Rising cities impact economy, environment, inf...",33.0,5.0,0.736842,0.054755,12.192982,0.238849,4.972733,12.192982,166,610,"[2, 1, 2, 4, 1, 4, 1, 4, 1, 2, 1, 1, 1, 1, 4, ...",12,5.726619
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,Rising IT Cities and Their Impact on the Econo...,"Rising Cities Impact Economy, Environment, Inf...",60.0,29.0,0.348315,0.087255,15.9375,0.333333,6.508333,15.9375,340,850,"[2, 1, 2, 4, 1, 4, 1, 4, 1, 2, 1, 5, 3, 1, 3, ...",6,6.168627
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,"Internet Demand’s Evolution, Communication Imp...","Internet Demand’s Evolution, Communication Imp...",38.0,24.0,0.225806,0.079794,16.891304,0.436293,6.931039,16.891304,339,643,"[3, 2, 1, 1, 4, 1, 5, 2, 1, 1, 1, 1, 4, 7, 1, ...",13,7.043758
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,Rise of Cybercrime and its Effect in upcoming ...,Rise Cybercrime Effect upcoming FutureThe live...,37.0,72.0,-0.321101,0.142857,20.72973,0.408083,8.455125,20.72973,313,647,"[1, 3, 2, 3, 3, 1, 1, 1, 1, 4, 5, 1, 2, 2, 4, ...",5,6.770535
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,OTT platform and its impact on the entertainme...,OTT platform impact entertainment industry Fut...,21.0,8.0,0.448276,0.065611,16.37037,0.343891,6.685705,16.37037,152,388,"[1, 2, 2, 4, 3, 3, 1, 1, 1, 2, 2, 4, 1, 4, 1, ...",6,6.678733


In [70]:
final_df = df.drop(['article', 'article_cleaned'], axis=1)
# final_df.to_excel('output_data.xlsx', index=False)

  final_df.to_excel('output_data.xlsx', index=False)
