# Sentiment analysis avec Textblob-FR

### Moyenne des polarités et subjectivités par document

In [None]:
import os
import pandas as pd
from textblob import TextBlob

def calculate_polarity_subjectivity(text):
    """Return the TextBlob sentiment of *text*, scaled by 100.

    Args:
        text: The document content to analyse.

    Returns:
        A ``(polarity, subjectivity)`` tuple where each value is the
        corresponding TextBlob sentiment score multiplied by 100.
    """
    # NOTE(review): the notebook title mentions Textblob-FR, but only
    # TextBlob's default analyzer is used here -- confirm this is intended.
    sentiment = TextBlob(text).sentiment
    return 100 * sentiment.polarity, 100 * sentiment.subjectivity

# Build one row per .txt document: [file name, polarity, subjectivity].
results = []
directory = "../data/txt_clean"
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the resulting table identical on every run and platform.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".txt"):
        continue
    with open(os.path.join(directory, filename), 'r', encoding="utf-8") as file:
        text = file.read()
    # The file is closed at this point; scoring only needs the text.
    polarity, subjectivity = calculate_polarity_subjectivity(text)
    results.append([filename, polarity, subjectivity])

df = pd.DataFrame(results, columns=['Document', 'Polarity', 'Subjectivity'])
# Bare expression so the notebook displays the table.
df

### Nombre de documents regroupés par polarité et subjectivité

In [None]:
# Boolean masks over df, named once so each category below reads as a
# combination of a polarity condition and a subjectivity condition.
negative_polarity = df['Polarity'] < 0
positive_polarity = df['Polarity'] > 0
zero_polarity = df['Polarity'] == 0
zero_subjectivity = df['Subjectivity'] == 0
positive_subjectivity = df['Subjectivity'] > 0

count = len(df[zero_polarity & zero_subjectivity])
print(f"The number of texts with 0 polarity and 0 subjectivity is: {count}")

# Overall negative / positive tallies; they are not printed in this cell.
negative_count = len(df[negative_polarity])
positive_count = len(df[positive_polarity])

zero_subjectivity_negative_polarity_count = len(df[negative_polarity & zero_subjectivity])
zero_subjectivity_positive_polarity_count = len(df[positive_polarity & zero_subjectivity])
positive_subjectivity_negative_polarity_count = len(df[negative_polarity & positive_subjectivity])
positive_subjectivity_positive_polarity_count = len(df[positive_polarity & positive_subjectivity])
positive_subjectivity_zero_polarity_count = len(df[zero_polarity & positive_subjectivity])

# Each category is reported exactly once (the positive/positive count used to
# be computed and printed twice).
print(f"The number of texts with 0 subjectivity and negative polarity is: {zero_subjectivity_negative_polarity_count}")
print(f"The number of texts with 0 subjectivity and positive polarity is: {zero_subjectivity_positive_polarity_count}")
print(f"The number of texts with positive subjectivity and negative polarity is: {positive_subjectivity_negative_polarity_count}")
print(f"The number of texts with positive subjectivity and positive polarity is: {positive_subjectivity_positive_polarity_count}")
print(f"The number of texts with positive subjectivity and 0 polarity is: {positive_subjectivity_zero_polarity_count}")


In [None]:
# Drop documents whose polarity AND subjectivity are both exactly 0, then
# summarise the remaining scores with their mean and median.
both_zero = (df['Polarity'] == 0) & (df['Subjectivity'] == 0)
filtered_df = df[~both_zero]

polarity_scores = filtered_df['Polarity']
subjectivity_scores = filtered_df['Subjectivity']

mean_polarity = polarity_scores.mean()
mean_subjectivity = subjectivity_scores.mean()

print(f"Mean Polarity: {mean_polarity}")
print(f"Mean Subjectivity: {mean_subjectivity}")

median_polarity = polarity_scores.median()
median_subjectivity = subjectivity_scores.median()

print(f"Median Polarity: {median_polarity}")
print(f"Median Subjectivity: {median_subjectivity}")

In [None]:
def calculate_polarity_subjectivity(text):
    """Score *text* with TextBlob and return both values multiplied by 100.

    Args:
        text: The document content to analyse.

    Returns:
        A two-item tuple: ``(100 * polarity, 100 * subjectivity)``.
    """
    # NOTE(review): this uses TextBlob's default analyzer although the
    # notebook title mentions Textblob-FR -- confirm this is intended.
    blob = TextBlob(text)
    polarity_scaled = 100 * blob.sentiment.polarity
    subjectivity_scaled = 100 * blob.sentiment.subjectivity
    return polarity_scaled, subjectivity_scaled

# Build one row per .txt document: [year, file name, polarity, subjectivity].
results = []
directory = "../data/txt_clean"
# Sorted so the row order of df does not depend on the arbitrary order in
# which os.listdir returns directory entries.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".txt"):
        continue
    # The year is the third '_'-separated part of the name, cut at its first
    # '-'. A name with fewer than three such parts raises IndexError.
    year = filename.split('_')[2].split('-')[0]
    with open(os.path.join(directory, filename), 'r', encoding="utf-8") as file:
        text = file.read()
    # The file is closed at this point; scoring only needs the text.
    polarity, subjectivity = calculate_polarity_subjectivity(text)
    results.append([year, filename, polarity, subjectivity])

df = pd.DataFrame(results, columns=['Year', 'Document', 'Polarity', 'Subjectivity'])

# Report, for each year, how many documents fall into each combination of
# polarity sign and subjectivity (zero / positive). Year values are the
# strings extracted from the file names.
for year, group in df.groupby('Year'):
    print(f"\nYear: {year}")

    negative_polarity = group['Polarity'] < 0
    positive_polarity = group['Polarity'] > 0
    zero_polarity = group['Polarity'] == 0
    zero_subjectivity = group['Subjectivity'] == 0
    positive_subjectivity = group['Subjectivity'] > 0

    count = len(group[zero_polarity & zero_subjectivity])
    print(f"The number of texts with 0 polarity and 0 subjectivity is: {count}")

    zero_subjectivity_negative_polarity_count = len(group[negative_polarity & zero_subjectivity])
    zero_subjectivity_positive_polarity_count = len(group[positive_polarity & zero_subjectivity])
    positive_subjectivity_negative_polarity_count = len(group[negative_polarity & positive_subjectivity])
    positive_subjectivity_positive_polarity_count = len(group[positive_polarity & positive_subjectivity])
    positive_subjectivity_zero_polarity_count = len(group[zero_polarity & positive_subjectivity])

    print(f"The number of texts with 0 subjectivity and negative polarity is: {zero_subjectivity_negative_polarity_count}")
    print(f"The number of texts with 0 subjectivity and positive polarity is: {zero_subjectivity_positive_polarity_count}")
    print(f"The number of texts with positive subjectivity and negative polarity is: {positive_subjectivity_negative_polarity_count}")
    print(f"The number of texts with positive subjectivity and positive polarity is: {positive_subjectivity_positive_polarity_count}")
    print(f"The number of texts with positive subjectivity and 0 polarity is: {positive_subjectivity_zero_polarity_count}")

In [None]:
# Build one row per .txt document: [year, file name, polarity, subjectivity].
results = []
directory = "../data/txt_clean"
# Sorted so the row order of df does not depend on the arbitrary order in
# which os.listdir returns directory entries.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".txt"):
        continue
    # The year is the third '_'-separated part of the name, cut at its first
    # '-'. A name with fewer than three such parts raises IndexError.
    year = filename.split('_')[2].split('-')[0]
    with open(os.path.join(directory, filename), 'r', encoding="utf-8") as file:
        text = file.read()
    # The file is closed at this point; scoring only needs the text.
    polarity, subjectivity = calculate_polarity_subjectivity(text)
    results.append([year, filename, polarity, subjectivity])

df = pd.DataFrame(results, columns=['Year', 'Document', 'Polarity', 'Subjectivity'])

# Report, for each year, the share of that year's documents falling into each
# combination of polarity sign and subjectivity (zero / positive).
for year, group in df.groupby('Year'):
    print(f"\nYear: {year}")
    # Denominator for every percentage printed for this year.
    total_docs = len(group)

    negative_polarity = group['Polarity'] < 0
    positive_polarity = group['Polarity'] > 0
    zero_polarity = group['Polarity'] == 0
    zero_subjectivity = group['Subjectivity'] == 0
    positive_subjectivity = group['Subjectivity'] > 0

    count = len(group[zero_polarity & zero_subjectivity])
    print(f"The percentage of texts with 0 polarity and 0 subjectivity is: {count/total_docs*100}%")

    zero_subjectivity_negative_polarity_count = len(group[negative_polarity & zero_subjectivity])
    zero_subjectivity_positive_polarity_count = len(group[positive_polarity & zero_subjectivity])
    positive_subjectivity_negative_polarity_count = len(group[negative_polarity & positive_subjectivity])
    positive_subjectivity_positive_polarity_count = len(group[positive_polarity & positive_subjectivity])
    positive_subjectivity_zero_polarity_count = len(group[zero_polarity & positive_subjectivity])

    print(f"The percentage of texts with 0 subjectivity and negative polarity is: {zero_subjectivity_negative_polarity_count/total_docs*100}%")
    print(f"The percentage of texts with 0 subjectivity and positive polarity is: {zero_subjectivity_positive_polarity_count/total_docs*100}%")
    print(f"The percentage of texts with positive subjectivity and negative polarity is: {positive_subjectivity_negative_polarity_count/total_docs*100}%")
    print(f"The percentage of texts with positive subjectivity and positive polarity is: {positive_subjectivity_positive_polarity_count/total_docs*100}%")
    print(f"The percentage of texts with positive subjectivity and 0 polarity is: {positive_subjectivity_zero_polarity_count/total_docs*100}%")