# Sentiment analysis avec Textblob-FR

### Moyenne des polarités et subjectivités par document

In [13]:
import os
import pandas as pd
from textblob import TextBlob

def calculate_polarity_subjectivity(text):
    """Score a text with TextBlob and return both sentiment values as percentages.

    Args:
        text: The raw text to analyse.

    Returns:
        A ``(polarity, subjectivity)`` tuple in which each value is the
        matching field of ``TextBlob(text).sentiment`` multiplied by 100.
    """
    # NOTE(review): the notebook title mentions Textblob-FR, but TextBlob is
    # constructed here with no extra arguments -- confirm this is intended.
    sentiment = TextBlob(text).sentiment
    polarity_pct = 100 * sentiment.polarity
    subjectivity_pct = 100 * sentiment.subjectivity
    return polarity_pct, subjectivity_pct

# Score every ".txt" file of the corpus and collect one row per document.
results = []
directory = "../data/txt_clean"
# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of the resulting DataFrame stable between runs and machines.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".txt"):
        continue
    with open(os.path.join(directory, filename), 'r', encoding="utf-8") as file:
        text = file.read()
    # The file handle is already closed here; only the in-memory text is scored.
    polarity, subjectivity = calculate_polarity_subjectivity(text)
    results.append([filename, polarity, subjectivity])

# One row per document: file name plus its two scores (already scaled by 100).
df = pd.DataFrame(results, columns=['Document', 'Polarity', 'Subjectivity'])
df

Unnamed: 0,Document,Polarity,Subjectivity
0,KB_JB1051_1948-05-24_01-00003.txt,16.666667,33.333333
1,KB_JB1051_1948-05-27_01-00001.txt,0.000000,0.000000
2,KB_JB1051_1948-05-31_01-00001.txt,0.000000,0.000000
3,KB_JB1051_1948-06-02_01-00003.txt,33.333333,55.000000
4,KB_JB1051_1948-06-05_01-00003.txt,25.000000,62.500000
...,...,...,...
984,KB_JB838_1950-09-14_01-00003.txt,0.000000,0.000000
985,KB_JB838_1950-10-06_01-00003.txt,5.000000,17.500000
986,KB_JB838_1950-11-10_01-00005.txt,22.500000,70.000000
987,KB_JB838_1950-11-30_01-00002.txt,-40.000000,70.000000


### Nombre de documents regroupés par polarité et subjectivité

In [14]:
# Count the documents falling in each combination of polarity sign
# (negative / zero / positive) and subjectivity (zero / positive).
count = len(df[(df['Polarity'] == 0) & (df['Subjectivity'] == 0)])
print(f"The number of texts with 0 polarity and 0 subjectivity is: {count}")

# Overall sign counts; computed but not printed in this cell.
negative_count = len(df[df['Polarity'] < 0])
positive_count = len(df[df['Polarity'] > 0])

# Each combination is computed and reported exactly once (the original cell
# computed and printed the "positive subjectivity and positive polarity"
# count twice).
zero_subjectivity_negative_polarity_count = len(df[(df['Polarity'] < 0) & (df['Subjectivity'] == 0)])
zero_subjectivity_positive_polarity_count = len(df[(df['Polarity'] > 0) & (df['Subjectivity'] == 0)])
positive_subjectivity_negative_polarity_count = len(df[(df['Polarity'] < 0) & (df['Subjectivity'] > 0)])
positive_subjectivity_positive_polarity_count = len(df[(df['Polarity'] > 0) & (df['Subjectivity'] > 0)])
positive_subjectivity_zero_polarity_count = len(df[(df['Polarity'] == 0) & (df['Subjectivity'] > 0)])

print(f"The number of texts with 0 subjectivity and negative polarity is: {zero_subjectivity_negative_polarity_count}")
print(f"The number of texts with 0 subjectivity and positive polarity is: {zero_subjectivity_positive_polarity_count}")
print(f"The number of texts with positive subjectivity and negative polarity is: {positive_subjectivity_negative_polarity_count}")
print(f"The number of texts with positive subjectivity and positive polarity is: {positive_subjectivity_positive_polarity_count}")
print(f"The number of texts with positive subjectivity and 0 polarity is: {positive_subjectivity_zero_polarity_count}")


The number of texts with 0 polarity and 0 subjectivity is: 445
The number of texts with 0 subjectivity and negative polarity is: 6
The number of texts with 0 subjectivity and positive polarity is: 28
The number of texts with positive subjectivity and negative polarity is: 164
The number of texts with positive subjectivity and positive polarity is: 297
The number of texts with positive subjectivity and 0 polarity is: 49
The number of texts with positive subjectivity and positive polarity is: 297


In [16]:
# Summary statistics over the documents that have at least one non-zero score.
# A row is dropped only when BOTH its polarity and its subjectivity equal 0.
both_scores_zero = (df['Polarity'] == 0) & (df['Subjectivity'] == 0)
filtered_df = df[~both_scores_zero]

polarity_scores = filtered_df['Polarity']
subjectivity_scores = filtered_df['Subjectivity']

# Compute all four statistics first, then report them in a fixed order.
mean_polarity, mean_subjectivity = polarity_scores.mean(), subjectivity_scores.mean()
median_polarity, median_subjectivity = polarity_scores.median(), subjectivity_scores.median()

print(f"Mean Polarity: {mean_polarity}")
print(f"Mean Subjectivity: {mean_subjectivity}")

print(f"Median Polarity: {median_polarity}")
print(f"Median Subjectivity: {median_subjectivity}")

Mean Polarity: 4.316891564864583
Mean Subjectivity: 51.14607024943816
Median Polarity: 6.458333333333334
Median Subjectivity: 50.0


In [17]:
def calculate_polarity_subjectivity(text):
    """Return TextBlob's polarity and subjectivity for ``text``, each scaled by 100.

    NOTE(review): a helper with the same name and the same behaviour is
    already defined in an earlier cell; running this cell replaces it.

    Args:
        text: The raw text to analyse.

    Returns:
        A ``(polarity, subjectivity)`` tuple of the scaled scores.
    """
    scores = TextBlob(text).sentiment
    return tuple(100 * value for value in (scores.polarity, scores.subjectivity))

# Rebuild the score table, this time tagging every document with its year,
# then print the polarity/subjectivity breakdown for each year.
results = []
directory = "../data/txt_clean"
# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of the resulting DataFrame stable between runs and machines.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".txt"):
        continue
    # The third '_'-separated field is split on '-' and its first piece is
    # used as the year (kept as a string).
    year = filename.split('_')[2].split('-')[0]
    with open(os.path.join(directory, filename), 'r', encoding="utf-8") as file:
        text = file.read()
    # The file handle is already closed here; only the in-memory text is scored.
    polarity, subjectivity = calculate_polarity_subjectivity(text)
    results.append([year, filename, polarity, subjectivity])

# NOTE: this overwrites the `df` built by the earlier cell with a version
# that carries an extra 'Year' column.
df = pd.DataFrame(results, columns=['Year', 'Document', 'Polarity', 'Subjectivity'])

for year, group in df.groupby('Year'):
    print(f"\nYear: {year}")
    count = len(group[(group['Polarity'] == 0) & (group['Subjectivity'] == 0)])
    print(f"The number of texts with 0 polarity and 0 subjectivity is: {count}")

    # Overall sign counts for the year; computed but not printed in this cell.
    negative_count = len(group[group['Polarity'] < 0])
    positive_count = len(group[group['Polarity'] > 0])

    # Remaining combinations of polarity sign and subjectivity (zero / positive).
    zero_subjectivity_negative_polarity_count = len(group[(group['Polarity'] < 0) & (group['Subjectivity'] == 0)])
    zero_subjectivity_positive_polarity_count = len(group[(group['Polarity'] > 0) & (group['Subjectivity'] == 0)])
    positive_subjectivity_negative_polarity_count = len(group[(group['Polarity'] < 0) & (group['Subjectivity'] > 0)])
    positive_subjectivity_positive_polarity_count = len(group[(group['Polarity'] > 0) & (group['Subjectivity'] > 0)])
    positive_subjectivity_zero_polarity_count = len(group[(group['Polarity'] == 0) & (group['Subjectivity'] > 0)])

    print(f"The number of texts with 0 subjectivity and negative polarity is: {zero_subjectivity_negative_polarity_count}")
    print(f"The number of texts with 0 subjectivity and positive polarity is: {zero_subjectivity_positive_polarity_count}")
    print(f"The number of texts with positive subjectivity and negative polarity is: {positive_subjectivity_negative_polarity_count}")
    print(f"The number of texts with positive subjectivity and positive polarity is: {positive_subjectivity_positive_polarity_count}")
    print(f"The number of texts with positive subjectivity and 0 polarity is: {positive_subjectivity_zero_polarity_count}")


Year: 1948
The number of texts with 0 polarity and 0 subjectivity is: 158
The number of texts with 0 subjectivity and negative polarity is: 4
The number of texts with 0 subjectivity and positive polarity is: 10
The number of texts with positive subjectivity and negative polarity is: 75
The number of texts with positive subjectivity and positive polarity is: 151
The number of texts with positive subjectivity and 0 polarity is: 29

Year: 1949
The number of texts with 0 polarity and 0 subjectivity is: 217
The number of texts with 0 subjectivity and negative polarity is: 1
The number of texts with 0 subjectivity and positive polarity is: 14
The number of texts with positive subjectivity and negative polarity is: 73
The number of texts with positive subjectivity and positive polarity is: 105
The number of texts with positive subjectivity and 0 polarity is: 13

Year: 1950
The number of texts with 0 polarity and 0 subjectivity is: 70
The number of texts with 0 subjectivity and negative polar

In [18]:
# Rebuild the per-year score table and print, for each year, the share of
# documents falling in each polarity/subjectivity combination.
results = []
directory = "../data/txt_clean"
# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of the resulting DataFrame stable between runs and machines.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".txt"):
        continue
    # The third '_'-separated field is split on '-' and its first piece is
    # used as the year (kept as a string).
    year = filename.split('_')[2].split('-')[0]
    with open(os.path.join(directory, filename), 'r', encoding="utf-8") as file:
        text = file.read()
    # The file handle is already closed here; only the in-memory text is scored.
    polarity, subjectivity = calculate_polarity_subjectivity(text)
    results.append([year, filename, polarity, subjectivity])

df = pd.DataFrame(results, columns=['Year', 'Document', 'Polarity', 'Subjectivity'])

for year, group in df.groupby('Year'):
    print(f"\nYear: {year}")
    # Denominator for every percentage below: all documents of that year.
    total_docs = len(group)
    count = len(group[(group['Polarity'] == 0) & (group['Subjectivity'] == 0)])
    print(f"The percentage of texts with 0 polarity and 0 subjectivity is: {count/total_docs*100}%")

    # Overall sign counts for the year; computed but not printed in this cell.
    negative_count = len(group[group['Polarity'] < 0])
    positive_count = len(group[group['Polarity'] > 0])

    # Remaining combinations of polarity sign and subjectivity (zero / positive).
    zero_subjectivity_negative_polarity_count = len(group[(group['Polarity'] < 0) & (group['Subjectivity'] == 0)])
    zero_subjectivity_positive_polarity_count = len(group[(group['Polarity'] > 0) & (group['Subjectivity'] == 0)])
    positive_subjectivity_negative_polarity_count = len(group[(group['Polarity'] < 0) & (group['Subjectivity'] > 0)])
    positive_subjectivity_positive_polarity_count = len(group[(group['Polarity'] > 0) & (group['Subjectivity'] > 0)])
    positive_subjectivity_zero_polarity_count = len(group[(group['Polarity'] == 0) & (group['Subjectivity'] > 0)])

    print(f"The percentage of texts with 0 subjectivity and negative polarity is: {zero_subjectivity_negative_polarity_count/total_docs*100}%")
    print(f"The percentage of texts with 0 subjectivity and positive polarity is: {zero_subjectivity_positive_polarity_count/total_docs*100}%")
    print(f"The percentage of texts with positive subjectivity and negative polarity is: {positive_subjectivity_negative_polarity_count/total_docs*100}%")
    print(f"The percentage of texts with positive subjectivity and positive polarity is: {positive_subjectivity_positive_polarity_count/total_docs*100}%")
    print(f"The percentage of texts with positive subjectivity and 0 polarity is: {positive_subjectivity_zero_polarity_count/total_docs*100}%")


Year: 1948
The percentage of texts with 0 polarity and 0 subjectivity is: 37.00234192037471%
The percentage of texts with 0 subjectivity and negative polarity is: 0.936768149882904%
The percentage of texts with 0 subjectivity and positive polarity is: 2.3419203747072603%
The percentage of texts with positive subjectivity and negative polarity is: 17.56440281030445%
The percentage of texts with positive subjectivity and positive polarity is: 35.36299765807963%
The percentage of texts with positive subjectivity and 0 polarity is: 6.791569086651054%

Year: 1949
The percentage of texts with 0 polarity and 0 subjectivity is: 51.30023640661938%
The percentage of texts with 0 subjectivity and negative polarity is: 0.2364066193853428%
The percentage of texts with 0 subjectivity and positive polarity is: 3.309692671394799%
The percentage of texts with positive subjectivity and negative polarity is: 17.257683215130022%
The percentage of texts with positive subjectivity and positive polarity is: