In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
import spacy
from collections import Counter

In [None]:
# Télécharger les stopwords en anglais si vous ne les avez pas déjà téléchargés
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
# Charger le modèle de langue anglaise de spaCy
nlp = spacy.load("en_core_web_sm")

In [None]:
nlp

<spacy.lang.en.English at 0x7cea351a88b0>

In [None]:
# Définir les stopwords en anglais
stop_words = set(stopwords.words('english'))


In [None]:

# Charger le dataset
df = pd.read_csv('data-pesticide.csv')

In [None]:
df

Unnamed: 0,class_id,treatment_type,product_id,review_author_name,review_date,review_content,review_rating,review_is_verified
0,insects,Organic,B0BDVCCKKT,bay,"Reviewed in the United States on January 17, 2024",Garden Safe Slug & Snail Bait is a good produc...,5.0 out of 5 stars,Verified Purchase
1,insects,Organic,B0BDVCCKKT,Tom T.,"Reviewed in the United States on September 23,...",If you've ever engaged in the timeless battle ...,5.0 out of 5 stars,Verified Purchase
2,insects,Organic,B0BDVCCKKT,Ralph Robles,"Reviewed in the United States on September 2, ...",Does a good job. But have to apply daily.,4.0 out of 5 stars,Verified Purchase
3,insects,Organic,B0BDVCCKKT,Gabe Edmondson,"Reviewed in the United States on January 3, 2024",It seems to be working great. I was trying to ...,5.0 out of 5 stars,Verified Purchase
4,insects,Organic,B0BDVCCKKT,Nino,"Reviewed in the United States on February 14, ...",Good seller,5.0 out of 5 stars,Verified Purchase
...,...,...,...,...,...,...,...,...
10176,viruses,Chimique,B00ARKSABO,Adam W Toner,"Reviewed in the United States on September 24,...",I had another bottle like this purchased from ...,4.0 out of 5 stars,Verified Purchase
10177,viruses,Chimique,B00ARKSABO,Bill,"Reviewed in the United States on January 31, 2023","Generic form of Talstar, with no odor. Great f...",4.0 out of 5 stars,Verified Purchase
10178,viruses,Chimique,B00ARKSABO,Sergio A Cardenas martinez,"Reviewed in the United States on July 26, 2023",Lo utilice para las hormigas y funciono muy bn,5.0 out of 5 stars,Verified Purchase
10179,viruses,Chimique,B00ARKSABO,Kristen Caples,"Reviewed in the United States on July 25, 2023","Excellent weed killer, better than big box sto...",5.0 out of 5 stars,Verified Purchase


In [None]:
# Fonction pour supprimer les stopwords d'une chaîne de texte
def remove_stopwords(text):
    if pd.isnull(text):
        return ''
    tokens = text.split()
    filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
    return ' '.join(filtered_tokens)


In [None]:
# Appliquer la fonction remove_stopwords à la colonne 'review_content'
df['review_content'] = df['review_content'].apply(remove_stopwords)

In [None]:
import re

In [None]:
aspects_to_extract = ["price", "packaging", "package", "effect", "tomatoes", "tomato", "potatoes", "potato", "wheat", "fruit", "apple", "insects", "fungi", "fungus", "herbs", "bacteria", "viruses", "nematodes"]


In [None]:
len(aspects_to_extract)

18

In [None]:
# Function to extract aspects from a single review
def extract_aspects(review):
    extracted_aspects = []
    if isinstance(review, str):
        for aspect in aspects_to_extract:
            # Using regular expressions to find matches
            matches = re.findall(r'\b{}\b'.format(aspect), review, flags=re.IGNORECASE)
            if matches:
                extracted_aspects.extend(matches)
    return extracted_aspects

In [None]:
# Applying the function to the reviews column
df['extracted_aspects'] = df['review_content'].apply(extract_aspects)



In [None]:
# Viewing the DataFrame with extracted aspects
print(df[['review_content', 'extracted_aspects']])

                                          review_content extracted_aspects
0      Garden Safe Slug & Snail Bait is a good produc...           [price]
1      If you've ever engaged in the timeless battle ...                []
2              Does a good job. But have to apply daily.                []
3      It seems to be working great. I was trying to ...         [insects]
4                                            Good seller                []
...                                                  ...               ...
10176  I had another bottle like this purchased from ...                []
10177  Generic form of Talstar, with no odor. Great f...         [insects]
10178     Lo utilice para las hormigas y funciono muy bn                []
10179  Excellent weed killer, better than big box sto...                []
10180  It clearly shows that it works with the right ...                []

[10181 rows x 2 columns]


In [None]:
from collections import Counter

# Counter to count the occurrences of each aspect
aspect_counter = Counter()

# Iterate through the extracted aspects and count occurrences
for aspects_list in df['extracted_aspects']:
    aspect_counter.update(aspects_list)

# Convert Counter to dictionary
aspect_count_dict = dict(aspect_counter)

# Print the aspect count dictionary
print(aspect_count_dict)

{'price': 394, 'insects': 344, 'PRICE': 4, 'fungus': 634, 'packaging': 36, 'package': 109, 'fruit': 199, 'tomato': 111, 'potatoes': 14, 'Package': 7, 'tomatoes': 123, 'Potato': 10, 'Price': 19, 'apple': 60, 'effect': 129, 'Nematodes': 1, 'bacteria': 10, 'potato': 50, 'Effect': 4, 'nematodes': 4, 'Fungus': 18, 'Tomatoes': 7, 'herbs': 10, 'Insects': 4, 'fungi': 26, 'FUNGUS': 2, 'Fruit': 11, 'wheat': 1, 'Apple': 5, 'Tomato': 4, 'viruses': 1, 'FRUIT': 1, 'Packaging': 1}


In [None]:
aspects=list(aspect_count_dict.keys())

In [None]:
aspects = ['price', 'insects', 'fungus', 'packaging', 'package', 'fruit',
           'tomato', 'potato', 'wheat', 'apple', 'effect', 'nematodes',
           'bacteria', 'herbs', 'viruses']

In [None]:
# Normalize aspects to lowercase
normalized_aspects = [aspect.lower() for aspect in aspects]


In [None]:
# Normalize review_content to lowercase
df['review_content_lower'] = df['review_content'].str.lower()


In [None]:
aspect_dataframes={}

In [None]:
# Iterate over the normalized aspects
for aspect in normalized_aspects:
    # Filter the normalized review_content column for rows containing the current aspect
    filtered_df = df[df['review_content_lower'].str.contains(aspect, na=False)]
    # Store the filtered DataFrame in the dictionary using the aspect as the key
    aspect_dataframes[aspect] = filtered_df

In [None]:
aspect_dataframes.keys()

dict_keys(['price', 'insects', 'fungus', 'packaging', 'package', 'fruit', 'tomato', 'potato', 'wheat', 'apple', 'effect', 'nematodes', 'bacteria', 'herbs', 'viruses'])

In [None]:
# Now aspect_dataframes contains separate DataFrames for each aspect group, with case-insensitive matching

# Create a DataFrame for each aspect and assign it to a variable with the name of the aspect
for aspect, aspect_df in aspect_dataframes.items():
    globals()['df_' + aspect] = aspect_df

In [None]:
df_tomato['review_content']

105      I had no problem with pests on my tomato plant...
126      I am an organic gardener, and prefer a dry dus...
175      I have some sort of true bug (Hymenoptera) eat...
178      I have several tomato plants, cucumber plants ...
239      I used this product as directed to foliar feed...
                               ...                        
9857     I use this stuff like crazy.  It's the BEST ST...
9877                      Does the job to kill tomato bugs
9978     I fed my tomatoes with some of this iron. This...
10013    High quality and effective in the garden. Deli...
10017    This stuff helps with duck and geese poop smel...
Name: review_content, Length: 189, dtype: object

In [76]:
df_apple

Unnamed: 0,class_id,treatment_type,product_id,review_author_name,review_date,review_content,review_rating,review_is_verified,extracted_aspects,review_content_lower,aspect
163,insects,Organic,B00743LJ0G,Suzanne Schumacher,"Reviewed in the United States on June 4, 2013",I was told by my gardening company that this i...,5.0 out of 5 stars,Verified Purchase,"[fruit, apple]",i was told by my gardening company that this i...,apple
369,insects,Organic,B00D8NQ26C,Dan in MN,"Reviewed in the United States on July 15, 2022",My local fertilizer and farm elevator owner su...,5.0 out of 5 stars,Verified Purchase,"[fruit, apple]",my local fertilizer and farm elevator owner su...,apple
599,insects,Chimique,B07VGTR44T,Illinois Mom,"Reviewed in the United States on October 6, 2019",If you are having any issues with those little...,5.0 out of 5 stars,Verified Purchase,"[fruit, apple]",if you are having any issues with those little...,apple
649,insects,Chimique,B07VGTR44T,Jeannette,"Reviewed in the United States on April 14, 2020",Arrived faster than expected. I installed it a...,5.0 out of 5 stars,Verified Purchase,[apple],arrived faster than expected. i installed it a...,apple
913,insects,Chimique,B004S6ZZFM,Sal,"Reviewed in the United States on October 1, 2023",Moved to the country in 2021 and decided to pl...,5.0 out of 5 stars,Verified Purchase,"[apple, apple, fungus]",moved to the country in 2021 and decided to pl...,apple
924,insects,Chimique,B004S6ZZFM,Amazon Customer,"Reviewed in the United States on August 29, 2018",I decided to try this instead of Immunox same ...,5.0 out of 5 stars,Verified Purchase,"[apple, apple, apple, apple]",i decided to try this instead of immunox same ...,apple
945,insects,Chimique,B004S6ZZFM,AllenC,"Reviewed in the United States on July 30, 2018",Very successful treating apple scab on a crab ...,5.0 out of 5 stars,Verified Purchase,"[apple, apple, fungus]",very successful treating apple scab on a crab ...,apple
1259,insects,Organic,B00FQGS7LC,World travelin family,"Reviewed in the United States on October 11, 2017",The stuff totally saved our apple tree that we...,5.0 out of 5 stars,Verified Purchase,"[apple, fungus]",the stuff totally saved our apple tree that we...,apple
1771,insects,Chimique,B0751T4P2Q,Amazon Customer,"Reviewed in the United States on April 19, 2021","Great for enhancing the favor of fruit, used f...",5.0 out of 5 stars,Verified Purchase,"[fruit, apple]","great for enhancing the favor of fruit, used f...",apple
2263,insects,Organic,B001RPVUCY,Sudoku Caulton,"Reviewed in the United States on June 4, 2018",My baby apple tree (grown indoors from seed) w...,5.0 out of 5 stars,Verified Purchase,"[apple, apple]",my baby apple tree (grown indoors from seed) w...,apple


In [77]:
df_potato

Unnamed: 0,class_id,treatment_type,product_id,review_author_name,review_date,review_content,review_rating,review_is_verified,extracted_aspects,review_content_lower,aspect
114,insects,Organic,B00743LJ0G,Sam,"Reviewed in the United States on September 7, ...",Bugs on squash/ potatoes,5.0 out of 5 stars,Verified Purchase,[potatoes],bugs on squash/ potatoes,potato
130,insects,Organic,B00743LJ0G,Amazon Customer,"Reviewed in the United States on August 14, 2022",Did not work effectively on Coloado Potato bug...,3.0 out of 5 stars,Verified Purchase,[Potato],did not work effectively on coloado potato bug...,potato
171,insects,Organic,B00743LJ0G,Rhonda A. Dukart,"Reviewed in the United States on September 10,...",Used in my garden to stop lady bugs eating my ...,1.0 out of 5 stars,Verified Purchase,[potatoes],used in my garden to stop lady bugs eating my ...,potato
310,insects,Organic,B00D8NQ26C,Lonesomerider,"Reviewed in the United States on August 3, 2021",I had to special order this from another sourc...,5.0 out of 5 stars,Verified Purchase,"[tomato, potato, fruit]",i had to special order this from another sourc...,potato
1880,insects,Organic,B0924ZGWBL,Jessica E,"Reviewed in the United States on April 11, 2023",I like that this is an organic pesticide! Thi...,5.0 out of 5 stars,,[potato],i like that this is an organic pesticide! thi...,potato
...,...,...,...,...,...,...,...,...,...,...,...
4264,fungi,Organic,B00D8NQ26C,Lonesomerider,"Reviewed in the United States on August 3, 2021",I had to special order this from another sourc...,5.0 out of 5 stars,Verified Purchase,"[tomato, potato, fruit]",i had to special order this from another sourc...,potato
4516,fungi,Organic,B000UJVDXY,Jettblack,"Reviewed in the United States on July 6, 2022",I am no expert in the science around the actua...,4.0 out of 5 stars,Verified Purchase,[potato],i am no expert in the science around the actua...,potato
5511,fungi,Organic,B00BSULSHA,Forge Ahead,"Reviewed in the United States on September 4, ...",If you've had prolonged (weeks) of warm and fo...,5.0 out of 5 stars,Verified Purchase,"[tomato, tomato, tomato, potato, fungus, fungus]",if you've had prolonged (weeks) of warm and fo...,potato
6154,fungi,Chimique,B0049PL9PC,TheBowdens,"Reviewed in the United States on June 6, 2017",Works great for roses! However you only want t...,4.0 out of 5 stars,Verified Purchase,"[potatoes, insects, fungus]",works great for roses! however you only want t...,potato
