<a href="https://colab.research.google.com/github/VaishnaviDayanand/TrendFusion-Analytics/blob/main/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import joblib
from textblob import TextBlob
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Download necessary NLTK data.
# 'punkt_tab' is fetched in addition to 'punkt' because recent NLTK releases
# (3.9+) make word_tokenize load its tokenizer tables from that package rather
# than from the pickled 'punkt' models; requesting both keeps the notebook
# runnable from a fresh kernel on either old or new NLTK versions.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [3]:
# Read the men's clothing CSV into the working DataFrame `df`
df = pd.read_csv(filepath_or_buffer='/content/processed_Men_clothing_data.csv')

In [4]:
# Data cleaning: discard, in place, every row that has a missing value in any column
df.dropna(axis=0, how='any', inplace=True)

In [5]:
# Text Preprocessing

# Translation table that deletes every character in string.punctuation.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Filled on first use so that the stopword corpus is read once and the
# lemmatizer is constructed once, instead of once per token / per review.
_NLP_CACHE = {}


def _nlp_resources():
    """Return ``(stop_words, lemmatizer)``, building and caching them on first call."""
    if not _NLP_CACHE:
        # frozenset gives constant-time membership tests; the list returned by
        # stopwords.words() would be scanned linearly for every token.
        _NLP_CACHE['stop_words'] = frozenset(stopwords.words('english'))
        _NLP_CACHE['lemmatizer'] = WordNetLemmatizer()
    return _NLP_CACHE['stop_words'], _NLP_CACHE['lemmatizer']


def preprocess_text(text):
    """Normalize a review into a space-joined string of lemmatized tokens.

    Steps, in order: lowercase the text, delete every character found in
    ``string.punctuation``, tokenize with ``word_tokenize``, drop NLTK English
    stopwords, and lemmatize each remaining token with ``WordNetLemmatizer``.

    Args:
        text: The review text as a ``str``.

    Returns:
        The surviving lemmas joined by single spaces (an empty string when no
        token survives).
    """
    stop_words, lemmatizer = _nlp_resources()
    text = text.lower().translate(_PUNCTUATION_TABLE)
    kept = (word for word in word_tokenize(text) if word not in stop_words)
    return ' '.join(lemmatizer.lemmatize(word) for word in kept)

In [6]:
# Run every review through preprocess_text and keep the result in a new column
df['cleaned_reviews'] = df['reviews'].map(preprocess_text)

In [7]:
# Function to get sentiment
def get_sentiment(text):
    """Convert the TextBlob polarity of ``text`` into a three-way label.

    Returns:
        1 when the polarity is above zero (positive), -1 when it is below
        zero (negative), and 0 otherwise (neutral).
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 1  # Positive
    return -1 if polarity < 0 else 0  # Negative / Neutral

In [8]:
# Label from the raw review text rather than from 'cleaned_reviews':
# preprocess_text removes NLTK English stopwords, and that list contains
# negations such as "not" and "no", so a review like "not good" would be
# scored as "good" and labelled positive if the cleaned text were used.
df['sentiment'] = df['reviews'].apply(get_sentiment)

In [9]:
# Split data: features are the cleaned reviews, targets are the sentiment labels.
# 20% of the rows are held out; the fixed random_state makes the split repeatable.
X, y = df['cleaned_reviews'], df['sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Vectorize text data with TF-IDF (vocabulary capped at 5000 terms).
# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vocabulary.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(raw_documents=X_train)
X_test_vec = vectorizer.transform(raw_documents=X_test)

In [11]:
# Train the model: fit() returns the estimator itself, so the fitted
# classifier is bound to `model` directly and then displayed as the cell output.
model = LogisticRegression().fit(X_train_vec, y_train)
model

In [12]:
# Predict sentiment for the entire dataset (training and test rows alike)
all_reviews_vec = vectorizer.transform(df['cleaned_reviews'])
df['predicted_sentiment'] = model.predict(all_reviews_vec)

In [18]:
# Write the DataFrame, including the new sentiment columns, to CSV without the index
df.to_csv(path_or_buf='processed_Men_with_sentiment_u.csv', index=False)

In [16]:
# Preview the first five rows of the processed frame
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,product_name,sales_price,brand,colour,child_category,reviews,product_url,medium,cleaned_reviews,sentiment,predicted_sentiment
0,7,Pepe Jeans Men's Solid Regular fit Casual Shirt,919,JUST F,Blue1,MensCasualShirts,"Arrived damaged, very unhappy.",https://www.amazon.in/Pepe-Jeans-Regular-Casua...,https://images-na.ssl-images-amazon.com/images...,arrived damaged unhappy,-1,-1
1,24,Calvin Klein Jeans Men's Plain Slim fit Casual...,2749,Calvin Klein Jeans,Grey,MensCasualShirts,"Comfortable and stylish, love it!",https://www.amazon.in/Calvin-Klein-Sleeve-Stan...,https://images-na.ssl-images-amazon.com/images...,comfortable stylish love,1,1
2,31,Tommy Hilfiger Men's Checkered Regular fit Cas...,2639,Saree World,"Black, Grey Milanch",MensCasualShirts,"Fits like a glove, very flattering.",https://www.amazon.in/TOMMY-HILFIGER-Checkered...,https://images-na.ssl-images-amazon.com/images...,fit like glove flattering,1,1
3,52,CAVALLO by Linen Club: Red Checked Casual Line...,849,CAVALLO by Linen Club,D - Black Peach,MensCasualShirts,Fabric shrinks after washing.,https://www.amazon.in/CAVALLO-Linen-Club-Check...,https://images-na.ssl-images-amazon.com/images...,fabric shrink washing,0,0
4,90,Neostreak Men's Slim Fit Stretchable Jeans,589,VISVA DESIGNER,Olive,MensJeans,"Exactly as described, very satisfied.",https://www.amazon.in/Neostreak-Mens-Jeans-neo...,https://images-na.ssl-images-amazon.com/images...,exactly described satisfied,1,1


In [17]:
# Persist the fitted classifier and the fitted TF-IDF vectorizer with joblib
joblib.dump(value=model, filename='sentiment_model_m_u.pkl')
joblib.dump(value=vectorizer, filename='vectorizer_m_u.pkl')

['vectorizer_m_u.pkl']