 # Group Memebers 
 # 21AI 33 ,30 ,28 ,24 , 19 
# Project Title  "Sentiment Analysis"

# Data Loading


In [30]:

import pandas as pd

data = pd.read_csv('data.csv ')
print(data.head())



                                            Sentence Sentiment
0  The GeoSolutions technology will leverage Bene...  positive
1  $ESI on lows, down $1.50 to $2.50 BK a real po...  negative
2  For the last quarter of 2010 , Componenta 's n...  positive
3  According to the Finnish-Russian Chamber of Co...   neutral
4  The Swedish buyout firm has sold its remaining...   neutral


# Data cleaning


In [5]:
import nltk
nltk.download('stopwords')

import re
from nltk.corpus import stopwords

def clean_text(text):
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'[^A-Za-z0-9 ]+', '', text)  # Remove special characters
    text = text.lower()  # Convert to lowercase
    stop_words = set(stopwords.words('english'))
    text = ' '.join([word for word in text.split() if word not in stop_words])  # Remove stopwords
    return text

data['cleaned_text'] = data['Sentence'].apply(clean_text)


[nltk_data] Downloading package stopwords to C:\Users\Lenovo
[nltk_data]     X260\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Data preprocessing

In [7]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

nltk.download('punkt')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

# Lemmatization function
def preprocess_text(text):
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(tokens)

data['preprocessed_text'] = data['cleaned_text'].apply(preprocess_text)


[nltk_data] Downloading package punkt to C:\Users\Lenovo
[nltk_data]     X260\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Lenovo
[nltk_data]     X260\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Featuere Extraction

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer(max_features=1000)
X = tfidf_vectorizer.fit_transform(data['preprocessed_text'])


 # Machine Learning Model

In [40]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


X_train, X_test, y_train, y_test = train_test_split(X, data['Sentiment'], test_size=0.2)


model = LogisticRegression()
model.fit(X_train, y_train)


y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')


Accuracy: 0.6689478186484175


 # Deployment

# currently deploye on a GUI app uing python tkinter

In [2]:
import tkinter as tk
from tkinter import messagebox
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# Load your dataset (Make sure the path is correct)
data = pd.read_csv(r'E:\prog_work\data.csv')

# Preprocessing functions
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

def clean_text(text):
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'[^A-Za-z0-9 ]+', '', text)  # Remove special characters
    text = text.lower()  # Convert to lowercase
    stop_words = set(stopwords.words('english'))
    text = ' '.join([word for word in text.split() if word not in stop_words])  # Remove stopwords
    return text

def preprocess_text(text):
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(tokens)

# Clean and preprocess the dataset
data['cleaned_text'] = data['Sentence'].apply(clean_text)
data['preprocessed_text'] = data['cleaned_text'].apply(preprocess_text)

# Create a TF-IDF vectorizer and logistic regression model
tfidf_vectorizer = TfidfVectorizer(max_features=1000)
X = tfidf_vectorizer.fit_transform(data['preprocessed_text'])
model = LogisticRegression()
model.fit(X, data['Sentiment'])

# Create a pipeline for prediction
pipeline = make_pipeline(tfidf_vectorizer, model)

# Function to predict sentiment
def predict_sentiment(text):
    cleaned_text = clean_text(text)
    preprocessed_text = preprocess_text(cleaned_text)
    prediction = pipeline.predict([preprocessed_text])
    return prediction[0]

# Tkinter App
def on_predict():
    user_input = text_input.get("1.0", tk.END).strip()
    if user_input:
        sentiment = predict_sentiment(user_input)
        messagebox.showinfo("Prediction", f"The predicted sentiment is: {sentiment}")
    else:
        messagebox.showwarning("Input Error", "Please enter some text.")

app = tk.Tk()
app.title("Sentiment Analysis App")

# Text input area
text_input = tk.Text(app, height=10, width=50)
text_input.pack(pady=10)

# Predict button
predict_button = tk.Button(app, text="Predict Sentiment", command=on_predict)
predict_button.pack(pady=5)

# Run the app
app.mainloop()


[nltk_data] Downloading package stopwords to C:\Users\Lenovo
[nltk_data]     X260\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\Lenovo
[nltk_data]     X260\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Lenovo
[nltk_data]     X260\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
